diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/HdfsConfigurationInitializer.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/HdfsConfigurationInitializer.java index 4444ef1b3a91..635698b0f80f 100644 --- a/presto-hive/src/main/java/io/prestosql/plugin/hive/HdfsConfigurationInitializer.java +++ b/presto-hive/src/main/java/io/prestosql/plugin/hive/HdfsConfigurationInitializer.java @@ -22,10 +22,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.net.DNSToSwitchMapping; -import org.apache.orc.OrcConf; -import org.apache.parquet.hadoop.ParquetOutputFormat; import javax.inject.Inject; import javax.net.SocketFactory; @@ -34,6 +31,7 @@ import java.util.Set; import static com.google.common.base.Preconditions.checkArgument; +import static io.prestosql.plugin.hive.util.CompressionConfigUtil.configureCompression; import static io.prestosql.plugin.hive.util.ConfigurationUtils.copy; import static java.lang.Math.toIntExact; import static java.util.Objects.requireNonNull; @@ -47,8 +45,6 @@ import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_KEY_PROVIDER_CACHE_EXPIRY_MS; import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY; import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY; -import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.COMPRESSRESULT; -import static org.apache.hadoop.io.SequenceFile.CompressionType.BLOCK; public class HdfsConfigurationInitializer { @@ -143,29 +139,6 @@ public void initializeConfiguration(Configuration config) configurationInitializers.forEach(configurationInitializer -> configurationInitializer.initializeConfiguration(config)); } - public static void configureCompression(Configuration config, HiveCompressionCodec compressionCodec) - { - 
boolean compression = compressionCodec != HiveCompressionCodec.NONE; - config.setBoolean(COMPRESSRESULT.varname, compression); - config.setBoolean("mapred.output.compress", compression); - config.setBoolean(FileOutputFormat.COMPRESS, compression); - // For ORC - OrcConf.COMPRESS.setString(config, compressionCodec.getOrcCompressionKind().name()); - // For RCFile and Text - if (compressionCodec.getCodec().isPresent()) { - config.set("mapred.output.compression.codec", compressionCodec.getCodec().get().getName()); - config.set(FileOutputFormat.COMPRESS_CODEC, compressionCodec.getCodec().get().getName()); - } - else { - config.unset("mapred.output.compression.codec"); - config.unset(FileOutputFormat.COMPRESS_CODEC); - } - // For Parquet - config.set(ParquetOutputFormat.COMPRESSION, compressionCodec.getParquetCompressionCodec().name()); - // For SequenceFile - config.set(FileOutputFormat.COMPRESS_TYPE, BLOCK.toString()); - } - public static class NoOpDNSToSwitchMapping implements DNSToSwitchMapping { diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/util/CompressionConfigUtil.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/util/CompressionConfigUtil.java new file mode 100644 index 000000000000..d978cbc3048b --- /dev/null +++ b/presto-hive/src/main/java/io/prestosql/plugin/hive/util/CompressionConfigUtil.java @@ -0,0 +1,55 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package io.prestosql.plugin.hive.util;
+
+import io.prestosql.plugin.hive.HiveCompressionCodec;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.orc.OrcConf;
+import org.apache.parquet.hadoop.ParquetOutputFormat;
+
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.COMPRESSRESULT;
+import static org.apache.hadoop.io.SequenceFile.CompressionType.BLOCK;
+/** Static-only helper that writes output-compression settings into a Hadoop {@link Configuration}. */
+public final class CompressionConfigUtil
+{
+    private CompressionConfigUtil() {}
+    /** Writes {@code compressionCodec} into {@code config}; every codec except NONE turns compression on. */
+    public static void configureCompression(Configuration config, HiveCompressionCodec compressionCodec)
+    {
+        boolean compression = compressionCodec != HiveCompressionCodec.NONE;
+        config.setBoolean(COMPRESSRESULT.varname, compression);
+        config.setBoolean("mapred.output.compress", compression);
+        config.setBoolean(FileOutputFormat.COMPRESS, compression);
+
+        // For ORC: the codec's ORC compression kind, stored via its name()
+        OrcConf.COMPRESS.setString(config, compressionCodec.getOrcCompressionKind().name());
+
+        // For RCFile and Text: set both codec keys when the codec has one, otherwise unset both so no earlier value remains
+        if (compressionCodec.getCodec().isPresent()) {
+            config.set("mapred.output.compression.codec", compressionCodec.getCodec().get().getName());
+            config.set(FileOutputFormat.COMPRESS_CODEC, compressionCodec.getCodec().get().getName());
+        }
+        else {
+            config.unset("mapred.output.compression.codec");
+            config.unset(FileOutputFormat.COMPRESS_CODEC);
+        }
+
+        // For Parquet: the codec's Parquet compression codec, stored via its name()
+        config.set(ParquetOutputFormat.COMPRESSION, compressionCodec.getParquetCompressionCodec().name());
+
+        // For SequenceFile: the compression type is always BLOCK, whichever codec was passed
+        config.set(FileOutputFormat.COMPRESS_TYPE, BLOCK.toString());
+    }
+}
diff --git a/presto-hive/src/test/java/io/prestosql/plugin/hive/AbstractTestHiveFileFormats.java b/presto-hive/src/test/java/io/prestosql/plugin/hive/AbstractTestHiveFileFormats.java
index 5e1dcab0476c..9022cfbc71e1 100644
--- a/presto-hive/src/test/java/io/prestosql/plugin/hive/AbstractTestHiveFileFormats.java
+++ b/presto-hive/src/test/java/io/prestosql/plugin/hive/AbstractTestHiveFileFormats.java
@@ 
-84,7 +84,6 @@ import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.ImmutableMap.toImmutableMap; -import static io.prestosql.plugin.hive.HdfsConfigurationInitializer.configureCompression; import static io.prestosql.plugin.hive.HiveColumnHandle.ColumnType.PARTITION_KEY; import static io.prestosql.plugin.hive.HiveColumnHandle.ColumnType.REGULAR; import static io.prestosql.plugin.hive.HivePartitionKey.HIVE_DEFAULT_DYNAMIC_PARTITION; @@ -92,6 +91,7 @@ import static io.prestosql.plugin.hive.HiveTestUtils.TYPE_MANAGER; import static io.prestosql.plugin.hive.HiveTestUtils.isDistinctFrom; import static io.prestosql.plugin.hive.HiveTestUtils.mapType; +import static io.prestosql.plugin.hive.util.CompressionConfigUtil.configureCompression; import static io.prestosql.plugin.hive.util.HiveUtil.isStructuralType; import static io.prestosql.plugin.hive.util.SerDeUtils.serializeObject; import static io.prestosql.spi.type.BigintType.BIGINT; diff --git a/presto-hive/src/test/java/io/prestosql/plugin/hive/benchmark/FileFormat.java b/presto-hive/src/test/java/io/prestosql/plugin/hive/benchmark/FileFormat.java index cae026eed483..6977733127d3 100644 --- a/presto-hive/src/test/java/io/prestosql/plugin/hive/benchmark/FileFormat.java +++ b/presto-hive/src/test/java/io/prestosql/plugin/hive/benchmark/FileFormat.java @@ -62,12 +62,12 @@ import java.util.Properties; import static io.prestosql.orc.OrcWriteValidation.OrcWriteValidationMode.BOTH; -import static io.prestosql.plugin.hive.HdfsConfigurationInitializer.configureCompression; import static io.prestosql.plugin.hive.HiveColumnHandle.ColumnType.REGULAR; import static io.prestosql.plugin.hive.HiveTestUtils.TYPE_MANAGER; import static io.prestosql.plugin.hive.HiveTestUtils.createGenericHiveRecordCursorProvider; import static io.prestosql.plugin.hive.HiveType.toHiveType; import static 
io.prestosql.plugin.hive.metastore.StorageFormat.fromHiveStorageFormat; +import static io.prestosql.plugin.hive.util.CompressionConfigUtil.configureCompression; import static java.util.stream.Collectors.joining; import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT; import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS;