Skip to content

Commit

Permalink
Extract CompressionConfigUtil
Browse files Browse the repository at this point in the history
  • Loading branch information
electrum committed Sep 23, 2019
1 parent 9b3c162 commit aac07f6
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,7 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.net.DNSToSwitchMapping;
import org.apache.orc.OrcConf;
import org.apache.parquet.hadoop.ParquetOutputFormat;

import javax.inject.Inject;
import javax.net.SocketFactory;
Expand All @@ -34,6 +31,7 @@
import java.util.Set;

import static com.google.common.base.Preconditions.checkArgument;
import static io.prestosql.plugin.hive.util.CompressionConfigUtil.configureCompression;
import static io.prestosql.plugin.hive.util.ConfigurationUtils.copy;
import static java.lang.Math.toIntExact;
import static java.util.Objects.requireNonNull;
Expand All @@ -47,8 +45,6 @@
import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_KEY_PROVIDER_CACHE_EXPIRY_MS;
import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY;
import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY;
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.COMPRESSRESULT;
import static org.apache.hadoop.io.SequenceFile.CompressionType.BLOCK;

public class HdfsConfigurationInitializer
{
Expand Down Expand Up @@ -143,29 +139,6 @@ public void initializeConfiguration(Configuration config)
configurationInitializers.forEach(configurationInitializer -> configurationInitializer.initializeConfiguration(config));
}

/**
 * Writes the compression settings implied by {@code compressionCodec} into {@code config}.
 * <p>
 * The generic "compress output" flags are turned on for every codec except
 * {@link HiveCompressionCodec#NONE}; the remaining keys carry the per-format
 * codec selection (ORC, RCFile/Text, Parquet and SequenceFile).
 *
 * @param config the Hadoop configuration to modify in place
 * @param compressionCodec the codec whose settings should be applied
 */
public static void configureCompression(Configuration config, HiveCompressionCodec compressionCodec)
{
    // Everything other than NONE counts as "compressed output"
    boolean enabled = !(compressionCodec == HiveCompressionCodec.NONE);
    config.setBoolean(COMPRESSRESULT.varname, enabled);
    config.setBoolean("mapred.output.compress", enabled);
    config.setBoolean(FileOutputFormat.COMPRESS, enabled);

    // ORC takes the name of its own compression kind
    String orcKindName = compressionCodec.getOrcCompressionKind().name();
    OrcConf.COMPRESS.setString(config, orcKindName);

    // RCFile and Text: publish the codec class name, or clear any stale value when there is no codec
    if (!compressionCodec.getCodec().isPresent()) {
        config.unset("mapred.output.compression.codec");
        config.unset(FileOutputFormat.COMPRESS_CODEC);
    }
    else {
        String codecClassName = compressionCodec.getCodec().get().getName();
        config.set("mapred.output.compression.codec", codecClassName);
        config.set(FileOutputFormat.COMPRESS_CODEC, codecClassName);
    }

    // Parquet takes the name of its own compression codec
    String parquetCodecName = compressionCodec.getParquetCompressionCodec().name();
    config.set(ParquetOutputFormat.COMPRESSION, parquetCodecName);

    // SequenceFile: the compression type is always BLOCK, whatever the codec
    String sequenceFileCompressionType = BLOCK.toString();
    config.set(FileOutputFormat.COMPRESS_TYPE, sequenceFileCompressionType);
}

public static class NoOpDNSToSwitchMapping
implements DNSToSwitchMapping
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.prestosql.plugin.hive.util;

import io.prestosql.plugin.hive.HiveCompressionCodec;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.orc.OrcConf;
import org.apache.parquet.hadoop.ParquetOutputFormat;

import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.COMPRESSRESULT;
import static org.apache.hadoop.io.SequenceFile.CompressionType.BLOCK;

/**
 * Static helper for applying a {@link HiveCompressionCodec} to a Hadoop {@link Configuration}.
 */
public final class CompressionConfigUtil
{
    // Static utility holder; never instantiated
    private CompressionConfigUtil() {}

    /**
     * Updates {@code config} in place so that the output formats covered below
     * (ORC, RCFile, Text, Parquet, SequenceFile) use {@code compressionCodec}.
     *
     * @param config the Hadoop configuration to modify
     * @param compressionCodec the codec to apply; {@link HiveCompressionCodec#NONE} turns the
     *        generic "compress output" flags off
     */
    public static void configureCompression(Configuration config, HiveCompressionCodec compressionCodec)
    {
        // Generic on/off switches: all three keys receive the same flag
        boolean compressed = compressionCodec != HiveCompressionCodec.NONE;
        String[] enableKeys = {COMPRESSRESULT.varname, "mapred.output.compress", FileOutputFormat.COMPRESS};
        for (String key : enableKeys) {
            config.setBoolean(key, compressed);
        }

        // For ORC
        OrcConf.COMPRESS.setString(config, compressionCodec.getOrcCompressionKind().name());

        // For RCFile and Text: both codec keys are either set to the codec class name or cleared
        String[] codecKeys = {"mapred.output.compression.codec", FileOutputFormat.COMPRESS_CODEC};
        if (compressionCodec.getCodec().isPresent()) {
            String codecClassName = compressionCodec.getCodec().get().getName();
            for (String key : codecKeys) {
                config.set(key, codecClassName);
            }
        }
        else {
            for (String key : codecKeys) {
                config.unset(key);
            }
        }

        // For Parquet
        config.set(ParquetOutputFormat.COMPRESSION, compressionCodec.getParquetCompressionCodec().name());

        // For SequenceFile: compression type is fixed to BLOCK irrespective of the codec
        config.set(FileOutputFormat.COMPRESS_TYPE, BLOCK.toString());
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -84,14 +84,14 @@
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static io.prestosql.plugin.hive.HdfsConfigurationInitializer.configureCompression;
import static io.prestosql.plugin.hive.HiveColumnHandle.ColumnType.PARTITION_KEY;
import static io.prestosql.plugin.hive.HiveColumnHandle.ColumnType.REGULAR;
import static io.prestosql.plugin.hive.HivePartitionKey.HIVE_DEFAULT_DYNAMIC_PARTITION;
import static io.prestosql.plugin.hive.HiveTestUtils.SESSION;
import static io.prestosql.plugin.hive.HiveTestUtils.TYPE_MANAGER;
import static io.prestosql.plugin.hive.HiveTestUtils.isDistinctFrom;
import static io.prestosql.plugin.hive.HiveTestUtils.mapType;
import static io.prestosql.plugin.hive.util.CompressionConfigUtil.configureCompression;
import static io.prestosql.plugin.hive.util.HiveUtil.isStructuralType;
import static io.prestosql.plugin.hive.util.SerDeUtils.serializeObject;
import static io.prestosql.spi.type.BigintType.BIGINT;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,12 @@
import java.util.Properties;

import static io.prestosql.orc.OrcWriteValidation.OrcWriteValidationMode.BOTH;
import static io.prestosql.plugin.hive.HdfsConfigurationInitializer.configureCompression;
import static io.prestosql.plugin.hive.HiveColumnHandle.ColumnType.REGULAR;
import static io.prestosql.plugin.hive.HiveTestUtils.TYPE_MANAGER;
import static io.prestosql.plugin.hive.HiveTestUtils.createGenericHiveRecordCursorProvider;
import static io.prestosql.plugin.hive.HiveType.toHiveType;
import static io.prestosql.plugin.hive.metastore.StorageFormat.fromHiveStorageFormat;
import static io.prestosql.plugin.hive.util.CompressionConfigUtil.configureCompression;
import static java.util.stream.Collectors.joining;
import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT;
import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS;
Expand Down

0 comments on commit aac07f6

Please sign in to comment.