Skip to content

Commit

Permalink
Generalize storage format in file metastore
Browse files Browse the repository at this point in the history
  • Loading branch information
7c00 committed Mar 8, 2022
1 parent 235e556 commit 92d46bd
Show file tree
Hide file tree
Showing 7 changed files with 301 additions and 60 deletions.
5 changes: 5 additions & 0 deletions presto-hive-metastore/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@
<artifactId>slice</artifactId>
</dependency>

<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
</dependency>

<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,17 @@
import com.facebook.presto.hive.metastore.StorageFormat;
import com.facebook.presto.hive.metastore.Table;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Optional;

import static com.facebook.presto.hive.HiveStorageFormat.getHiveStorageFormat;
import static com.facebook.presto.hive.metastore.MetastoreUtil.updateStatisticsParameters;
import static com.facebook.presto.hive.metastore.PrestoTableType.EXTERNAL_TABLE;
import static com.facebook.presto.hive.metastore.StorageFormat.VIEW_STORAGE_FORMAT;
Expand All @@ -44,7 +46,7 @@ public class PartitionMetadata
private final List<Column> columns;
private final Map<String, String> parameters;

private final Optional<HiveStorageFormat> storageFormat;
private final StorageFormat storageFormat;
private final Optional<HiveBucketProperty> bucketProperty;
private final Map<String, String> storageParameters;
private final Map<String, String> serdeParameters;
Expand All @@ -59,7 +61,8 @@ public class PartitionMetadata
public PartitionMetadata(
@JsonProperty("columns") List<Column> columns,
@JsonProperty("parameters") Map<String, String> parameters,
@JsonProperty("storageFormat") Optional<HiveStorageFormat> storageFormat,
@JsonDeserialize(using = StorageFormatCompatDeserializer.class)
@JsonProperty("storageFormat") StorageFormat storageFormat,
@JsonProperty("bucketProperty") Optional<HiveBucketProperty> bucketProperty,
@JsonProperty("storageParameters") Map<String, String> storageParameters,
@JsonProperty("serdeParameters") Map<String, String> serdeParameters,
Expand All @@ -71,7 +74,7 @@ public PartitionMetadata(
this.columns = ImmutableList.copyOf(requireNonNull(columns, "columns is null"));
this.parameters = ImmutableMap.copyOf(requireNonNull(parameters, "parameters is null"));

this.storageFormat = requireNonNull(storageFormat, "storageFormat is null");
this.storageFormat = storageFormat == null ? VIEW_STORAGE_FORMAT : storageFormat;
this.bucketProperty = requireNonNull(bucketProperty, "bucketProperty is null");
this.storageParameters = ImmutableMap.copyOf(firstNonNull(storageParameters, ImmutableMap.of()));
this.serdeParameters = requireNonNull(serdeParameters, "serdeParameters is null");
Expand All @@ -82,6 +85,32 @@ public PartitionMetadata(
this.sealedPartition = sealedPartition;
}

/**
 * Legacy constructor that takes the storage format as an optional
 * {@link HiveStorageFormat}.
 *
 * <p>A present value is converted with {@link StorageFormat#fromHiveStorageFormat};
 * an empty optional is replaced by {@code VIEW_STORAGE_FORMAT}. Every other
 * argument is forwarded unchanged to the {@link StorageFormat}-based constructor.
 *
 * @throws NullPointerException if {@code storageFormat} is null
 * @deprecated use the constructor that accepts a {@link StorageFormat} directly
 */
@Deprecated
public PartitionMetadata(
        List<Column> columns,
        Map<String, String> parameters,
        Optional<HiveStorageFormat> storageFormat,
        Optional<HiveBucketProperty> bucketProperty,
        Map<String, String> storageParameters,
        Map<String, String> serdeParameters,
        Optional<String> externalLocation,
        Map<String, HiveColumnStatistics> columnStatistics,
        boolean eligibleToIgnore,
        boolean sealedPartition)
{
    this(
            columns,
            parameters,
            // Keep the descriptive failure for a null optional instead of a bare NPE from map().
            requireNonNull(storageFormat, "storageFormat is null")
                    .map(StorageFormat::fromHiveStorageFormat)
                    .orElse(VIEW_STORAGE_FORMAT),
            bucketProperty,
            storageParameters,
            serdeParameters,
            externalLocation,
            columnStatistics,
            eligibleToIgnore,
            sealedPartition);
}

public PartitionMetadata(Table table, PartitionWithStatistics partitionWithStatistics)
{
Partition partition = partitionWithStatistics.getPartition();
Expand All @@ -90,10 +119,7 @@ public PartitionMetadata(Table table, PartitionWithStatistics partitionWithStati
this.columns = partition.getColumns();
this.parameters = updateStatisticsParameters(partition.getParameters(), statistics.getBasicStatistics());

StorageFormat tableFormat = partition.getStorage().getStorageFormat();
storageFormat = Arrays.stream(HiveStorageFormat.values())
.filter(format -> tableFormat.equals(StorageFormat.fromHiveStorageFormat(format)))
.findFirst();
storageFormat = partition.getStorage().getStorageFormat();

if (table.getTableType().equals(EXTERNAL_TABLE)) {
externalLocation = Optional.of(partition.getStorage().getLocation());
Expand Down Expand Up @@ -122,8 +148,15 @@ public Map<String, String> getParameters()
return parameters;
}

/**
 * Returns the storage format as an optional {@link HiveStorageFormat}, as resolved by
 * {@code HiveStorageFormat.getHiveStorageFormat} from the stored {@link StorageFormat}.
 *
 * <p>This accessor is excluded from JSON; the {@code storageFormat} property is
 * written by {@link #getPartitionStorageFormat()}. It must not also carry
 * {@code @JsonProperty}, which would contradict {@code @JsonIgnore} and compete
 * for the same property name.
 *
 * @deprecated use {@link #getPartitionStorageFormat()}
 */
@Deprecated
@JsonIgnore
public Optional<HiveStorageFormat> getStorageFormat()
{
    return getHiveStorageFormat(storageFormat);
}

/**
 * Returns the partition's {@link StorageFormat}; serialized as the JSON
 * property {@code storageFormat}.
 */
@JsonProperty("storageFormat")
public StorageFormat getPartitionStorageFormat()
{
return storageFormat;
}
Expand Down Expand Up @@ -188,7 +221,7 @@ public Partition toPartition(String databaseName, String tableName, List<String>
values,
Storage.builder()
.setLocation(externalLocation.orElse(location))
.setStorageFormat(storageFormat.map(StorageFormat::fromHiveStorageFormat).orElse(VIEW_STORAGE_FORMAT))
.setStorageFormat(storageFormat)
.setBucketProperty(bucketProperty)
.setSerdeParameters(serdeParameters)
.setParameters(parameters)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.facebook.presto.hive.metastore.file;

import com.facebook.presto.hive.HiveStorageFormat;
import com.facebook.presto.hive.metastore.StorageFormat;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.JsonDeserializer;

import java.io.IOException;

import static com.facebook.presto.hive.metastore.StorageFormat.fromHiveStorageFormat;
import static com.fasterxml.jackson.core.JsonToken.VALUE_STRING;

/**
 * Jackson deserializer for {@link StorageFormat} that also understands the
 * older encoding, in which the value was written as a {@link HiveStorageFormat}.
 */
public class StorageFormatCompatDeserializer
        extends JsonDeserializer<StorageFormat>
{
    /**
     * Reads a {@link StorageFormat} from the parser's current token.
     *
     * @param p parser positioned on the value to read
     * @param ctxt deserialization context (not used here)
     * @return the decoded storage format
     * @throws IOException if the underlying value cannot be read
     */
    @Override
    public StorageFormat deserialize(JsonParser p, DeserializationContext ctxt)
            throws IOException, JsonProcessingException
    {
        // Before version 0.271 the storage format was stored as a HiveStorageFormat,
        // which arrives as a string token; anything else is the current StorageFormat form.
        boolean legacyEncoding = p.currentToken() == VALUE_STRING;
        if (!legacyEncoding) {
            return p.readValueAs(StorageFormat.class);
        }

        HiveStorageFormat legacyFormat = p.readValueAs(HiveStorageFormat.class);
        return fromHiveStorageFormat(legacyFormat);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,17 @@
import com.facebook.presto.hive.metastore.StorageFormat;
import com.facebook.presto.hive.metastore.Table;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Optional;

import static com.facebook.presto.hive.HiveStorageFormat.getHiveStorageFormat;
import static com.facebook.presto.hive.metastore.PrestoTableType.EXTERNAL_TABLE;
import static com.facebook.presto.hive.metastore.StorageFormat.VIEW_STORAGE_FORMAT;
import static com.google.common.base.MoreObjects.firstNonNull;
Expand All @@ -45,7 +47,7 @@ public class TableMetadata
private final List<Column> partitionColumns;
private final Map<String, String> parameters;

private final Optional<HiveStorageFormat> storageFormat;
private final StorageFormat storageFormat;
private final Optional<HiveBucketProperty> bucketProperty;
private final Map<String, String> storageParameters;
private final Map<String, String> serdeParameters;
Expand All @@ -64,7 +66,8 @@ public TableMetadata(
@JsonProperty("dataColumns") List<Column> dataColumns,
@JsonProperty("partitionColumns") List<Column> partitionColumns,
@JsonProperty("parameters") Map<String, String> parameters,
@JsonProperty("storageFormat") Optional<HiveStorageFormat> storageFormat,
@JsonDeserialize(using = StorageFormatCompatDeserializer.class)
@JsonProperty("storageFormat") StorageFormat storageFormat,
@JsonProperty("bucketProperty") Optional<HiveBucketProperty> bucketProperty,
@JsonProperty("storageParameters") Map<String, String> storageParameters,
@JsonProperty("serdeParameters") Map<String, String> serdeParameters,
Expand All @@ -79,7 +82,7 @@ public TableMetadata(
this.partitionColumns = ImmutableList.copyOf(requireNonNull(partitionColumns, "partitionColumns is null"));
this.parameters = ImmutableMap.copyOf(requireNonNull(parameters, "parameters is null"));
this.storageParameters = ImmutableMap.copyOf(firstNonNull(storageParameters, ImmutableMap.of()));
this.storageFormat = requireNonNull(storageFormat, "storageFormat is null");
this.storageFormat = storageFormat == null ? VIEW_STORAGE_FORMAT : storageFormat;
this.bucketProperty = requireNonNull(bucketProperty, "bucketProperty is null");
this.serdeParameters = requireNonNull(serdeParameters, "serdeParameters is null");
this.externalLocation = requireNonNull(externalLocation, "externalLocation is null");
Expand All @@ -96,6 +99,38 @@ public TableMetadata(
checkArgument(partitionColumns.isEmpty() || columnStatistics.isEmpty(), "column statistics cannot be set for partitioned table");
}

/**
 * Legacy constructor that takes the storage format as an optional
 * {@link HiveStorageFormat}.
 *
 * <p>A present value is converted with {@link StorageFormat#fromHiveStorageFormat};
 * an empty optional is replaced by {@code VIEW_STORAGE_FORMAT}. Every other
 * argument is forwarded unchanged to the {@link StorageFormat}-based constructor.
 *
 * @throws NullPointerException if {@code storageFormat} is null
 * @deprecated use the constructor that accepts a {@link StorageFormat} directly
 */
@Deprecated
public TableMetadata(
        String owner,
        PrestoTableType tableType,
        List<Column> dataColumns,
        List<Column> partitionColumns,
        Map<String, String> parameters,
        Optional<HiveStorageFormat> storageFormat,
        Optional<HiveBucketProperty> bucketProperty,
        Map<String, String> storageParameters,
        Map<String, String> serdeParameters,
        Optional<String> externalLocation,
        Optional<String> viewOriginalText,
        Optional<String> viewExpandedText,
        Map<String, HiveColumnStatistics> columnStatistics)
{
    this(
            owner,
            tableType,
            dataColumns,
            partitionColumns,
            parameters,
            // Keep the descriptive failure for a null optional instead of a bare NPE from map().
            requireNonNull(storageFormat, "storageFormat is null")
                    .map(StorageFormat::fromHiveStorageFormat)
                    .orElse(VIEW_STORAGE_FORMAT),
            bucketProperty,
            storageParameters,
            serdeParameters,
            externalLocation,
            viewOriginalText,
            viewExpandedText,
            columnStatistics);
}

public TableMetadata(Table table)
{
this(table, ImmutableMap.of());
Expand All @@ -109,10 +144,7 @@ public TableMetadata(Table table, Map<String, HiveColumnStatistics> columnStatis
partitionColumns = table.getPartitionColumns();
parameters = table.getParameters();

StorageFormat tableFormat = table.getStorage().getStorageFormat();
storageFormat = Arrays.stream(HiveStorageFormat.values())
.filter(format -> tableFormat.equals(StorageFormat.fromHiveStorageFormat(format)))
.findFirst();
storageFormat = table.getStorage().getStorageFormat();
bucketProperty = table.getStorage().getBucketProperty();
storageParameters = table.getStorage().getParameters();
serdeParameters = table.getStorage().getSerdeParameters();
Expand Down Expand Up @@ -174,8 +206,15 @@ public Map<String, String> getParameters()
return parameters;
}

/**
 * Returns the storage format as an optional {@link HiveStorageFormat}, as resolved by
 * {@code HiveStorageFormat.getHiveStorageFormat} from the stored {@link StorageFormat}.
 *
 * <p>This accessor is excluded from JSON; the {@code storageFormat} property is
 * written by {@link #getTableStorageFormat()}. It must not also carry
 * {@code @JsonProperty}, which would contradict {@code @JsonIgnore} and compete
 * for the same property name.
 *
 * @deprecated use {@link #getTableStorageFormat()}
 */
@Deprecated
@JsonIgnore
public Optional<HiveStorageFormat> getStorageFormat()
{
    return getHiveStorageFormat(storageFormat);
}

/**
 * Returns the table's {@link StorageFormat}; serialized as the JSON
 * property {@code storageFormat}.
 */
@JsonProperty("storageFormat")
public StorageFormat getTableStorageFormat()
{
return storageFormat;
}
Expand Down Expand Up @@ -285,7 +324,7 @@ public Table toTable(String databaseName, String tableName, String location)
tableType,
Storage.builder()
.setLocation(externalLocation.orElse(location))
.setStorageFormat(storageFormat.map(StorageFormat::fromHiveStorageFormat).orElse(VIEW_STORAGE_FORMAT))
.setStorageFormat(storageFormat)
.setBucketProperty(bucketProperty)
.setParameters(storageParameters)
.setSerdeParameters(serdeParameters)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import com.facebook.airlift.json.JsonCodec;
import com.facebook.presto.hive.HiveStorageFormat;
import com.facebook.presto.hive.metastore.Column;
import com.facebook.presto.hive.metastore.StorageFormat;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.testng.annotations.Test;
Expand All @@ -26,9 +27,8 @@
import java.nio.file.Paths;
import java.util.Optional;

import static com.facebook.presto.hive.HiveStorageFormat.ORC;
import static com.facebook.presto.hive.HiveStorageFormat.PARQUET;
import static com.facebook.presto.hive.HiveType.HIVE_STRING;
import static com.facebook.presto.hive.metastore.StorageFormat.fromHiveStorageFormat;
import static java.lang.String.format;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.testng.Assert.assertEquals;
Expand All @@ -40,6 +40,10 @@ public class TestPartitionMetadata
private static final String FILE_NAME_FORMAT = "partition-0.271-%s.json";
private static final String STORAGE_FORMAT_NOT_EQUALS = "storage format not equals";

private static final StorageFormat ORC = fromHiveStorageFormat(HiveStorageFormat.ORC);
private static final StorageFormat PARQUET = fromHiveStorageFormat(HiveStorageFormat.PARQUET);
private static final StorageFormat CUSTOM = StorageFormat.create("serde", "inputFormat", "outputFormat");

@Test
public void testAssertPartitionMetadataEquals()
{
Expand All @@ -58,38 +62,24 @@ public void testJsonRoundTrip()
assertJsonRoundTrip(createPartitionMetadata(null));
assertJsonRoundTrip(createPartitionMetadata(ORC));
assertJsonRoundTrip(createPartitionMetadata(PARQUET));
assertJsonRoundTrip(createPartitionMetadata(CUSTOM));
}

@Test
public void testDecodeFromFile()
public void testDecodeFromLegacyFile()
throws IOException
{
assertPartitionMetadataEquals(load("null"), createPartitionMetadata(null));
assertPartitionMetadataEquals(load("orc"), createPartitionMetadata(ORC));
assertPartitionMetadataEquals(load("parquet"), createPartitionMetadata(PARQUET));
}

// Disabled by default (enabled = false): instead of asserting anything, this writes
// the encoded metadata for each format to the fixture files via dump(), using the
// same tags ("null", "orc", "parquet") that the decode test loads.
@Test(enabled = false)
public void testEncodeToFile()
throws IOException
{
dump(createPartitionMetadata(null), "null");
dump(createPartitionMetadata(ORC), "orc");
dump(createPartitionMetadata(PARQUET), "parquet");
}

/**
 * Reads the fixture file for {@code tag} from {@code BASE_DIR} and decodes it
 * with {@code JSON_CODEC}.
 *
 * @param tag value substituted into {@code FILE_NAME_FORMAT} to form the file name
 * @return the decoded partition metadata
 * @throws IOException if the file cannot be read
 */
private static PartitionMetadata load(String tag)
        throws IOException
{
    String fileName = format(FILE_NAME_FORMAT, tag);
    byte[] encoded = Files.readAllBytes(Paths.get(BASE_DIR, fileName));
    return JSON_CODEC.fromBytes(encoded);
}

/**
 * Encodes {@code partition} with {@code JSON_CODEC} and writes it to the
 * fixture file for {@code tag} under {@code BASE_DIR}.
 *
 * @param partition metadata to encode
 * @param tag value substituted into {@code FILE_NAME_FORMAT} to form the file name
 * @throws IOException if the file cannot be written
 */
private static void dump(PartitionMetadata partition, String tag)
        throws IOException
{
    String fileName = format(FILE_NAME_FORMAT, tag);
    Files.write(Paths.get(BASE_DIR, fileName), JSON_CODEC.toBytes(partition));
}

private static void assertJsonRoundTrip(PartitionMetadata partition)
{
PartitionMetadata decoded = JSON_CODEC.fromJson(JSON_CODEC.toJson(partition));
Expand All @@ -108,14 +98,15 @@ private static void assertPartitionMetadataEquals(PartitionMetadata actual, Part
assertEquals(actual.getColumnStatistics(), expected.getColumnStatistics());
assertEquals(actual.isEligibleToIgnore(), expected.isEligibleToIgnore());
assertEquals(actual.isSealedPartition(), expected.isSealedPartition());
assertEquals(actual.getPartitionStorageFormat(), expected.getPartitionStorageFormat(), STORAGE_FORMAT_NOT_EQUALS);
}

private static PartitionMetadata createPartitionMetadata(HiveStorageFormat format)
private static PartitionMetadata createPartitionMetadata(StorageFormat format)
{
return new PartitionMetadata(
ImmutableList.of(column("col1"), column("col2")),
ImmutableMap.of("param1", "value1", "param2", "value2"),
Optional.ofNullable(format),
format,
Optional.empty(),
ImmutableMap.of(),
ImmutableMap.of(),
Expand Down
Loading

0 comments on commit 92d46bd

Please sign in to comment.