Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix reading Delta Table metadata from Glue #12164

Merged
merged 2 commits into from
Apr 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,10 @@ public Optional<String> getOptionalLocation()

public String getLocation()
{
// Default getter requires location to be set. Location is not set only in rare case when in CREATE TABLE flow in Hive connector when
// delegate-transactional-managed-table-location-to-metastore is set to true and location is determined by HMS.
// Default getter requires location to be set. Location is not set in the following scenarios:
// 1. CREATE TABLE flow in Hive connector sets delegate-transactional-managed-table-location-to-metastore to true and location is determined by HMS.
// 2. Iceberg format tables
// 3. Delta format tables (sometimes)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I double checked this today. Looks like we always expect the path to be in the storage descriptor. We could change that for Managed tables, but right now it's required.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@alexjo2144, do you want me to remove the assumption that storage won't be specified for delta tables then?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this change is fine, I made an Issue for follow up #12180

return location.orElseThrow();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@
import static io.trino.spi.security.PrincipalType.USER;
import static java.util.Comparator.comparing;
import static java.util.Objects.requireNonNull;
import static java.util.function.Predicate.not;
import static java.util.function.UnaryOperator.identity;
import static java.util.stream.Collectors.toCollection;
import static java.util.stream.Collectors.toMap;
Expand Down Expand Up @@ -578,9 +579,10 @@ public void dropTable(String databaseName, String tableName, boolean deleteData)
throw new TrinoException(HIVE_METASTORE_ERROR, e);
}

String tableLocation = table.getStorage().getLocation();
if (deleteData && isManagedTable(table) && !isNullOrEmpty(tableLocation)) {
deleteDir(hdfsContext, hdfsEnvironment, new Path(tableLocation), true);
Optional<String> location = table.getStorage().getOptionalLocation()
.filter(not(String::isEmpty));
if (deleteData && isManagedTable(table) && location.isPresent()) {
deleteDir(hdfsContext, hdfsEnvironment, new Path(location.get()), true);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,15 @@ public static Table convertTable(com.amazonaws.services.glue.model.Table glueTab
.setViewOriginalText(Optional.ofNullable(glueTable.getViewOriginalText()))
.setViewExpandedText(Optional.ofNullable(glueTable.getViewExpandedText()));

if (isIcebergTable(tableParameters) || isDeltaLakeTable(tableParameters)) {
// Iceberg and Delta Lake tables do not use the StorageDescriptor field, but we need to return a Table so the caller can check that
// the table is an Iceberg/Delta table and decide whether to redirect or fail.
StorageDescriptor sd = glueTable.getStorageDescriptor();

if (isIcebergTable(tableParameters) || (sd == null && isDeltaLakeTable(tableParameters))) {
// Iceberg tables do not need to read the StorageDescriptor field, but we still need to return dummy properties for compatibility
// Delta Lake tables only need to provide a dummy properties if a StorageDescriptor was not explicitly configured.
tableBuilder.setDataColumns(ImmutableList.of(new Column("dummy", HIVE_INT, Optional.empty())));
tableBuilder.getStorageBuilder().setStorageFormat(StorageFormat.fromHiveStorageFormat(HiveStorageFormat.PARQUET));
}
else {
StorageDescriptor sd = glueTable.getStorageDescriptor();
if (sd == null) {
throw new TrinoException(HIVE_UNSUPPORTED_FORMAT, format("Table StorageDescriptor is null for table %s.%s (%s)", dbName, glueTable.getName(), glueTable));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import java.util.Optional;

import static com.amazonaws.util.CollectionUtils.isNullOrEmpty;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static io.trino.plugin.hive.HiveType.HIVE_STRING;
import static io.trino.plugin.hive.metastore.glue.TestingMetastoreObjects.getGlueTestColumn;
import static io.trino.plugin.hive.metastore.glue.TestingMetastoreObjects.getGlueTestDatabase;
Expand Down Expand Up @@ -215,6 +216,15 @@ public void testTableNullParameters()
assertNotNull(trinoTable.getStorage().getSerdeParameters());
}

@Test
public void testIcebergTableNullStorageDescriptor()
{
testTable.setParameters(ImmutableMap.of(ICEBERG_TABLE_TYPE_NAME, ICEBERG_TABLE_TYPE_VALUE));
testTable.setStorageDescriptor(null);
io.trino.plugin.hive.metastore.Table trinoTable = GlueToTrinoConverter.convertTable(testTable, testDatabase.getName());
assertEquals(trinoTable.getDataColumns().size(), 1);
}

@Test
public void testIcebergTableNonNullStorageDescriptor()
{
Expand All @@ -224,13 +234,28 @@ public void testIcebergTableNonNullStorageDescriptor()
assertEquals(trinoTable.getDataColumns().size(), 1);
}

@Test
public void testDeltaTableNullStorageDescriptor()
{
testTable.setParameters(ImmutableMap.of(SPARK_TABLE_PROVIDER_KEY, DELTA_LAKE_PROVIDER));
testTable.setStorageDescriptor(null);
io.trino.plugin.hive.metastore.Table trinoTable = GlueToTrinoConverter.convertTable(testTable, testDatabase.getName());
assertEquals(trinoTable.getDataColumns().size(), 1);
}

@Test
public void testDeltaTableNonNullStorageDescriptor()
{
testTable.setParameters(ImmutableMap.of(SPARK_TABLE_PROVIDER_KEY, DELTA_LAKE_PROVIDER));
assertNotNull(testTable.getStorageDescriptor());
io.trino.plugin.hive.metastore.Table trinoTable = GlueToTrinoConverter.convertTable(testTable, testDatabase.getName());
assertEquals(trinoTable.getDataColumns().size(), 1);
assertEquals(
trinoTable.getDataColumns().stream()
.map(Column::getName)
.collect(toImmutableSet()),
testTable.getStorageDescriptor().getColumns().stream()
.map(com.amazonaws.services.glue.model.Column::getName)
.collect(toImmutableSet()));
}

@Test
Expand Down