Skip to content

Commit

Permalink
Fix failure when ACID table column name collides with ACID internal names
Browse files Browse the repository at this point in the history

Previously, HiveMetadata.columnMetadataGetter threw
an exception due to duplicate keys when an ACID table
contained a column whose name matched one of the
ACID format column names (e.g. operation).
  • Loading branch information
ebyhr committed May 18, 2022
1 parent 37adec9 commit 50582d2
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
import io.trino.plugin.hive.HiveSessionProperties.InsertExistingPartitionsBehavior;
import io.trino.plugin.hive.LocationService.WriteInfo;
import io.trino.plugin.hive.acid.AcidOperation;
import io.trino.plugin.hive.acid.AcidSchema;
import io.trino.plugin.hive.acid.AcidTransaction;
import io.trino.plugin.hive.fs.DirectoryLister;
import io.trino.plugin.hive.metastore.Column;
Expand Down Expand Up @@ -163,14 +162,9 @@
import static io.trino.plugin.hive.HiveApplyProjectionUtil.replaceWithNewVariables;
import static io.trino.plugin.hive.HiveBasicStatistics.createEmptyStatistics;
import static io.trino.plugin.hive.HiveBasicStatistics.createZeroStatistics;
import static io.trino.plugin.hive.HiveColumnHandle.BUCKET_COLUMN_NAME;
import static io.trino.plugin.hive.HiveColumnHandle.ColumnType.PARTITION_KEY;
import static io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR;
import static io.trino.plugin.hive.HiveColumnHandle.ColumnType.SYNTHESIZED;
import static io.trino.plugin.hive.HiveColumnHandle.FILE_MODIFIED_TIME_COLUMN_NAME;
import static io.trino.plugin.hive.HiveColumnHandle.FILE_SIZE_COLUMN_NAME;
import static io.trino.plugin.hive.HiveColumnHandle.PARTITION_COLUMN_NAME;
import static io.trino.plugin.hive.HiveColumnHandle.PATH_COLUMN_NAME;
import static io.trino.plugin.hive.HiveColumnHandle.createBaseColumn;
import static io.trino.plugin.hive.HiveColumnHandle.updateRowIdColumnHandle;
import static io.trino.plugin.hive.HiveErrorCode.HIVE_COLUMN_ORDER_MISMATCH;
Expand Down Expand Up @@ -3411,29 +3405,12 @@ private static Function<HiveColumnHandle, ColumnMetadata> columnMetadataGetter(T
}
}

// add hidden columns
builder.put(PATH_COLUMN_NAME, Optional.empty());
if (table.getStorage().getBucketProperty().isPresent()) {
builder.put(BUCKET_COLUMN_NAME, Optional.empty());
}
builder.put(FILE_SIZE_COLUMN_NAME, Optional.empty());
builder.put(FILE_MODIFIED_TIME_COLUMN_NAME, Optional.empty());
if (!table.getPartitionColumns().isEmpty()) {
builder.put(PARTITION_COLUMN_NAME, Optional.empty());
}

if (isFullAcidTable(table.getParameters())) {
for (String name : AcidSchema.ACID_COLUMN_NAMES) {
builder.put(name, Optional.empty());
}
}

Map<String, Optional<String>> columnComment = builder.buildOrThrow();

return handle -> ColumnMetadata.builder()
.setName(handle.getName())
.setType(handle.getType())
.setComment(columnComment.get(handle.getName()))
.setComment(handle.isHidden() ? Optional.empty() : columnComment.get(handle.getName()))
.setExtraInfo(Optional.ofNullable(columnExtraInfo(handle.isPartitionKey())))
.setHidden(handle.isHidden())
.build();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8328,6 +8328,30 @@ public void testUseColumnNames(HiveStorageFormat format, boolean formatUseColumn
assertUpdate("DROP TABLE " + tableName);
}

/**
 * Creates a table whose first column carries the supplied name (the
 * {@code hiddenColumnNames} provider feeds names such as {@code $path}),
 * partitioned by {@code _partition} and bucketed by {@code _bucket}, and
 * verifies that {@code SELECT *} against it fails with a
 * "Multiple entries with same key" message naming that column.
 *
 * @param columnName the column name to use for the first table column
 */
@Test(dataProvider = "hiddenColumnNames")
public void testHiddenColumnNameConflict(String columnName)
{
    // Table is both partitioned and bucketed, presumably so that the partition- and
    // bucket-related hidden columns exist as well (TODO confirm against HiveMetadata).
    String tableDefinition = format(
            "(\"%s\" int, _bucket int, _partition int) WITH (partitioned_by = ARRAY['_partition'], bucketed_by = ARRAY['_bucket'], bucket_count = 10)",
            columnName);
    String expectedMessage = "Multiple entries with same key: " + columnName;

    try (TestTable table = new TestTable(getQueryRunner()::execute, "test_hidden_column_name_conflict", tableDefinition)) {
        assertThatThrownBy(() -> query("SELECT * FROM " + table.getName()))
                .hasMessageContaining(expectedMessage);
    }
}

/**
 * Data provider yielding one single-argument row per {@code $}-prefixed
 * column name exercised by {@code testHiddenColumnNameConflict}.
 *
 * @return rows of the form {@code {name}}, in declaration order
 */
@DataProvider
public Object[][] hiddenColumnNames()
{
    String[] names = {
            "$path",
            "$bucket",
            "$file_size",
            "$file_modified_time",
            "$partition",
    };

    // Wrap every name in its own one-element argument row.
    Object[][] rows = new Object[names.length][];
    for (int i = 0; i < names.length; i++) {
        rows[i] = new Object[] {names[i]};
    }
    return rows;
}

@Test(dataProvider = "legalUseColumnNamesProvider")
public void testUseColumnAddDrop(HiveStorageFormat format, boolean formatUseColumnNames)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,29 @@ public void testCreateAcidTable(boolean isPartitioned, BucketingType bucketingTy
});
}

/**
 * Creates an ORC table through Hive whose first column carries the supplied
 * name (the {@code acidFormatColumnNames} provider feeds names such as
 * {@code operation}), inserts one row through Trino and verifies that
 * {@code SELECT *} through Trino returns exactly that row.
 *
 * @param columnName the column name to use for the first table column
 */
@Test(groups = HIVE_TRANSACTIONAL, dataProvider = "acidFormatColumnNames")
public void testAcidTableColumnNameConflict(String columnName)
{
    withTemporaryTable("acid_column_name_conflict", true, true, NONE, tableName -> {
        // The column name is back-quoted because some of the supplied names (e.g. "row")
        // are presumably reserved words in Hive - TODO confirm.
        String createTable = "CREATE TABLE " + tableName + " (`" + columnName + "` INTEGER, fcol INTEGER, partcol INTEGER) STORED AS ORC " + hiveTableProperties(ACID, NONE);
        String insertRow = "INSERT INTO " + tableName + " VALUES (1, 2, 3)";
        String selectAll = "SELECT * FROM " + tableName;

        onHive().executeQuery(createTable);
        onTrino().executeQuery(insertRow);
        assertThat(onTrino().executeQuery(selectAll)).containsOnly(row(1, 2, 3));
    });
}

/**
 * Data provider yielding one single-argument row per column name exercised
 * by {@code testAcidTableColumnNameConflict}.
 *
 * @return rows of the form {@code {name}}, in declaration order
 */
@DataProvider
public Object[][] acidFormatColumnNames()
{
    String[] names = {
            "operation",
            "originalTransaction",
            "bucket",
            "rowId",
            "row",
            "currentTransaction",
    };

    // Wrap every name in its own one-element argument row.
    Object[][] rows = new Object[names.length][];
    for (int i = 0; i < names.length; i++) {
        rows[i] = new Object[] {names[i]};
    }
    return rows;
}

@Test(groups = HIVE_TRANSACTIONAL)
public void testSimpleUnpartitionedTransactionalInsert()
{
Expand Down

0 comments on commit 50582d2

Please sign in to comment.