From 243d4860911c6dd57fc5756767306646d5e1b00a Mon Sep 17 00:00:00 2001 From: Kumud Kumar Srivatsava Tirupati Date: Mon, 27 Jun 2022 17:32:21 +0530 Subject: [PATCH] fix for updateTableParameters which is not excluding partition columns and updateTableProperties boolean check --- .../aws/sync/AWSGlueCatalogSyncClient.java | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java index f322ef79cafe..d3a2ea5cbe69 100644 --- a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java +++ b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java @@ -64,9 +64,9 @@ import java.util.stream.Collectors; import static org.apache.hudi.aws.utils.S3Utils.s3aToS3; -import static org.apache.hudi.common.util.MapUtils.nonEmpty; import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_CREATE_MANAGED_TABLE; import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_SUPPORT_TIMESTAMP_TYPE; +import static org.apache.hudi.common.util.MapUtils.isNullOrEmpty; import static org.apache.hudi.hive.util.HiveSchemaUtil.getPartitionKeyType; import static org.apache.hudi.hive.util.HiveSchemaUtil.parquetSchemaToMapSchema; import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_DATABASE_NAME; @@ -193,7 +193,7 @@ public void dropPartitions(String tableName, List partitionsToDrop) { */ @Override public void updateTableProperties(String tableName, Map tableProperties) { - if (nonEmpty(tableProperties)) { + if (isNullOrEmpty(tableProperties)) { return; } try { @@ -210,10 +210,7 @@ public void updateTableSchema(String tableName, MessageType newSchema) { try { Table table = getTable(awsGlue, databaseName, tableName); Map newSchemaMap = parquetSchemaToMapSchema(newSchema, config.getBoolean(HIVE_SUPPORT_TIMESTAMP_TYPE), false); - List newColumns = newSchemaMap.keySet().stream().map(key -> { - String keyType = getPartitionKeyType(newSchemaMap, key); - return new Column().withName(key).withType(keyType.toLowerCase()).withComment(""); - }).collect(Collectors.toList()); + List newColumns = getColumnsFromSchema(newSchemaMap); StorageDescriptor sd = table.getStorageDescriptor(); sd.setColumns(newColumns); @@ -258,15 +255,7 @@ public void createTable(String tableName, try { Map mapSchema = parquetSchemaToMapSchema(storageSchema, config.getBoolean(HIVE_SUPPORT_TIMESTAMP_TYPE), false); - List schemaWithoutPartitionKeys = new ArrayList<>(); - for (String key : mapSchema.keySet()) { - String keyType = getPartitionKeyType(mapSchema, key); - Column column = new Column().withName(key).withType(keyType.toLowerCase()).withComment(""); - // In Glue, the full schema should exclude the partition keys - if (!config.getSplitStrings(META_SYNC_PARTITION_FIELDS).contains(key)) { - schemaWithoutPartitionKeys.add(column); - } - } + List schemaWithoutPartitionKeys = getColumnsFromSchema(mapSchema); // now create the schema partition List schemaPartitionKeys = config.getSplitStrings(META_SYNC_PARTITION_FIELDS).stream().map(partitionKey -> { @@ -419,6 +408,19 @@ public void deleteLastReplicatedTimeStamp(String tableName) { throw new UnsupportedOperationException("Not supported: `deleteLastReplicatedTimeStamp`"); } + private List getColumnsFromSchema(Map mapSchema) { + List cols = new ArrayList<>(); + for (String key : mapSchema.keySet()) { + // In Glue, the full schema should exclude the partition keys + if (!syncConfig.partitionFields.contains(key)) { + String keyType = getPartitionKeyType(mapSchema, key); + Column column = new Column().withName(key).withType(keyType.toLowerCase()).withComment(""); + cols.add(column); + } + } + return cols; + } + private enum TableType { MANAGED_TABLE, EXTERNAL_TABLE,