Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure that bucketing and sort column names correspond to table column names #16796

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import static com.google.common.base.MoreObjects.toStringHelper;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA;
import static java.util.Locale.ENGLISH;
import static java.util.Objects.requireNonNull;

public class HiveBucketProperty
Expand Down Expand Up @@ -70,7 +71,11 @@ public static Optional<HiveBucketProperty> fromStorageDescriptor(Map<String, Str
.collect(toImmutableList());
}
BucketingVersion bucketingVersion = HiveBucketing.getBucketingVersion(tableParameters);
return Optional.of(new HiveBucketProperty(storageDescriptor.getBucketCols(), bucketingVersion, storageDescriptor.getNumBuckets(), sortedBy));
List<String> bucketColumnNames = storageDescriptor.getBucketCols().stream()
// Ensure that the names used for the bucket columns are specified in lower case to match the names of the table columns
.map(name -> name.toLowerCase(ENGLISH))
.collect(toImmutableList());
return Optional.of(new HiveBucketProperty(bucketColumnNames, bucketingVersion, storageDescriptor.getNumBuckets(), sortedBy));
}

@JsonProperty
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import javax.annotation.concurrent.Immutable;

import java.util.Locale;
import java.util.Objects;

import static com.google.common.base.MoreObjects.toStringHelper;
Expand Down Expand Up @@ -92,7 +93,9 @@ public Order getOrder()

public static SortingColumn fromMetastoreApiOrder(io.trino.hive.thrift.metastore.Order order, String tablePartitionName)
{
    // Table column names are stored in lower case, so normalize the sort column
    // name the same way to guarantee it matches a table column
    return new SortingColumn(
            order.getCol().toLowerCase(Locale.ENGLISH),
            Order.fromMetastoreApiOrder(order.getOrder(), tablePartitionName));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,33 @@ public void testSparkParquetTimestampCompatibility(String sparkTimestampFormat,
onSpark().executeQuery("DROP TABLE " + sparkTableName);
}

@Test(groups = {HIVE_SPARK, PROFILE_SPECIFIC_TESTS})
public void testSparkClusteringCaseSensitiveCompatibility()
{
    // Spark permits declaring the bucketing/sort column (`SEGMENT_ID`) in a different
    // case than the data column (`segment_id`); Trino must still read such tables.
    String sparkTableNameWithClusteringDifferentCase = "test_spark_clustering_case_sensitive_" + randomNameSuffix();
    onSpark().executeQuery(
            String.format("CREATE TABLE %s (row_id int, `segment_id` int, value long) ", sparkTableNameWithClusteringDifferentCase) +
                    "USING PARQUET " +
                    "PARTITIONED BY (`part` string) " +
                    "CLUSTERED BY (`SEGMENT_ID`) " +
                    " SORTED BY (`SEGMENT_ID`) " +
                    " INTO 10 BUCKETS");
    try {
        onSpark().executeQuery(format("INSERT INTO %s ", sparkTableNameWithClusteringDifferentCase) +
                "VALUES " +
                " (1, 1, 100, 'part1')," +
                " (100, 1, 123, 'part2')," +
                " (101, 2, 202, 'part2')");

        // Ensure that Trino can successfully read from the Spark bucketed table even though the clustering
        // column `SEGMENT_ID` is in a different case than the data column `segment_id`
        assertThat(onTrino().executeQuery(format("SELECT * FROM %s.default.%s", TRINO_CATALOG, sparkTableNameWithClusteringDifferentCase)))
                .containsOnly(List.of(
                        row(1, 1, 100, "part1"),
                        row(100, 1, 123, "part2"),
                        row(101, 2, 202, "part2")));
    }
    finally {
        // Clean up the Spark table, consistent with the other tests in this class
        // (see testSparkParquetTimestampCompatibility) — the original leaked it
        onSpark().executeQuery("DROP TABLE " + sparkTableNameWithClusteringDifferentCase);
    }
}

@Test(groups = {HIVE_SPARK, PROFILE_SPECIFIC_TESTS})
public void testSparkParquetBloomFilterCompatibility()
{
Expand Down