Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure that bucketing and sort column names correspond to table column names #16796

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import static com.google.common.base.MoreObjects.toStringHelper;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA;
import static java.util.Locale.ENGLISH;
import static java.util.Objects.requireNonNull;

public class HiveBucketProperty
Expand Down Expand Up @@ -70,7 +71,11 @@ public static Optional<HiveBucketProperty> fromStorageDescriptor(Map<String, Str
.collect(toImmutableList());
}
BucketingVersion bucketingVersion = HiveBucketing.getBucketingVersion(tableParameters);
return Optional.of(new HiveBucketProperty(storageDescriptor.getBucketCols(), bucketingVersion, storageDescriptor.getNumBuckets(), sortedBy));
List<String> bucketColumnNames = storageDescriptor.getBucketCols().stream()
// Ensure that the names used for the bucket columns are specified in lower case to match the names of the table columns
.map(name -> name.toLowerCase(ENGLISH))
.collect(toImmutableList());
return Optional.of(new HiveBucketProperty(bucketColumnNames, bucketingVersion, storageDescriptor.getNumBuckets(), sortedBy));
}

@JsonProperty
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import javax.annotation.concurrent.Immutable;

import java.util.Locale;
import java.util.Objects;

import static com.google.common.base.MoreObjects.toStringHelper;
Expand Down Expand Up @@ -92,7 +93,9 @@ public Order getOrder()

public static SortingColumn fromMetastoreApiOrder(io.trino.hive.thrift.metastore.Order order, String tablePartitionName)
{
    // Table column names are stored in lower case, so normalize the sort column
    // name the same way to guarantee it matches a table column
    return new SortingColumn(
            order.getCol().toLowerCase(Locale.ENGLISH),
            Order.fromMetastoreApiOrder(order.getOrder(), tablePartitionName));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,33 @@ public void testSparkParquetTimestampCompatibility(String sparkTimestampFormat,
onSpark().executeQuery("DROP TABLE " + sparkTableName);
}

@Test(groups = {HIVE_SPARK, PROFILE_SPECIFIC_TESTS})
public void testSparkClusteringCaseSensitiveCompatibility()
{
    // Spark permits declaring the bucketing/sort column (`SEGMENT_ID`) in a different
    // case than the data column (`segment_id`); Trino must still read such tables.
    String sparkTableNameWithClusteringDifferentCase = "test_spark_clustering_case_sensitive_" + randomNameSuffix();
    onSpark().executeQuery(
            String.format("CREATE TABLE %s (row_id int, `segment_id` int, value long) ", sparkTableNameWithClusteringDifferentCase) +
                    "USING PARQUET " +
                    "PARTITIONED BY (`part` string) " +
                    "CLUSTERED BY (`SEGMENT_ID`) " +
                    " SORTED BY (`SEGMENT_ID`) " +
                    " INTO 10 BUCKETS");
    try {
        onSpark().executeQuery(format("INSERT INTO %s ", sparkTableNameWithClusteringDifferentCase) +
                "VALUES " +
                " (1, 1, 100, 'part1')," +
                " (100, 1, 123, 'part2')," +
                " (101, 2, 202, 'part2')");

        // Ensure that Trino can successfully read from the Spark bucketed table even though the clustering
        // column `SEGMENT_ID` is in a different case than the data column `segment_id`
        assertThat(onTrino().executeQuery(format("SELECT * FROM %s.default.%s", TRINO_CATALOG, sparkTableNameWithClusteringDifferentCase)))
                .containsOnly(List.of(
                        row(1, 1, 100, "part1"),
                        row(100, 1, 123, "part2"),
                        row(101, 2, 202, "part2")));
    }
    finally {
        // Clean up the Spark table, consistent with the other tests in this class
        // (see testSparkParquetTimestampCompatibility) — the original leaked it
        onSpark().executeQuery("DROP TABLE " + sparkTableNameWithClusteringDifferentCase);
    }
}

@Test(groups = {HIVE_SPARK, PROFILE_SPECIFIC_TESTS})
public void testSparkParquetBloomFilterCompatibility()
{
Expand Down