Skip to content

Commit

Permalink
Abstain from passing UUID predicates to Iceberg library
Browse files Browse the repository at this point in the history
Trino and Iceberg seem to order UUID values differently.
  • Loading branch information
findepi committed Jun 21, 2022
1 parent bb163dd commit 0c7fc7a
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@
import static io.trino.spi.connector.RetryMode.NO_RETRIES;
import static io.trino.spi.type.BigintType.BIGINT;
import static io.trino.spi.type.DateTimeEncoding.unpackMillisUtc;
import static io.trino.spi.type.UuidType.UUID;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static java.util.function.Function.identity;
Expand Down Expand Up @@ -1728,7 +1729,9 @@ public Optional<ConstraintApplicationResult<ConnectorTableHandle>> applyFilter(C
IcebergColumnHandle columnHandle = (IcebergColumnHandle) column;
// Iceberg metadata columns can not be used to filter a table scan in Iceberg library
// TODO (https://github.com/trinodb/trino/issues/8759) structural types cannot be used to filter a table scan in Iceberg library.
if (isMetadataColumnId(columnHandle.getId()) || isStructuralType(columnHandle.getType())) {
if (isMetadataColumnId(columnHandle.getId()) || isStructuralType(columnHandle.getType()) ||
// Iceberg orders UUID values differently than Trino (perhaps due to https://bugs.openjdk.org/browse/JDK-7025832), so allow only IS NULL / IS NOT NULL checks
(columnHandle.getType() == UUID && !(domain.isOnlyNull() || domain.getValues().isAll()))) {
unsupported.put(columnHandle, domain);
}
else if (canEnforceColumnConstraintInAllSpecs(typeOperators, icebergTable, columnHandle, domain)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -581,31 +581,18 @@ private void testSelectOrPartitionedByUuid(boolean partitioned)
assertQuery(format("SELECT * FROM %s WHERE x = UUID 'f79c3e09-677c-4bbd-a479-3f349cb785e7'", tableName), "SELECT CAST('f79c3e09-677c-4bbd-a479-3f349cb785e7' AS UUID), 67890");
assertQuery(
format("SELECT * FROM %s WHERE x >= UUID '406caec7-68b9-4778-81b2-a12ece70c8b1'", tableName),
(format == ORC && partitioned || format == PARQUET)
// TODO (https://github.com/trinodb/trino/issues/12834): reading Parquet, or partitioned ORC, with UUID filter yields incorrect results
? "VALUES (CAST('406caec7-68b9-4778-81b2-a12ece70c8b1' AS UUID), 12345)"
: "VALUES (CAST('f79c3e09-677c-4bbd-a479-3f349cb785e7' AS UUID), 67890), (CAST('406caec7-68b9-4778-81b2-a12ece70c8b1' AS UUID), 12345)");
"VALUES (CAST('f79c3e09-677c-4bbd-a479-3f349cb785e7' AS UUID), 67890), (CAST('406caec7-68b9-4778-81b2-a12ece70c8b1' AS UUID), 12345)");
assertQuery(
format("SELECT * FROM %s WHERE x >= UUID 'f79c3e09-677c-4bbd-a479-3f349cb785e7'", tableName),
partitioned
? "VALUES (CAST('f79c3e09-677c-4bbd-a479-3f349cb785e7' AS UUID), 67890), (CAST('406caec7-68b9-4778-81b2-a12ece70c8b1' AS UUID), 12345)"
: "SELECT CAST('f79c3e09-677c-4bbd-a479-3f349cb785e7' AS UUID), 67890");
"SELECT CAST('f79c3e09-677c-4bbd-a479-3f349cb785e7' AS UUID), 67890");
assertQuery(format("SELECT * FROM %s WHERE x IS NULL", tableName), "SELECT NULL, 7531");
assertQuery(format("SELECT x FROM %s WHERE y = 12345", tableName), "SELECT CAST('406caec7-68b9-4778-81b2-a12ece70c8b1' AS UUID)");
assertQuery(format("SELECT x FROM %s WHERE y = 67890", tableName), "SELECT CAST('f79c3e09-677c-4bbd-a479-3f349cb785e7' AS UUID)");
assertQuery(format("SELECT x FROM %s WHERE y = 7531", tableName), "SELECT NULL");

assertUpdate(format("INSERT INTO %s VALUES (UUID '206caec7-68b9-4778-81b2-a12ece70c8b1', 313), (UUID '906caec7-68b9-4778-81b2-a12ece70c8b1', 314)", tableName), 2);
assertThat(query("SELECT y FROM " + tableName + " WHERE x >= UUID '206caec7-68b9-4778-81b2-a12ece70c8b1'"))
.matches(
(partitioned)
// TODO (https://github.com/trinodb/trino/issues/12834): reading Parquet with UUID filter yields incorrect results
? "VALUES BIGINT '12345', 313"
: ((format == PARQUET)
// TODO (https://github.com/trinodb/trino/issues/12834): reading Parquet with UUID filter yields incorrect results
? "VALUES BIGINT '12345'"
// this one is correct
: "VALUES BIGINT '12345', 67890, 313, 314"));
.matches("VALUES BIGINT '12345', 67890, 313, 314");

assertUpdate("DROP TABLE " + tableName);
}
Expand Down Expand Up @@ -2133,28 +2120,12 @@ protected void testBucketTransformForType(
.isNotFullyPushedDown(FilterNode.class); // this could be subsumed

// Bucketing transform doesn't allow comparison filter elimination
if (format == PARQUET && type.equals("UUID")) {
// TODO (https://github.com/trinodb/trino/issues/12834): reading Parquet with UUID filter yields incorrect results
assertThatThrownBy(() -> assertThat(query("SELECT * FROM " + tableName + " WHERE d >= " + value)).isNotFullyPushedDown(FilterNode.class))
.isInstanceOf(AssertionError.class)
.hasMessageMatching("(?s)\\[Rows for query .*to contain exactly in any order:.*but could not find the following elements:.*");
}
else {
assertThat(query("SELECT * FROM " + tableName + " WHERE d >= " + value))
.isNotFullyPushedDown(FilterNode.class);
}
assertThat(query("SELECT * FROM " + tableName + " WHERE d >= " + value))
.isNotFullyPushedDown(FilterNode.class);
assertThat(query("SELECT * FROM " + tableName + " WHERE d >= " + greaterValueInSameBucket))
.isNotFullyPushedDown(FilterNode.class);
if (format == PARQUET && type.equals("UUID")) {
// TODO (https://github.com/trinodb/trino/issues/12834): reading Parquet with UUID filter yields incorrect results
assertThatThrownBy(() -> assertThat(query("SELECT * FROM " + tableName + " WHERE d >= " + valueInOtherBucket)).isNotFullyPushedDown(FilterNode.class))
.isInstanceOf(AssertionError.class)
.hasMessageMatching("(?s)\\[Rows for query .*to contain exactly in any order:.*but could not find the following elements:.*");
}
else {
assertThat(query("SELECT * FROM " + tableName + " WHERE d >= " + valueInOtherBucket))
.isNotFullyPushedDown(FilterNode.class);
}
assertThat(query("SELECT * FROM " + tableName + " WHERE d >= " + valueInOtherBucket))
.isNotFullyPushedDown(FilterNode.class);

dropTable(tableName);
}
Expand Down

0 comments on commit 0c7fc7a

Please sign in to comment.