diff --git a/core/trino-main/src/test/java/io/trino/sql/query/QueryAssertions.java b/core/trino-main/src/test/java/io/trino/sql/query/QueryAssertions.java index f8aa6f681c3e..70d7465af4c7 100644 --- a/core/trino-main/src/test/java/io/trino/sql/query/QueryAssertions.java +++ b/core/trino-main/src/test/java/io/trino/sql/query/QueryAssertions.java @@ -255,15 +255,25 @@ private String formatRowElement(Object value) static AssertProvider newQueryAssert(String query, QueryRunner runner, Session session) { MaterializedResult result = runner.execute(session, query); - return () -> new QueryAssert(runner, session, query, result); + return () -> new QueryAssert(runner, session, query, result, false, false, false); } - public QueryAssert(QueryRunner runner, Session session, String query, MaterializedResult actual) + private QueryAssert( + QueryRunner runner, + Session session, + String query, + MaterializedResult actual, + boolean ordered, + boolean skipTypesCheck, + boolean skipResultsCorrectnessCheckForPushdown) { super(actual, Object.class); this.runner = requireNonNull(runner, "runner is null"); this.session = requireNonNull(session, "session is null"); this.query = requireNonNull(query, "query is null"); + this.ordered = ordered; + this.skipTypesCheck = skipTypesCheck; + this.skipResultsCorrectnessCheckForPushdown = skipResultsCorrectnessCheckForPushdown; } public QueryAssert projected(int... columns) @@ -282,7 +292,10 @@ public QueryAssert projected(int... columns) .collect(toImmutableList()), IntStream.of(columns) .mapToObj(actual.getTypes()::get) - .collect(toImmutableList()))); + .collect(toImmutableList())), + ordered, + skipTypesCheck, + skipResultsCorrectnessCheckForPushdown); } public QueryAssert matches(BiFunction evaluator) diff --git a/core/trino-spi/src/main/java/io/trino/spi/connector/InMemoryRecordSet.java b/core/trino-spi/src/main/java/io/trino/spi/connector/InMemoryRecordSet.java index 759d795907d3..c6523f5d17bf 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/connector/InMemoryRecordSet.java +++ b/core/trino-spi/src/main/java/io/trino/spi/connector/InMemoryRecordSet.java @@ -17,6 +17,8 @@ import io.airlift.slice.Slices; import io.trino.spi.block.Block; import io.trino.spi.type.ArrayType; +import io.trino.spi.type.LongTimestamp; +import io.trino.spi.type.LongTimestampWithTimeZone; import io.trino.spi.type.RowType; import io.trino.spi.type.Type; @@ -307,6 +309,12 @@ else if (value instanceof Block) { else if (value instanceof Slice) { completedBytes += ((Slice) value).length(); } + else if (value instanceof LongTimestamp) { + completedBytes += LongTimestamp.INSTANCE_SIZE; + } + else if (value instanceof LongTimestampWithTimeZone) { + completedBytes += LongTimestampWithTimeZone.INSTANCE_SIZE; + } else { throw new IllegalArgumentException("Unknown type: " + value.getClass()); } diff --git a/core/trino-spi/src/main/java/io/trino/spi/type/LongTimestamp.java b/core/trino-spi/src/main/java/io/trino/spi/type/LongTimestamp.java index 20bdc9bfcd8a..cbf4cedd3a1a 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/type/LongTimestamp.java +++ b/core/trino-spi/src/main/java/io/trino/spi/type/LongTimestamp.java @@ -13,6 +13,8 @@ */ package io.trino.spi.type; +import org.openjdk.jol.info.ClassLayout; + import java.util.Objects; import static io.trino.spi.type.Timestamps.formatTimestamp; @@ -20,6 +22,8 @@ public final class LongTimestamp implements Comparable { + public static final int INSTANCE_SIZE = ClassLayout.parseClass(LongTimestamp.class).instanceSize(); + private static final int PICOSECONDS_PER_MICROSECOND = 1_000_000; private final long epochMicros; diff --git a/core/trino-spi/src/main/java/io/trino/spi/type/LongTimestampWithTimeZone.java b/core/trino-spi/src/main/java/io/trino/spi/type/LongTimestampWithTimeZone.java index 995e9bf9091e..ae5883637f31 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/type/LongTimestampWithTimeZone.java +++ b/core/trino-spi/src/main/java/io/trino/spi/type/LongTimestampWithTimeZone.java @@ -13,6 +13,8 @@ */ package io.trino.spi.type; +import org.openjdk.jol.info.ClassLayout; + import java.util.Objects; import static io.trino.spi.type.Timestamps.PICOSECONDS_PER_MILLISECOND; @@ -20,6 +22,8 @@ public final class LongTimestampWithTimeZone implements Comparable { + public static final int INSTANCE_SIZE = ClassLayout.parseClass(LongTimestampWithTimeZone.class).instanceSize(); + private final long epochMillis; private final int picosOfMilli; // number of picoseconds of the millisecond corresponding to epochMillis private final short timeZoneKey; diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java index d9f4896957c7..ca6e2dbcdd1d 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java @@ -274,8 +274,7 @@ public ConnectorTableProperties getTableProperties(ConnectorSession session, Con Object prestoValue = deserializePartitionValue( column.getType(), partitionColumnValueStrings.get(columnId).orElse(null), - column.getName(), - session.getTimeZoneKey()); + column.getName()); return NullableValue.of(column.getType(), prestoValue); })); diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSource.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSource.java index 7ba244267602..d0938c5c3ddd 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSource.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSource.java @@ -19,7 +19,6 @@ import io.trino.spi.block.RunLengthEncodedBlock; import io.trino.spi.connector.ConnectorPageSource; import io.trino.spi.predicate.Utils; -import io.trino.spi.type.TimeZoneKey; import io.trino.spi.type.Type; import java.io.IOException; @@ -44,8 +43,7 @@ public class IcebergPageSource public IcebergPageSource( List columns, Map> partitionKeys, - ConnectorPageSource delegate, - TimeZoneKey timeZoneKey) + ConnectorPageSource delegate) { int size = requireNonNull(columns, "columns is null").size(); requireNonNull(partitionKeys, "partitionKeys is null"); @@ -60,7 +58,7 @@ public IcebergPageSource( if (partitionKeys.containsKey(column.getId())) { String partitionValue = partitionKeys.get(column.getId()).orElse(null); Type type = column.getType(); - Object prefilledValue = deserializePartitionValue(type, partitionValue, column.getName(), timeZoneKey); + Object prefilledValue = deserializePartitionValue(type, partitionValue, column.getName()); prefilledBlocks[outputIndex] = Utils.nativeValueToBlock(type, prefilledValue); delegateIndexes[outputIndex] = -1; } diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSourceProvider.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSourceProvider.java index e4ed3cc28baa..06db4cc8a373 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSourceProvider.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSourceProvider.java @@ -186,7 +186,7 @@ public ConnectorPageSource createPageSource( regularColumns, effectivePredicate); - return new IcebergPageSource(icebergColumns, partitionKeys, dataPageSource, session.getTimeZoneKey()); + return new IcebergPageSource(icebergColumns, partitionKeys, dataPageSource); } private ConnectorPageSource createDataPageSource( diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergSplitManager.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergSplitManager.java index 4168ed5df985..14d54ba80476 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergSplitManager.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergSplitManager.java @@ -84,7 +84,6 @@ public ConnectorSplitSource getSplits( identityPartitionColumns, tableScan, dynamicFilter, - session.getTimeZoneKey(), dynamicFilteringWaitTimeout); return new ClassLoaderSafeConnectorSplitSource(splitSource, Thread.currentThread().getContextClassLoader()); diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergSplitSource.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergSplitSource.java index b373d2e19888..4182fb7c8ed8 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergSplitSource.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergSplitSource.java @@ -28,7 +28,6 @@ import io.trino.spi.predicate.Range; import io.trino.spi.predicate.TupleDomain; import io.trino.spi.predicate.ValueSet; -import io.trino.spi.type.TimeZoneKey; import org.apache.iceberg.CombinedScanTask; import org.apache.iceberg.FileScanTask; import org.apache.iceberg.TableScan; @@ -49,6 +48,7 @@ import static io.trino.plugin.iceberg.ExpressionConverter.toIcebergExpression; import static io.trino.plugin.iceberg.IcebergSplitManager.ICEBERG_DOMAIN_COMPACTION_THRESHOLD; +import static io.trino.plugin.iceberg.IcebergTypes.convertIcebergValueToTrino; import static io.trino.plugin.iceberg.IcebergUtil.deserializePartitionValue; import static io.trino.plugin.iceberg.IcebergUtil.getPartitionKeys; import static io.trino.plugin.iceberg.IcebergUtil.primitiveFieldTypes; @@ -70,7 +70,6 @@ public class IcebergSplitSource private final TableScan tableScan; private final Map fieldIdToType; private final DynamicFilter dynamicFilter; - private final TimeZoneKey sessionZone; private final long dynamicFilteringWaitTimeoutMillis; private final Stopwatch dynamicFilterWaitStopwatch; @@ -83,7 +82,6 @@ public IcebergSplitSource( Set identityPartitionColumns, TableScan tableScan, DynamicFilter dynamicFilter, - TimeZoneKey sessionZone, Duration dynamicFilteringWaitTimeout) { this.tableHandle = requireNonNull(tableHandle, "tableHandle is null"); @@ -91,7 +89,6 @@ public IcebergSplitSource( this.tableScan = requireNonNull(tableScan, "tableScan is null"); this.fieldIdToType = primitiveFieldTypes(tableScan.schema()); this.dynamicFilter = requireNonNull(dynamicFilter, "dynamicFilter is null"); - this.sessionZone = requireNonNull(sessionZone, "sessionZone is null"); this.dynamicFilteringWaitTimeoutMillis = requireNonNull(dynamicFilteringWaitTimeout, "dynamicFilteringWaitTimeout is null").toMillis(); this.dynamicFilterWaitStopwatch = Stopwatch.createStarted(); } @@ -158,8 +155,7 @@ public CompletableFuture getNextBatch(ConnectorPartitionHan if (!partitionMatchesPredicate( identityPartitionColumns, icebergSplit.getPartitionKeys(), - dynamicFilterPredicate, - sessionZone)) { + dynamicFilterPredicate)) { continue; } if (!fileMatchesPredicate( @@ -246,16 +242,16 @@ private static Domain domainForStatistics(io.trino.spi.type.Type type, Object lo if (lowerBound != null && upperBound != null) { statisticsRange = Range.range( type, - PartitionTable.convert(lowerBound, icebergType), + convertIcebergValueToTrino(icebergType, lowerBound), true, - PartitionTable.convert(upperBound, icebergType), + convertIcebergValueToTrino(icebergType, upperBound), true); } else if (upperBound != null) { - statisticsRange = Range.lessThanOrEqual(type, PartitionTable.convert(upperBound, icebergType)); + statisticsRange = Range.lessThanOrEqual(type, convertIcebergValueToTrino(icebergType, upperBound)); } else { - statisticsRange = Range.greaterThanOrEqual(type, PartitionTable.convert(lowerBound, icebergType)); + statisticsRange = Range.greaterThanOrEqual(type, convertIcebergValueToTrino(icebergType, lowerBound)); } return Domain.create(ValueSet.ofRanges(statisticsRange), containsNulls); } @@ -264,8 +260,7 @@ else if (upperBound != null) { static boolean partitionMatchesPredicate( Set identityPartitionColumns, Map> partitionKeys, - TupleDomain dynamicFilterPredicate, - TimeZoneKey timeZoneKey) + TupleDomain dynamicFilterPredicate) { if (dynamicFilterPredicate.isNone()) { return false; @@ -275,7 +270,7 @@ static boolean partitionMatchesPredicate( for (IcebergColumnHandle partitionColumn : identityPartitionColumns) { Domain allowedDomain = domains.get(partitionColumn); if (allowedDomain != null) { - Object partitionValue = deserializePartitionValue(partitionColumn.getType(), partitionKeys.get(partitionColumn.getId()).orElse(null), partitionColumn.getName(), timeZoneKey); + Object partitionValue = deserializePartitionValue(partitionColumn.getType(), partitionKeys.get(partitionColumn.getId()).orElse(null), partitionColumn.getName()); if (!allowedDomain.includesNullableValue(partitionValue)) { return false; } diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergTypes.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergTypes.java new file mode 100644 index 000000000000..f058ef18a827 --- /dev/null +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergTypes.java @@ -0,0 +1,101 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.iceberg; + +import io.trino.spi.type.DecimalType; +import io.trino.spi.type.Decimals; +import io.trino.spi.type.UuidType; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.util.UUID; + +import static io.airlift.slice.Slices.utf8Slice; +import static io.trino.plugin.iceberg.util.Timestamps.timestampTzFromMicros; +import static io.trino.spi.type.Decimals.isShortDecimal; +import static io.trino.spi.type.Timestamps.PICOSECONDS_PER_MICROSECOND; + +public final class IcebergTypes +{ + private IcebergTypes() {} + + /** + * Convert value from Iceberg representation to Trino representation. + */ + public static Object convertIcebergValueToTrino(Type icebergType, Object value) + { + if (value == null) { + return null; + } + if (icebergType instanceof Types.BooleanType) { + //noinspection RedundantCast + return (Boolean) value; + } + if (icebergType instanceof Types.IntegerType) { + return ((Integer) value).longValue(); + } + if (icebergType instanceof Types.LongType) { + //noinspection RedundantCast + return (Long) value; + } + if (icebergType instanceof Types.FloatType) { + return (long) Float.floatToIntBits((Float) value); + } + if (icebergType instanceof Types.DoubleType) { + //noinspection RedundantCast + return (Double) value; + } + if (icebergType instanceof Types.DecimalType) { + Types.DecimalType icebergDecimalType = (Types.DecimalType) icebergType; + DecimalType trinoDecimalType = DecimalType.createDecimalType(icebergDecimalType.precision(), icebergDecimalType.scale()); + if (isShortDecimal(trinoDecimalType)) { + return Decimals.encodeShortScaledValue((BigDecimal) value, trinoDecimalType.getScale()); + } + return Decimals.encodeScaledValue((BigDecimal) value, trinoDecimalType.getScale()); + } + if (icebergType instanceof Types.StringType) { + // Partition values are passed as String, but min/max values are passed as a CharBuffer + if (value instanceof CharBuffer) { + value = new String(((CharBuffer) value).array()); + } + return utf8Slice(((String) value)); + } + if (icebergType instanceof Types.BinaryType) { + // TODO the client sees the bytearray's tostring ouput instead of seeing actual bytes, needs to be fixed. + // TODO return Slice + return ((ByteBuffer) value).array().clone(); + } + if (icebergType instanceof Types.DateType) { + return ((Integer) value).longValue(); + } + if (icebergType instanceof Types.TimeType) { + return Math.multiplyExact((Long) value, PICOSECONDS_PER_MICROSECOND); + } + if (icebergType instanceof Types.TimestampType) { + long epochMicros = (long) value; + if (((Types.TimestampType) icebergType).shouldAdjustToUTC()) { + return timestampTzFromMicros(epochMicros); + } + return epochMicros; + } + if (icebergType instanceof Types.UUIDType) { + return UuidType.javaUuidToTrinoUuid((UUID) value); + } + + throw new UnsupportedOperationException("Unsupported iceberg type: " + icebergType); + } +} diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java index 32e06c265f16..b45ed04f248e 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java @@ -26,7 +26,6 @@ import io.trino.spi.connector.SchemaTableName; import io.trino.spi.type.DecimalType; import io.trino.spi.type.Decimals; -import io.trino.spi.type.TimeZoneKey; import io.trino.spi.type.Type; import io.trino.spi.type.TypeManager; import io.trino.spi.type.UuidType; @@ -231,7 +230,7 @@ private static String quotedName(String name) return '"' + name.replace("\"", "\"\"") + '"'; } - public static Object deserializePartitionValue(Type type, String valueString, String name, TimeZoneKey timeZoneKey) + public static Object deserializePartitionValue(Type type, String valueString, String name) { if (valueString == null) { return null; @@ -269,7 +268,7 @@ public static Object deserializePartitionValue(Type type, String valueString, St return parseLong(valueString); } if (type.equals(TIMESTAMP_TZ_MICROS)) { - return timestampTzFromMicros(parseLong(valueString), timeZoneKey); + return timestampTzFromMicros(parseLong(valueString)); } if (type instanceof VarcharType) { Slice value = utf8Slice(valueString); diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/PartitionTable.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/PartitionTable.java index c96759e0b986..d385e78297df 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/PartitionTable.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/PartitionTable.java @@ -25,10 +25,7 @@ import io.trino.spi.connector.SchemaTableName; import io.trino.spi.connector.SystemTable; import io.trino.spi.predicate.TupleDomain; -import io.trino.spi.type.DecimalType; -import io.trino.spi.type.Decimals; import io.trino.spi.type.RowType; -import io.trino.spi.type.TimeZoneKey; import io.trino.spi.type.TypeManager; import io.trino.spi.type.TypeUtils; import org.apache.iceberg.DataFile; @@ -45,9 +42,6 @@ import java.io.IOException; import java.io.UncheckedIOException; -import java.math.BigDecimal; -import java.nio.ByteBuffer; -import java.nio.CharBuffer; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -56,15 +50,12 @@ import java.util.Set; import static com.google.common.collect.ImmutableList.toImmutableList; -import static io.airlift.slice.Slices.utf8Slice; +import static io.trino.plugin.iceberg.IcebergTypes.convertIcebergValueToTrino; import static io.trino.plugin.iceberg.IcebergUtil.getIdentityPartitions; import static io.trino.plugin.iceberg.IcebergUtil.primitiveFieldTypes; import static io.trino.plugin.iceberg.Partition.convertBounds; import static io.trino.plugin.iceberg.TypeConverter.toTrinoType; -import static io.trino.plugin.iceberg.util.Timestamps.timestampTzFromMicros; import static io.trino.spi.type.BigintType.BIGINT; -import static io.trino.spi.type.Decimals.isShortDecimal; -import static io.trino.spi.type.Timestamps.PICOSECONDS_PER_MICROSECOND; import static java.util.Objects.requireNonNull; import static java.util.stream.Collectors.toSet; @@ -223,7 +214,7 @@ private RecordCursor buildRecordCursor(Map partiti // add data for partition columns for (int i = 0; i < partitionColumnTypes.size(); i++) { - row.add(convert(partition.getValues().get(i, partitionColumnClass.get(i)), partitionTypes.get(i))); + row.add(convertIcebergValueToTrino(partitionTypes.get(i), partition.getValues().get(i, partitionColumnClass.get(i)))); } // add the top level metrics. @@ -239,8 +230,8 @@ private RecordCursor buildRecordCursor(Map partiti } Integer fieldId = nonPartitionPrimitiveColumns.get(i).fieldId(); Type.PrimitiveType type = idToTypeMapping.get(fieldId); - Object min = convert(partition.getMinValues().get(fieldId), type); - Object max = convert(partition.getMaxValues().get(fieldId), type); + Object min = convertIcebergValueToTrino(type, partition.getMinValues().get(fieldId)); + Object max = convertIcebergValueToTrino(type, partition.getMaxValues().get(fieldId)); Long nullCount = partition.getNullCounts().get(fieldId); row.add(getColumnMetricBlock(columnMetricTypes.get(i), min, max, nullCount)); } @@ -274,51 +265,4 @@ private static Block getColumnMetricBlock(RowType columnMetricType, Object min, rowBlockBuilder.closeEntry(); return columnMetricType.getObject(rowBlockBuilder, 0); } - - /** - * Convert value from Iceberg representation to Trino representation. - */ - public static Object convert(Object value, Type type) - { - if (value == null) { - return null; - } - if (type instanceof Types.StringType) { - // Partition values are passed as String, but min/max values are passed as a CharBuffer - if (value instanceof CharBuffer) { - value = new String(((CharBuffer) value).array()); - } - return utf8Slice(((String) value)); - } - if (type instanceof Types.BinaryType) { - // TODO the client sees the bytearray's tostring ouput instead of seeing actual bytes, needs to be fixed. - return ((ByteBuffer) value).array().clone(); - } - if (type instanceof Types.TimestampType) { - long epochMicros = (long) value; - if (((Types.TimestampType) type).shouldAdjustToUTC()) { - return timestampTzFromMicros(epochMicros, TimeZoneKey.UTC_KEY); - } - return epochMicros; - } - if (type instanceof Types.TimeType) { - return Math.multiplyExact((Long) value, PICOSECONDS_PER_MICROSECOND); - } - if (type instanceof Types.FloatType) { - return (long) Float.floatToIntBits((Float) value); - } - if (type instanceof Types.IntegerType || type instanceof Types.DateType) { - return ((Integer) value).longValue(); - } - if (type instanceof Types.DecimalType) { - Types.DecimalType icebergDecimalType = (Types.DecimalType) type; - DecimalType trinoDecimalType = DecimalType.createDecimalType(icebergDecimalType.precision(), icebergDecimalType.scale()); - if (isShortDecimal(trinoDecimalType)) { - return Decimals.encodeShortScaledValue((BigDecimal) value, trinoDecimalType.getScale()); - } - return Decimals.encodeScaledValue((BigDecimal) value, trinoDecimalType.getScale()); - } - // TODO implement explicit conversion for all supported types - return value; - } } diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/TableStatisticsMaker.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/TableStatisticsMaker.java index ad60000afff3..d58fd132d924 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/TableStatisticsMaker.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/TableStatisticsMaker.java @@ -46,11 +46,11 @@ import static com.google.common.base.Verify.verify; import static com.google.common.collect.ImmutableList.toImmutableList; import static io.trino.plugin.iceberg.ExpressionConverter.toIcebergExpression; +import static io.trino.plugin.iceberg.IcebergTypes.convertIcebergValueToTrino; import static io.trino.plugin.iceberg.IcebergUtil.getColumns; import static io.trino.plugin.iceberg.IcebergUtil.getIdentityPartitions; import static io.trino.plugin.iceberg.IcebergUtil.primitiveFieldTypes; import static io.trino.plugin.iceberg.Partition.convertBounds; -import static io.trino.plugin.iceberg.PartitionTable.convert; import static io.trino.plugin.iceberg.TypeConverter.toTrinoType; import static java.util.Objects.requireNonNull; import static java.util.function.Function.identity; @@ -186,7 +186,7 @@ private TableStatistics makeTableStatistics(IcebergTableHandle tableHandle, Cons Object min = summary.getMinValues().get(fieldId); Object max = summary.getMaxValues().get(fieldId); if (min != null && max != null) { - columnBuilder.setRange(DoubleRange.from(columnHandle.getType(), convert(min, icebergType), convert(max, icebergType))); + columnBuilder.setRange(DoubleRange.from(columnHandle.getType(), convertIcebergValueToTrino(icebergType, min), convertIcebergValueToTrino(icebergType, max))); } columnHandleBuilder.put(columnHandle, columnBuilder.build()); } @@ -212,7 +212,7 @@ private boolean dataFileMatches( int fieldId = field.sourceId(); ColumnFieldDetails details = fieldDetails.get(fieldId); IcebergColumnHandle column = details.getColumnHandle(); - Object value = convert(dataFile.partition().get(index, details.getJavaClass()), details.getIcebergType()); + Object value = convertIcebergValueToTrino(details.getIcebergType(), dataFile.partition().get(index, details.getJavaClass())); Domain allowedDomain = domains.get(column); if (allowedDomain != null && !allowedDomain.includesNullableValue(value)) { return false; diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/util/Timestamps.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/util/Timestamps.java index c23721db5f23..6c5703d725c8 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/util/Timestamps.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/util/Timestamps.java @@ -15,8 +15,8 @@ import io.trino.spi.block.Block; import io.trino.spi.type.LongTimestampWithTimeZone; -import io.trino.spi.type.TimeZoneKey; +import static io.trino.spi.type.TimeZoneKey.UTC_KEY; import static io.trino.spi.type.TimestampWithTimeZoneType.TIMESTAMP_TZ_MICROS; import static io.trino.spi.type.Timestamps.MICROSECONDS_PER_MILLISECOND; import static io.trino.spi.type.Timestamps.PICOSECONDS_PER_MICROSECOND; @@ -34,11 +34,11 @@ public static long timestampTzToMicros(LongTimestampWithTimeZone timestamp) roundDiv(timestamp.getPicosOfMilli(), PICOSECONDS_PER_MICROSECOND); } - public static LongTimestampWithTimeZone timestampTzFromMicros(long epochMicros, TimeZoneKey timeZoneKey) + public static LongTimestampWithTimeZone timestampTzFromMicros(long epochMicros) { long epochMillis = floorDiv(epochMicros, MICROSECONDS_PER_MILLISECOND); int picosOfMillis = floorMod(epochMicros, MICROSECONDS_PER_MILLISECOND) * PICOSECONDS_PER_MICROSECOND; - return LongTimestampWithTimeZone.fromEpochMillisAndFraction(epochMillis, picosOfMillis, timeZoneKey); + return LongTimestampWithTimeZone.fromEpochMillisAndFraction(epochMillis, picosOfMillis, UTC_KEY); } public static LongTimestampWithTimeZone getTimestampTz(Block block, int position) diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java index 18554c369d5a..bdab0398b676 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java @@ -98,6 +98,7 @@ import static io.trino.tpch.TpchTable.LINE_ITEM; import static io.trino.transaction.TransactionBuilder.transaction; import static java.lang.String.format; +import static java.lang.String.join; import static java.util.Collections.nCopies; import static java.util.Objects.requireNonNull; import static java.util.stream.Collectors.joining; @@ -386,6 +387,147 @@ private void testSelectOrPartitionedByTimestamp(boolean partitioned) dropTable(tableName); } + @Test + public void testPartitionByTimestampWithTimeZone() + { + testSelectOrPartitionedByTimestampWithTimeZone(true); + } + + @Test + public void testSelectByTimestampWithTimeZone() + { + testSelectOrPartitionedByTimestampWithTimeZone(false); + } + + private void testSelectOrPartitionedByTimestampWithTimeZone(boolean partitioned) + { + String tableName = format("test_%s_by_timestamptz", partitioned ? "partitioned" : "selected"); + assertUpdate(format( + "CREATE TABLE %s (_timestamptz timestamp(6) with time zone) %s", + tableName, + partitioned ? "WITH (partitioning = ARRAY['_timestamptz'])" : "")); + + String instant1Utc = "TIMESTAMP '2021-10-31 00:30:00.005000 UTC'"; + String instant1La = "TIMESTAMP '2021-10-30 17:30:00.005000 America/Los_Angeles'"; + String instant2Utc = "TIMESTAMP '2021-10-31 00:30:00.006000 UTC'"; + String instant2La = "TIMESTAMP '2021-10-30 17:30:00.006000 America/Los_Angeles'"; + String instant3Utc = "TIMESTAMP '2021-10-31 00:30:00.007000 UTC'"; + String instant3La = "TIMESTAMP '2021-10-30 17:30:00.007000 America/Los_Angeles'"; + + assertUpdate(format("INSERT INTO %s VALUES %s", tableName, instant1Utc), 1); + assertUpdate(format("INSERT INTO %s VALUES %s", tableName, instant2La /* non-UTC for this one */), 1); + assertUpdate(format("INSERT INTO %s VALUES %s", tableName, instant3Utc), 1); + assertQuery(format("SELECT COUNT(*) from %s", tableName), "SELECT 3"); + + // = + assertThat(query(format("SELECT * from %s WHERE _timestamptz = %s", tableName, instant1Utc))) + .matches("VALUES " + instant1Utc); + assertThat(query(format("SELECT * from %s WHERE _timestamptz = %s", tableName, instant1La))) + .matches("VALUES " + instant1Utc); + assertThat(query(format("SELECT * from %s WHERE _timestamptz = %s", tableName, instant2Utc))) + .matches("VALUES " + instant2Utc); + assertThat(query(format("SELECT * from %s WHERE _timestamptz = %s", tableName, instant2La))) + .matches("VALUES " + instant2Utc); + assertThat(query(format("SELECT * from %s WHERE _timestamptz = %s", tableName, instant3Utc))) + .matches("VALUES " + instant3Utc); + assertThat(query(format("SELECT * from %s WHERE _timestamptz = %s", tableName, instant3La))) + .matches("VALUES " + instant3Utc); + + // < + assertThat(query(format("SELECT * from %s WHERE _timestamptz < %s", tableName, instant2Utc))) + .matches("VALUES " + instant1Utc); + assertThat(query(format("SELECT * from %s WHERE _timestamptz < %s", tableName, instant2La))) + .matches("VALUES " + instant1Utc); + assertThat(query(format("SELECT * from %s WHERE _timestamptz < %s", tableName, instant3Utc))) + .matches(format("VALUES %s, %s", instant1Utc, instant2Utc)); + assertThat(query(format("SELECT * from %s WHERE _timestamptz < %s", tableName, instant3La))) + .matches(format("VALUES %s, %s", instant1Utc, instant2Utc)); + + // <= + assertThat(query(format("SELECT * from %s WHERE _timestamptz <= %s", tableName, instant2Utc))) + .matches(format("VALUES %s, %s", instant1Utc, instant2Utc)); + assertThat(query(format("SELECT * from %s WHERE _timestamptz <= %s", tableName, instant2La))) + .matches(format("VALUES %s, %s", instant1Utc, instant2Utc)); + + // > + assertThat(query(format("SELECT * from %s WHERE _timestamptz > %s", tableName, instant2Utc))) + .matches("VALUES " + instant3Utc); + assertThat(query(format("SELECT * from %s WHERE _timestamptz > %s", tableName, instant2La))) + .matches("VALUES " + instant3Utc); + assertThat(query(format("SELECT * from %s WHERE _timestamptz > %s", tableName, instant1Utc))) + .matches(format("VALUES %s, %s", instant2Utc, instant3Utc)); + assertThat(query(format("SELECT * from %s WHERE _timestamptz > %s", tableName, instant1La))) + .matches(format("VALUES %s, %s", instant2Utc, instant3Utc)); + + // >= + assertThat(query(format("SELECT * from %s WHERE _timestamptz >= %s", tableName, instant2Utc))) + .matches(format("VALUES %s, %s", instant2Utc, instant3Utc)); + assertThat(query(format("SELECT * from %s WHERE _timestamptz >= %s", tableName, instant2La))) + .matches(format("VALUES %s, %s", instant2Utc, instant3Utc)); + + // open range + assertThat(query(format("SELECT * from %s WHERE _timestamptz > %s AND _timestamptz < %s", tableName, instant1Utc, instant3Utc))) + .matches("VALUES " + instant2Utc); + assertThat(query(format("SELECT * from %s WHERE _timestamptz > %s AND _timestamptz < %s", tableName, instant1La, instant3La))) + .matches("VALUES " + instant2Utc); + + // closed range + assertThat(query(format("SELECT * from %s WHERE _timestamptz BETWEEN %s AND %s", tableName, instant1Utc, instant2Utc))) + .matches(format("VALUES %s, %s", instant1Utc, instant2Utc)); + assertThat(query(format("SELECT * from %s WHERE _timestamptz BETWEEN %s AND %s", tableName, instant1La, instant2La))) + .matches(format("VALUES %s, %s", instant1Utc, instant2Utc)); + + // != + assertThat(query(format("SELECT * from %s WHERE _timestamptz != %s", tableName, instant1Utc))) + .matches(format("VALUES %s, %s", instant2Utc, instant3Utc)); + assertThat(query(format("SELECT * from %s WHERE _timestamptz != %s", tableName, instant1La))) + .matches(format("VALUES %s, %s", instant2Utc, instant3Utc)); + assertThat(query(format("SELECT * from %s WHERE _timestamptz != %s", tableName, instant2Utc))) + .matches(format("VALUES %s, %s", instant1Utc, instant3Utc)); + assertThat(query(format("SELECT * from %s WHERE _timestamptz != %s", tableName, instant2La))) + .matches(format("VALUES %s, %s", instant1Utc, instant3Utc)); + + // IS DISTINCT FROM + assertThat(query(format("SELECT * from %s WHERE _timestamptz IS DISTINCT FROM %s", tableName, instant1Utc))) + .matches(format("VALUES %s, %s", instant2Utc, instant3Utc)); + assertThat(query(format("SELECT * from %s WHERE _timestamptz IS DISTINCT FROM %s", tableName, instant1La))) + .matches(format("VALUES %s, %s", instant2Utc, instant3Utc)); + assertThat(query(format("SELECT * from %s WHERE _timestamptz IS DISTINCT FROM %s", tableName, instant2Utc))) + .matches(format("VALUES %s, %s", instant1Utc, instant3Utc)); + assertThat(query(format("SELECT * from %s WHERE _timestamptz IS DISTINCT FROM %s", tableName, instant2La))) + .matches(format("VALUES %s, %s", instant1Utc, instant3Utc)); + + // IS NOT DISTINCT FROM + assertThat(query(format("SELECT * from %s WHERE _timestamptz IS NOT DISTINCT FROM %s", tableName, instant1Utc))) + .matches("VALUES " + instant1Utc); + assertThat(query(format("SELECT * from %s WHERE _timestamptz IS NOT DISTINCT FROM %s", tableName, instant1La))) + .matches("VALUES " + instant1Utc); + assertThat(query(format("SELECT * from %s WHERE _timestamptz IS NOT DISTINCT FROM %s", tableName, instant2Utc))) + .matches("VALUES " + instant2Utc); + assertThat(query(format("SELECT * from %s WHERE _timestamptz IS NOT DISTINCT FROM %s", tableName, instant2La))) + .matches("VALUES " + instant2Utc); + assertThat(query(format("SELECT * from %s WHERE _timestamptz IS NOT DISTINCT FROM %s", tableName, instant3Utc))) + .matches("VALUES " + instant3Utc); + assertThat(query(format("SELECT * from %s WHERE _timestamptz IS NOT DISTINCT FROM %s", tableName, instant3La))) + .matches("VALUES " + instant3Utc); + + if (partitioned) { + assertThat(query(format("SELECT row_count, file_count, _timestamptz FROM \"%s$partitions\"", tableName))) + .matches(format("VALUES (BIGINT '1', BIGINT '1', %s), (BIGINT '1', BIGINT '1', %s), (BIGINT '1', BIGINT '1', %s)", instant1Utc, instant2Utc, instant3Utc)); + } + else { + assertThat(query(format("SELECT row_count, file_count, _timestamptz FROM \"%s$partitions\"", tableName))) + .matches(format == ORC + ? "VALUES (BIGINT '3', BIGINT '3', CAST(NULL AS row(min timestamp(6) with time zone, max timestamp(6) with time zone, null_count bigint)))" + : format( + "VALUES (BIGINT '3', BIGINT '3', CAST(ROW(%s, %s, 0) AS row(min timestamp(6) with time zone, max timestamp(6) with time zone, null_count bigint)))", + instant1Utc, + instant3Utc)); + } + + assertUpdate("DROP TABLE " + tableName); + } + @Test public void testUuid() { @@ -426,62 +568,242 @@ public void testCreatePartitionedTable() { assertUpdate("" + "CREATE TABLE test_partitioned_table (" + - " _string VARCHAR" + - ", _bigint BIGINT" + - ", _integer INTEGER" + - ", _real REAL" + - ", _double DOUBLE" + - ", _boolean BOOLEAN" + - ", _decimal_short DECIMAL(3,2)" + - ", _decimal_long DECIMAL(30,10)" + - ", _timestamp TIMESTAMP(6)" + - ", _date DATE" + + " a_boolean boolean, " + + " an_integer integer, " + + " a_bigint bigint, " + + " a_real real, " + + " a_double double, " + + " a_short_decimal decimal(5,2), " + + " a_long_decimal decimal(38,20), " + + " a_varchar varchar, " + + " a_varbinary varbinary, " + + " a_date date, " + + " a_time time(6), " + + " a_timestamp timestamp(6), " + + " a_timestamptz timestamp(6) with time zone, " + + " a_uuid uuid, " + + " a_row row(id integer , vc varchar), " + + " an_array array(varchar), " + + " a_map map(integer, varchar) " + ") " + "WITH (" + "partitioning = ARRAY[" + - " '_string'," + - " '_integer'," + - " '_bigint'," + - " '_boolean'," + - " '_real'," + - " '_double'," + - " '_decimal_short', " + - " '_decimal_long'," + - " '_timestamp'," + - " '_date']" + + " 'a_boolean', " + + " 'an_integer', " + + " 'a_bigint', " + + " 'a_real', " + + " 'a_double', " + + " 'a_short_decimal', " + + " 'a_long_decimal', " + + " 'a_varchar', " + + // " 'a_varbinary', " + TODO (https://github.com/trinodb/trino/issues/9755) this yields incorrect query results + " 'a_date', " + + " 'a_time', " + + " 'a_timestamp', " + + " 'a_timestamptz', " + + " 'a_uuid' " + + // Note: partitioning on non-primitive columns is not allowed in Iceberg + " ]" + ")"); assertQueryReturnsEmptyResult("SELECT * FROM test_partitioned_table"); - @Language("SQL") String select = "" + - "SELECT" + - " 'foo' _string" + - ", CAST(123 AS BIGINT) _bigint" + - ", 456 _integer" + - ", CAST('123.45' AS REAL) _real" + - ", CAST('3.14' AS DOUBLE) _double" + - ", true _boolean" + - ", CAST('3.14' AS DECIMAL(3,2)) _decimal_short" + - ", CAST('12345678901234567890.0123456789' AS DECIMAL(30,10)) _decimal_long" + - ", CAST('2017-05-01 10:12:34' AS TIMESTAMP) _timestamp" + - ", CAST('2017-05-01' AS DATE) _date"; - - assertUpdate(format("INSERT INTO test_partitioned_table %s", select), 1); - assertQuery("SELECT * FROM test_partitioned_table", select); + String values = "VALUES (" + + "true, " + + "1, " + + "BIGINT '1', " + + "REAL '1.0', " + + "DOUBLE '1.0', " + + "CAST(1.0 AS decimal(5,2)), " + + "CAST(11.0 AS decimal(38,20)), " + + "VARCHAR 'onefsadfdsf', " + + "X'000102f0feff', " + + "DATE '2021-07-24'," + + "TIME '02:43:57.987654', " + + "TIMESTAMP '2021-07-24 03:43:57.987654'," + + "TIMESTAMP '2021-07-24 04:43:57.987654 UTC', " + + "UUID '20050910-1330-11e9-ffff-2a86e4085a59', " + + "CAST(ROW(42, 'this is a random value') AS ROW(id int, vc varchar)), " + + "ARRAY[VARCHAR 'uno', 'dos', 'tres'], " + + "map(ARRAY[1,2], ARRAY['ek', VARCHAR 'one'])) "; + + String nullValues = nCopies(17, "NULL").stream() + .collect(joining(", ", "VALUES (", ")")); + + assertUpdate("INSERT INTO test_partitioned_table " + values, 1); + assertUpdate("INSERT INTO test_partitioned_table " + nullValues, 1); + + // SELECT + assertThat(query("SELECT * FROM test_partitioned_table")) + .matches(values + " UNION ALL " + nullValues); + + // SELECT with predicates + assertThat(query("SELECT * FROM test_partitioned_table WHERE " + + " a_boolean = true " + + "AND an_integer = 1 " + + "AND a_bigint = BIGINT '1' " + + "AND a_real = REAL '1.0' " + + "AND a_double = DOUBLE '1.0' " + + "AND a_short_decimal = CAST(1.0 AS decimal(5,2)) " + + "AND a_long_decimal = CAST(11.0 AS decimal(38,20)) " + + "AND a_varchar = VARCHAR 'onefsadfdsf' " + + "AND a_varbinary = X'000102f0feff' " + + "AND a_date = DATE '2021-07-24' " + + "AND a_time = TIME '02:43:57.987654' " + + "AND a_timestamp = TIMESTAMP '2021-07-24 03:43:57.987654' " + + "AND a_timestamptz = TIMESTAMP '2021-07-24 04:43:57.987654 UTC' " + + "AND a_uuid = UUID '20050910-1330-11e9-ffff-2a86e4085a59' " + + "AND a_row = CAST(ROW(42, 'this is a random value') AS ROW(id int, vc varchar)) " + + "AND an_array = ARRAY[VARCHAR 'uno', 'dos', 'tres'] " + + "AND a_map = map(ARRAY[1,2], ARRAY['ek', VARCHAR 'one']) " + + "")) + .matches(values); - assertQuery( - "SELECT * FROM test_partitioned_table WHERE" + - " 'foo' = _string" + - " AND 456 = _integer" + - " AND CAST(123 AS BIGINT) = _bigint" + - " AND true = _boolean" + - " AND CAST('3.14' AS DECIMAL(3,2)) = _decimal_short" + - " AND CAST('12345678901234567890.0123456789' AS DECIMAL(30,10)) = _decimal_long" + - " AND CAST('2017-05-01 10:12:34' AS TIMESTAMP) = _timestamp" + - " AND CAST('2017-05-01' AS DATE) = _date", - select); + assertThat(query("SELECT * FROM test_partitioned_table WHERE " + + " a_boolean IS NULL " + + "AND an_integer IS NULL " + + "AND a_bigint IS NULL " + + "AND a_real IS NULL " + + "AND a_double IS NULL " + + "AND a_short_decimal IS NULL " + + "AND a_long_decimal IS NULL " + + "AND a_varchar IS NULL " + + "AND a_varbinary IS NULL " + + "AND a_date IS NULL " + + "AND a_time IS NULL " + + "AND a_timestamp IS NULL " + + "AND a_timestamptz IS NULL " + + "AND a_uuid IS NULL " + + "AND a_row IS NULL " + + "AND an_array IS NULL " + + "AND a_map IS NULL " + + "")) + .skippingTypesCheck() + .matches(nullValues); + + // SHOW STATS + if (format == ORC) { + // TODO (https://github.com/trinodb/trino/issues/9714, https://github.com/trinodb/trino/issues/9716) SHOW STATS fails with NullPointerException + assertThatThrownBy(() -> query("SHOW STATS FOR test_partitioned_table")) + .hasToString("java.lang.RuntimeException: java.lang.NullPointerException") + .hasStackTraceContaining("at io.trino.plugin.iceberg.TableStatisticsMaker.updatePartitionedStats"); + } + else { + assertThat(query("SHOW STATS FOR test_partitioned_table")) + .skippingTypesCheck() + .projected(0, 2, 3, 4, 5, 6) // ignore data size which is varying for Parquet (and not available for ORC) + .matches("VALUES " + + " ('a_boolean', NULL, 0.5e0, NULL, 'true', 'true'), " + + " ('an_integer', NULL, 0.5e0, NULL, '1', '1'), " + + " ('a_bigint', NULL, 0.5e0, NULL, '1', '1'), " + + " ('a_real', NULL, 0.5e0, NULL, '1.0', '1.0'), " + + " ('a_double', NULL, 0.5e0, NULL, '1.0', '1.0'), " + + " ('a_short_decimal', NULL, 0.5e0, NULL, '1.0', '1.0'), " + + " ('a_long_decimal', NULL, 0.5e0, NULL, '11.0', '11.0'), " + + " ('a_varchar', NULL, 0.5e0, NULL, NULL, NULL), " + + " ('a_varbinary', NULL, 0.5e0, NULL, NULL, NULL), " + + " ('a_date', NULL, 0.5e0, NULL, '2021-07-24', '2021-07-24'), " + + " ('a_time', NULL, 0.5e0, NULL, NULL, NULL), " + + " ('a_timestamp', NULL, 0.5e0, NULL, '2021-07-24 03:43:57.987654', '2021-07-24 03:43:57.987654'), " + + " ('a_timestamptz', NULL, 0.5e0, NULL, '2021-07-24 04:43:57.987 UTC', '2021-07-24 04:43:57.987 UTC'), " + + " ('a_uuid', NULL, 0.5e0, NULL, NULL, NULL), " + + " ('a_row', NULL, NULL, NULL, NULL, NULL), " + + " ('an_array', NULL, NULL, NULL, NULL, NULL), " + + " ('a_map', NULL, NULL, NULL, NULL, NULL), " + + " (NULL, NULL, NULL, 2e0, NULL, NULL)"); + } - dropTable("test_partitioned_table"); + // $partitions + String schema = getSession().getSchema().orElseThrow(); + assertThat(query("SELECT column_name FROM information_schema.columns WHERE table_schema = '" + schema + "' AND table_name = 'test_partitioned_table$partitions' ")) + .skippingTypesCheck() + .matches("VALUES " + + // Generic columns (selected tested below) + " 'row_count', " + + " 'file_count', " + + " 'total_size', " + + // Table columns + " 'a_boolean', " + + " 'an_integer', " + + " 'a_bigint', " + + " 'a_real', " + + " 'a_double', " + + " 'a_short_decimal', " + + " 'a_long_decimal', " + + " 'a_varchar', " + + " 'a_varbinary', " + + " 'a_date', " + + " 'a_time', " + + " 'a_timestamp', " + + " 'a_timestamptz', " + + " 'a_uuid' " + + // Note: non-primitive columns not being returned from $partitions (otherwise they would be tested below) + ""); + assertThat(query("SELECT " + + " row_count," + + " file_count, " + + " a_boolean, " + + " an_integer, " + + " a_bigint, " + + " a_real, " + + " a_double, " + + " a_short_decimal, " + + " a_long_decimal, " + + " a_varchar, " + + " a_varbinary, " + + " a_date, " + + " a_time, " + + " a_timestamp, " + + " a_timestamptz, " + + " a_uuid " + + // Note: partitioning on non-primitive columns is not allowed in Iceberg + " FROM \"test_partitioned_table$partitions\" ")) + .matches("" + + "VALUES (" + + " BIGINT '1', " + + " BIGINT '1', " + + " true, " + + " 1, " + + " BIGINT '1', " + + " REAL '1.0', " + + " DOUBLE '1.0', " + + " CAST(1.0 AS decimal(5,2)), " + + " CAST(11.0 AS decimal(38,20)), " + + " VARCHAR 'onefsadfdsf', " + + // TODO (https://github.com/trinodb/trino/issues/9755) include in partitioning + (format == ORC + ? " CAST(ROW(NULL, NULL, 0) AS ROW(min varbinary, max varbinary, null_count bigint)), " + : " CAST(ROW(X'000102f0feff', X'000102f0feff', 0) AS ROW(min varbinary, max varbinary, null_count bigint)), ") + + " DATE '2021-07-24'," + + " TIME '02:43:57.987654', " + + " TIMESTAMP '2021-07-24 03:43:57.987654'," + + " TIMESTAMP '2021-07-24 04:43:57.987654 UTC', " + + " UUID '20050910-1330-11e9-ffff-2a86e4085a59' " + + ")" + + "UNION ALL " + + "VALUES (" + + " BIGINT '1', " + + " BIGINT '1', " + + " NULL, " + + " NULL, " + + " NULL, " + + " NULL, " + + " NULL, " + + " NULL, " + + " NULL, " + + " NULL, " + + // TODO (https://github.com/trinodb/trino/issues/9755) include in partitioning + (format == ORC + ? " NULL, " + : " CAST(ROW(NULL, NULL, 1) AS ROW(min varbinary, max varbinary, null_count bigint)), ") + + " NULL, " + + " NULL, " + + " NULL, " + + " NULL, " + + " NULL " + + ")"); + + assertUpdate("DROP TABLE test_partitioned_table"); } @Test @@ -500,55 +822,6 @@ public void testCreatePartitionedTableWithNestedTypes() dropTable("test_partitioned_table_nested_type"); } - @Test - public void testPartitionedTableWithNullValues() - { - assertUpdate("CREATE TABLE test_partitioned_table_with_null_values (" + - " _string VARCHAR" + - ", _bigint BIGINT" + - ", _integer INTEGER" + - ", _real REAL" + - ", _double DOUBLE" + - ", _boolean BOOLEAN" + - ", _decimal_short DECIMAL(3,2)" + - ", _decimal_long DECIMAL(30,10)" + - ", _timestamp TIMESTAMP(6)" + - ", _date DATE" + - ") " + - "WITH (" + - "partitioning = ARRAY[" + - " '_string'," + - " '_integer'," + - " '_bigint'," + - " '_boolean'," + - " '_real'," + - " '_double'," + - " '_decimal_short', " + - " '_decimal_long'," + - " '_timestamp'," + - " '_date']" + - ")"); - - assertQueryReturnsEmptyResult("SELECT * from test_partitioned_table_with_null_values"); - - @Language("SQL") String select = "" + - "SELECT" + - " null _string" + - ", null _bigint" + - ", null _integer" + - ", null _real" + - ", null _double" + - ", null _boolean" + - ", null _decimal_short" + - ", null _decimal_long" + - ", null _timestamp" + - ", null _date"; - - assertUpdate("INSERT INTO test_partitioned_table_with_null_values " + select, 1); - assertQuery("SELECT * from test_partitioned_table_with_null_values", select); - dropTable("test_partitioned_table_with_null_values"); - } - @Test public void testCreatePartitionedTableAs() { @@ -716,7 +989,7 @@ public void testLargeInFailureOnPartitionedColumns() .map(Object::toString) .collect(toImmutableList()); - String filter = format("col2 IN (%s)", String.join(",", predicates)); + String filter = format("col2 IN (%s)", join(",", predicates)); assertThatThrownBy(() -> getQueryRunner().execute(format("SELECT * FROM test_large_in_failure WHERE %s", filter))) .isInstanceOf(RuntimeException.class) .hasMessage("java.lang.StackOverflowError"); @@ -1696,7 +1969,7 @@ public void testPredicatePushdown() .filter(index -> index != 20L) .collect(toImmutableList()); assertTrue(values.size() > ICEBERG_DOMAIN_COMPACTION_THRESHOLD); - String valuesString = String.join(",", values.stream().map(Object::toString).collect(toImmutableList())); + String valuesString = join(",", values.stream().map(Object::toString).collect(toImmutableList())); String inPredicate = "%s IN (" + valuesString + ")"; assertQuery( format("SELECT * FROM %s WHERE %s AND %s", tableName, format(inPredicate, "col1"), format(inPredicate, "col2")), @@ -2093,22 +2366,23 @@ public void testSplitPruningForFilterOnPartitionColumn() public void testAllAvailableTypes() { assertUpdate("CREATE TABLE test_all_types (" + - " a_bool BOOLEAN" + - ", a_int INTEGER" + - ", a_big BIGINT" + - ", a_real REAL" + - ", a_double DOUBLE" + - ", a_short_decimal DECIMAL(5,2)" + - ", a_long_decimal DECIMAL(38,20)" + - ", a_date DATE" + - ", a_time TIME(6)" + - ", a_timestamp TIMESTAMP(6)" + - ", a_timestamptz TIMESTAMP(6) WITH TIME ZONE" + - ", a_string VARCHAR" + - ", a_binary VARBINARY" + - ", a_row ROW(id INTEGER , vc VARCHAR)" + - ", a_array ARRAY(VARCHAR)" + - ", a_map MAP(INTEGER, VARCHAR)" + + " a_boolean boolean, " + + " an_integer integer, " + + " a_bigint bigint, " + + " a_real real, " + + " a_double double, " + + " a_short_decimal decimal(5,2), " + + " a_long_decimal decimal(38,20), " + + " a_varchar varchar, " + + " a_varbinary varbinary, " + + " a_date date, " + + " a_time time(6), " + + " a_timestamp timestamp(6), " + + " a_timestamptz timestamp(6) with time zone, " + + " a_uuid uuid, " + + " a_row row(id integer , vc varchar), " + + " an_array array(varchar), " + + " a_map map(integer, varchar) " + ")"); String values = "VALUES (" + @@ -2116,23 +2390,173 @@ public void testAllAvailableTypes() "1, " + "BIGINT '1', " + "REAL '1.0', " + - "DOUBLE '1.0', " + - "CAST(1.0 as DECIMAL(5,2)), " + - "CAST(11.0 as DECIMAL(38,20)), " + - "DATE '2021-07-24'," + - "TIME '02:43:57.348000', " + - "TIMESTAMP '2021-07-24 03:43:57.348000'," + - "TIMESTAMP '2021-07-24 04:43:57.348000 UTC', " + + "DOUBLE '1.0', " + + "CAST(1.0 AS decimal(5,2)), " + + "CAST(11.0 AS decimal(38,20)), " + "VARCHAR 'onefsadfdsf', " + "X'000102f0feff', " + - "(CAST(ROW(null, 'this is a random value') AS ROW(id int, vc varchar))), " + - "array[VARCHAR 'uno', 'dos', 'tres'], " + - "map(array[1,2], array['ek', VARCHAR 'one']))"; + "DATE '2021-07-24'," + + "TIME '02:43:57.987654', " + + "TIMESTAMP '2021-07-24 03:43:57.987654'," + + "TIMESTAMP '2021-07-24 04:43:57.987654 UTC', " + + "UUID '20050910-1330-11e9-ffff-2a86e4085a59', " + + "CAST(ROW(42, 'this is a random value') AS ROW(id int, vc varchar)), " + + "ARRAY[VARCHAR 'uno', 'dos', 'tres'], " + + "map(ARRAY[1,2], ARRAY['ek', VARCHAR 'one'])) "; + + String nullValues = nCopies(17, "NULL").stream() + .collect(joining(", ", "VALUES (", ")")); + assertUpdate("INSERT INTO test_all_types " + values, 1); + assertUpdate("INSERT INTO test_all_types " + nullValues, 1); + // SELECT assertThat(query("SELECT * FROM test_all_types")) + .matches(values + " UNION ALL " + nullValues); + + // SELECT with predicates + assertThat(query("SELECT * FROM test_all_types WHERE " + + " a_boolean = true " + + "AND an_integer = 1 " + + "AND a_bigint = BIGINT '1' " + + "AND a_real = REAL '1.0' " + + "AND a_double = DOUBLE '1.0' " + + "AND a_short_decimal = CAST(1.0 AS decimal(5,2)) " + + "AND a_long_decimal = CAST(11.0 AS decimal(38,20)) " + + "AND a_varchar = VARCHAR 'onefsadfdsf' " + + "AND a_varbinary = X'000102f0feff' " + + "AND a_date = DATE '2021-07-24' " + + "AND a_time = TIME '02:43:57.987654' " + + "AND a_timestamp = TIMESTAMP '2021-07-24 03:43:57.987654' " + + "AND a_timestamptz = TIMESTAMP '2021-07-24 04:43:57.987654 UTC' " + + "AND a_uuid = UUID '20050910-1330-11e9-ffff-2a86e4085a59' " + + "AND a_row = CAST(ROW(42, 'this is a random value') AS ROW(id int, vc varchar)) " + + "AND an_array = ARRAY[VARCHAR 'uno', 'dos', 'tres'] " + + "AND a_map = map(ARRAY[1,2], ARRAY['ek', VARCHAR 'one']) " + + "")) .matches(values); + assertThat(query("SELECT * FROM test_all_types WHERE " + + " a_boolean IS NULL " + + "AND an_integer IS NULL " + + "AND a_bigint IS NULL " + + "AND a_real IS NULL " + + "AND a_double IS NULL " + + "AND a_short_decimal IS NULL " + + "AND a_long_decimal IS NULL " + + "AND a_varchar IS NULL " + + "AND a_varbinary IS NULL " + + "AND a_date IS NULL " + + "AND a_time IS NULL " + + "AND a_timestamp IS NULL " + + "AND a_timestamptz IS NULL " + + "AND a_uuid IS NULL " + + "AND a_row IS NULL " + + "AND an_array IS NULL " + + "AND a_map IS NULL " + + "")) + .skippingTypesCheck() + .matches(nullValues); + + // SHOW STATS + try { + // TODO (https://github.com/trinodb/trino/issues/9714, https://github.com/trinodb/trino/issues/9716) SHOW STATS may fail with NullPointerException, depending which file is processed first +// assertThat(query("SHOW STATS FOR test_all_types")) +// .skippingTypesCheck() +// .matches("...."); + + computeActual("SHOW STATS FOR test_all_types"); + } + catch (RuntimeException sometimesExpected) { + assertThat(sometimesExpected) + .hasToString("java.lang.RuntimeException: java.lang.NullPointerException") + .hasStackTraceContaining("at io.trino.plugin.iceberg.TableStatisticsMaker.makeTableStatistics"); + } + + // $partitions + String schema = getSession().getSchema().orElseThrow(); + assertThat(query("SELECT column_name FROM information_schema.columns WHERE table_schema = '" + schema + "' AND table_name = 'test_all_types$partitions' ")) + .skippingTypesCheck() + .matches("VALUES " + + // Generic columns (selected tested below) + " 'row_count', " + + " 'file_count', " + + " 'total_size', " + + // Table columns + " 'a_boolean', " + + " 'an_integer', " + + " 'a_bigint', " + + " 'a_real', " + + " 'a_double', " + + " 'a_short_decimal', " + + " 'a_long_decimal', " + + " 'a_varchar', " + + " 'a_varbinary', " + + " 'a_date', " + + " 'a_time', " + + " 'a_timestamp', " + + " 'a_timestamptz', " + + " 'a_uuid' " + + // Note: non-primitive columns not being returned from $partitions (otherwise they would be tested below) + ""); + assertThat(query("SELECT " + + " row_count," + + " file_count, " + + " a_boolean, " + + " an_integer, " + + " a_bigint, " + + " a_real, " + + " a_double, " + + " a_short_decimal, " + + " a_long_decimal, " + + " a_varchar, " + + " a_varbinary, " + + " a_date, " + + " a_time, " + + " a_timestamp, " + + " a_timestamptz, " + + " a_uuid " + + // Note: partitioning on non-primitive columns is not allowed in Iceberg + " FROM \"test_all_types$partitions\" ")) + .matches( + format == ORC + ? "VALUES (" + + " BIGINT '2', " + + " BIGINT '2', " + + " CAST(NULL AS ROW(min boolean, max boolean, null_count bigint)), " + + " CAST(NULL AS ROW(min integer, max integer, null_count bigint)), " + + " CAST(NULL AS ROW(min bigint, max bigint, null_count bigint)), " + + " CAST(NULL AS ROW(min real, max real, null_count bigint)), " + + " CAST(NULL AS ROW(min double, max double, null_count bigint)), " + + " CAST(NULL AS ROW(min decimal(5,2), max decimal(5,2), null_count bigint)), " + + " CAST(NULL AS ROW(min decimal(38,20), max decimal(38,20), null_count bigint)), " + + " CAST(NULL AS ROW(min varchar, max varchar, null_count bigint)), " + + " CAST(NULL AS ROW(min varbinary, max varbinary, null_count bigint)), " + + " CAST(NULL AS ROW(min date, max date, null_count bigint)), " + + " CAST(NULL AS ROW(min time(6), max time(6), null_count bigint)), " + + " CAST(NULL AS ROW(min timestamp(6), max timestamp(6), null_count bigint)), " + + " CAST(NULL AS ROW(min timestamp(6) with time zone, max timestamp(6) with time zone, null_count bigint)), " + + " CAST(NULL AS ROW(min uuid, max uuid, null_count bigint)) " + + ")" + : "VALUES (" + + " BIGINT '2', " + + " BIGINT '2', " + + " CAST(ROW(true, true, 1) AS ROW(min boolean, max boolean, null_count bigint)), " + + " CAST(ROW(1, 1, 1) AS ROW(min integer, max integer, null_count bigint)), " + + " CAST(ROW(1, 1, 1) AS ROW(min bigint, max bigint, null_count bigint)), " + + " CAST(ROW(1, 1, 1) AS ROW(min real, max real, null_count bigint)), " + + " CAST(ROW(1, 1, 1) AS ROW(min double, max double, null_count bigint)), " + + " CAST(ROW(1, 1, 1) AS ROW(min decimal(5,2), max decimal(5,2), null_count bigint)), " + + " CAST(ROW(11, 11, 1) AS ROW(min decimal(38,20), max decimal(38,20), null_count bigint)), " + + " CAST(ROW('onefsadfdsf', 'onefsadfdsf', 1) AS ROW(min varchar, max varchar, null_count bigint)), " + + " CAST(ROW(X'000102f0feff', X'000102f0feff', 1) AS ROW(min varbinary, max varbinary, null_count bigint)), " + + " CAST(ROW(DATE '2021-07-24', DATE '2021-07-24', 1) AS ROW(min date, max date, null_count bigint)), " + + " CAST(ROW(TIME '02:43:57.987654', TIME '02:43:57.987654', 1) AS ROW(min time(6), max time(6), null_count bigint)), " + + " CAST(ROW(TIMESTAMP '2021-07-24 03:43:57.987654', TIMESTAMP '2021-07-24 03:43:57.987654', 1) AS ROW(min timestamp(6), max timestamp(6), null_count bigint)), " + + " CAST(ROW(TIMESTAMP '2021-07-24 04:43:57.987654 UTC', TIMESTAMP '2021-07-24 04:43:57.987654 UTC', 1) AS ROW(min timestamp(6) with time zone, max timestamp(6) with time zone, null_count bigint)), " + + " CAST(ROW(UUID '20050910-1330-11e9-ffff-2a86e4085a59', UUID '20050910-1330-11e9-ffff-2a86e4085a59', 1) AS ROW(min uuid, max uuid, null_count bigint)) " + + ")"); + assertUpdate("DROP TABLE test_all_types"); } diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergSplitSource.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergSplitSource.java index 6fe7f88b2314..b7010fbb8e0f 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergSplitSource.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergSplitSource.java @@ -34,7 +34,6 @@ import io.trino.spi.predicate.Range; import io.trino.spi.predicate.TupleDomain; import io.trino.spi.predicate.ValueSet; -import io.trino.spi.type.TimeZoneKey; import io.trino.testing.AbstractTestQueryFramework; import io.trino.testing.QueryRunner; import org.apache.iceberg.Table; @@ -155,7 +154,6 @@ public TupleDomain getCurrentPredicate() return TupleDomain.all(); } }, - TimeZoneKey.UTC_KEY, new Duration(2, SECONDS)); ImmutableList.Builder splits = ImmutableList.builder(); @@ -180,18 +178,15 @@ public void testBigintPartitionPruning() assertFalse(IcebergSplitSource.partitionMatchesPredicate( ImmutableSet.of(bigintColumn), ImmutableMap.of(1, Optional.of("1000")), - TupleDomain.fromFixedValues(ImmutableMap.of(bigintColumn, NullableValue.of(BIGINT, 100L))), - TimeZoneKey.UTC_KEY)); + TupleDomain.fromFixedValues(ImmutableMap.of(bigintColumn, NullableValue.of(BIGINT, 100L))))); assertTrue(IcebergSplitSource.partitionMatchesPredicate( ImmutableSet.of(bigintColumn), ImmutableMap.of(1, Optional.of("1000")), - TupleDomain.fromFixedValues(ImmutableMap.of(bigintColumn, NullableValue.of(BIGINT, 1000L))), - TimeZoneKey.UTC_KEY)); + TupleDomain.fromFixedValues(ImmutableMap.of(bigintColumn, NullableValue.of(BIGINT, 1000L))))); assertFalse(IcebergSplitSource.partitionMatchesPredicate( ImmutableSet.of(bigintColumn), ImmutableMap.of(1, Optional.of("1000")), - TupleDomain.fromFixedValues(ImmutableMap.of(bigintColumn, NullableValue.asNull(BIGINT))), - TimeZoneKey.UTC_KEY)); + TupleDomain.fromFixedValues(ImmutableMap.of(bigintColumn, NullableValue.asNull(BIGINT))))); } @Test