From 84a34666f26eb664aef0eae30f932c39f7ff6bf1 Mon Sep 17 00:00:00 2001 From: praveenkrishna Date: Mon, 17 Apr 2023 19:29:10 +0530 Subject: [PATCH] Add support for varchar to timestamp coercion in hive tables --- .../plugin/hive/coercions/CoercionUtils.java | 8 ++ .../hive/coercions/TimestampCoercer.java | 72 ++++++++++++++ .../plugin/hive/orc/OrcTypeTranslator.java | 17 ++++ .../plugin/hive/util/HiveCoercionPolicy.java | 3 +- .../hive/coercions/TestTimestampCoercer.java | 93 ++++++++++++++++++- .../product/hive/BaseTestHiveCoercion.java | 63 +++++++++---- .../TestHiveCoercionOnPartitionedTable.java | 13 ++- .../TestHiveCoercionOnUnpartitionedTable.java | 9 +- 8 files changed, 249 insertions(+), 29 deletions(-) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/CoercionUtils.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/CoercionUtils.java index d6e7ef6ee5ae..edc691de3c34 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/CoercionUtils.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/CoercionUtils.java @@ -16,6 +16,8 @@ import com.google.common.collect.ImmutableList; import io.trino.plugin.hive.HiveTimestampPrecision; import io.trino.plugin.hive.HiveType; +import io.trino.plugin.hive.coercions.TimestampCoercer.VarcharToLongTimestampCoercer; +import io.trino.plugin.hive.coercions.TimestampCoercer.VarcharToShortTimestampCoercer; import io.trino.plugin.hive.type.Category; import io.trino.plugin.hive.type.ListTypeInfo; import io.trino.plugin.hive.type.MapTypeInfo; @@ -91,6 +93,12 @@ public static Type createTypeFromCoercer(TypeManager typeManager, HiveType fromH if (fromType instanceof VarcharType fromVarcharType && (toHiveType.equals(HIVE_BYTE) || toHiveType.equals(HIVE_SHORT) || toHiveType.equals(HIVE_INT) || toHiveType.equals(HIVE_LONG))) { return Optional.of(new VarcharToIntegerNumberCoercer<>(fromVarcharType, toType)); } + if (fromType instanceof VarcharType varcharType && toType instanceof TimestampType timestampType) { + if (timestampType.isShort()) { + return Optional.of(new VarcharToShortTimestampCoercer(varcharType, timestampType)); + } + return Optional.of(new VarcharToLongTimestampCoercer(varcharType, timestampType)); + } if (fromType instanceof VarcharType fromVarcharType && toType instanceof VarcharType toVarcharType) { if (narrowerThan(toVarcharType, fromVarcharType)) { return Optional.of(new VarcharCoercer(fromVarcharType, toVarcharType)); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/TimestampCoercer.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/TimestampCoercer.java index 0f9ab56c0dc6..0b903a3561b4 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/TimestampCoercer.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/TimestampCoercer.java @@ -13,6 +13,7 @@ */ package io.trino.plugin.hive.coercions; +import io.airlift.slice.Slice; import io.airlift.slice.Slices; import io.trino.spi.TrinoException; import io.trino.spi.block.Block; @@ -25,19 +26,27 @@ import java.time.chrono.IsoChronology; import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatterBuilder; +import java.time.format.DateTimeParseException; +import static com.google.common.base.Preconditions.checkArgument; import static io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_TIMESTAMP_COERCION; +import static io.trino.spi.type.TimestampType.MAX_PRECISION; +import static io.trino.spi.type.TimestampType.MAX_SHORT_PRECISION; import static io.trino.spi.type.Timestamps.MICROSECONDS_PER_SECOND; import static io.trino.spi.type.Timestamps.NANOSECONDS_PER_MICROSECOND; import static io.trino.spi.type.Timestamps.PICOSECONDS_PER_NANOSECOND; import static io.trino.spi.type.Timestamps.SECONDS_PER_DAY; +import static io.trino.spi.type.Timestamps.round; +import static io.trino.spi.type.Timestamps.roundDiv; import static io.trino.spi.type.Varchars.truncateToLength; import static java.lang.Math.floorDiv; import static java.lang.Math.floorMod; import static java.lang.Math.toIntExact; +import static java.lang.String.format; import static java.time.ZoneOffset.UTC; import static java.time.format.DateTimeFormatter.ISO_LOCAL_DATE; import static java.time.format.DateTimeFormatter.ISO_LOCAL_TIME; +import static java.time.format.ResolverStyle.STRICT; public final class TimestampCoercer { @@ -47,6 +56,7 @@ public final class TimestampCoercer .appendLiteral(' ') .append(ISO_LOCAL_TIME) .toFormatter() + .withResolverStyle(STRICT) .withChronology(IsoChronology.INSTANCE); // Before 1900, Java Time and Joda Time are not consistent with java.sql.Date and java.util.Calendar @@ -83,4 +93,66 @@ protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int pos toType)); } } + + public static class VarcharToShortTimestampCoercer + extends TypeCoercer + { + public VarcharToShortTimestampCoercer(VarcharType fromType, TimestampType toType) + { + super(fromType, toType); + checkArgument(toType.isShort(), format("TIMESTAMP precision must be in range [0, %s]: %s", MAX_PRECISION, toType.getPrecision())); + } + + @Override + protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int position) + { + try { + Slice value = fromType.getSlice(block, position); + LocalDateTime dateTime = LOCAL_DATE_TIME.parse(value.toStringUtf8(), LocalDateTime::from); + long epochSecond = dateTime.toEpochSecond(UTC); + if (epochSecond < START_OF_MODERN_ERA_SECONDS) { + throw new TrinoException(HIVE_INVALID_TIMESTAMP_COERCION, "Coercion on historical dates is not supported"); + } + long roundedNanos = round(dateTime.getNano(), 9 - toType.getPrecision()); + long epochMicros = epochSecond * MICROSECONDS_PER_SECOND + roundDiv(roundedNanos, NANOSECONDS_PER_MICROSECOND); + toType.writeLong(blockBuilder, epochMicros); + } + catch (DateTimeParseException ignored) { + // Hive treats invalid String as null instead of propagating exception + // In case of bigger tables with all values being invalid, log output will be huge so avoiding log here. + blockBuilder.appendNull(); + } + } + } + + public static class VarcharToLongTimestampCoercer + extends TypeCoercer + { + public VarcharToLongTimestampCoercer(VarcharType fromType, TimestampType toType) + { + super(fromType, toType); + checkArgument(!toType.isShort(), format("Precision must be in the range [%s, %s]", MAX_SHORT_PRECISION + 1, MAX_PRECISION)); + } + + @Override + protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int position) + { + try { + Slice value = fromType.getSlice(block, position); + LocalDateTime dateTime = LOCAL_DATE_TIME.parse(value.toStringUtf8(), LocalDateTime::from); + long epochSecond = dateTime.toEpochSecond(UTC); + if (epochSecond < START_OF_MODERN_ERA_SECONDS) { + throw new TrinoException(HIVE_INVALID_TIMESTAMP_COERCION, "Coercion on historical dates is not supported"); + } + long epochMicros = epochSecond * MICROSECONDS_PER_SECOND + dateTime.getNano() / NANOSECONDS_PER_MICROSECOND; + int picosOfMicro = (dateTime.getNano() % NANOSECONDS_PER_MICROSECOND) * PICOSECONDS_PER_NANOSECOND; + toType.writeObject(blockBuilder, new LongTimestamp(epochMicros, picosOfMicro)); + } + catch (DateTimeParseException ignored) { + // Hive treats invalid String as null instead of propagating exception + // In case of bigger tables with all values being invalid, log output will be huge so avoiding log here. + blockBuilder.appendNull(); + } + } + } } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcTypeTranslator.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcTypeTranslator.java index 77028d6f4643..1167255a7711 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcTypeTranslator.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcTypeTranslator.java @@ -15,14 +15,20 @@ import io.trino.orc.metadata.OrcType.OrcTypeKind; import io.trino.plugin.hive.coercions.TimestampCoercer.LongTimestampToVarcharCoercer; +import io.trino.plugin.hive.coercions.TimestampCoercer.VarcharToLongTimestampCoercer; +import io.trino.plugin.hive.coercions.TimestampCoercer.VarcharToShortTimestampCoercer; import io.trino.plugin.hive.coercions.TypeCoercer; +import io.trino.spi.type.TimestampType; import io.trino.spi.type.Type; import io.trino.spi.type.VarcharType; import java.util.Optional; +import static io.trino.orc.metadata.OrcType.OrcTypeKind.STRING; import static io.trino.orc.metadata.OrcType.OrcTypeKind.TIMESTAMP; +import static io.trino.orc.metadata.OrcType.OrcTypeKind.VARCHAR; import static io.trino.spi.type.TimestampType.TIMESTAMP_NANOS; +import static io.trino.spi.type.VarcharType.createUnboundedVarcharType; public final class OrcTypeTranslator { @@ -33,6 +39,17 @@ private OrcTypeTranslator() {} if (fromOrcType == TIMESTAMP && toTrinoType instanceof VarcharType varcharType) { return Optional.of(new LongTimestampToVarcharCoercer(TIMESTAMP_NANOS, varcharType)); } + if (isVarcharType(fromOrcType) && toTrinoType instanceof TimestampType timestampType) { + if (timestampType.isShort()) { + return Optional.of(new VarcharToShortTimestampCoercer(createUnboundedVarcharType(), timestampType)); + } + return Optional.of(new VarcharToLongTimestampCoercer(createUnboundedVarcharType(), timestampType)); + } return Optional.empty(); } + + private static boolean isVarcharType(OrcTypeKind orcTypeKind) + { + return orcTypeKind == STRING || orcTypeKind == VARCHAR; + } } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveCoercionPolicy.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveCoercionPolicy.java index ffe29c344785..927c1b899e19 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveCoercionPolicy.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveCoercionPolicy.java @@ -63,7 +63,8 @@ private boolean canCoerce(HiveType fromHiveType, HiveType toHiveType, HiveTimest toHiveType.equals(HIVE_BYTE) || toHiveType.equals(HIVE_SHORT) || toHiveType.equals(HIVE_INT) || - toHiveType.equals(HIVE_LONG); + toHiveType.equals(HIVE_LONG) || + toHiveType.equals(HIVE_TIMESTAMP); } if (fromType instanceof CharType) { return toType instanceof CharType; diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestTimestampCoercer.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestTimestampCoercer.java index 1c5cb84171fa..0bd6f48be96b 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestTimestampCoercer.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestTimestampCoercer.java @@ -27,11 +27,13 @@ import java.time.LocalDateTime; import static io.airlift.slice.Slices.utf8Slice; +import static io.trino.plugin.hive.HiveTimestampPrecision.MICROSECONDS; import static io.trino.plugin.hive.HiveTimestampPrecision.NANOSECONDS; import static io.trino.plugin.hive.HiveType.toHiveType; import static io.trino.plugin.hive.coercions.CoercionUtils.createCoercer; import static io.trino.spi.predicate.Utils.blockToNativeValue; import static io.trino.spi.predicate.Utils.nativeValueToBlock; +import static io.trino.spi.type.TimestampType.TIMESTAMP_MICROS; import static io.trino.spi.type.TimestampType.TIMESTAMP_PICOS; import static io.trino.spi.type.VarcharType.createUnboundedVarcharType; import static io.trino.spi.type.VarcharType.createVarcharType; @@ -51,6 +53,22 @@ public void testTimestampToVarchar(String timestampValue, String hiveTimestampVa assertLongTimestampToVarcharCoercions(TIMESTAMP_PICOS, new LongTimestamp(timestamp.getEpochMicros(), timestamp.getPicosOfMicros()), createUnboundedVarcharType(), hiveTimestampValue); } + @Test(dataProvider = "timestampValuesProvider") + public void testVarcharToShortTimestamp(String timestampValue, String hiveTimestampValue) + { + LocalDateTime localDateTime = LocalDateTime.parse(timestampValue); + SqlTimestamp timestamp = SqlTimestamp.fromSeconds(TIMESTAMP_MICROS.getPrecision(), localDateTime.toEpochSecond(UTC), localDateTime.get(NANO_OF_SECOND)); + assertVarcharToShortTimestampCoercions(createUnboundedVarcharType(), utf8Slice(hiveTimestampValue), TIMESTAMP_MICROS, timestamp.getEpochMicros()); + } + + @Test(dataProvider = "timestampValuesProvider") + public void testVarcharToLongTimestamp(String timestampValue, String hiveTimestampValue) + { + LocalDateTime localDateTime = LocalDateTime.parse(timestampValue); + SqlTimestamp timestamp = SqlTimestamp.fromSeconds(TIMESTAMP_PICOS.getPrecision(), localDateTime.toEpochSecond(UTC), localDateTime.get(NANO_OF_SECOND)); + assertVarcharToLongTimestampCoercions(createUnboundedVarcharType(), utf8Slice(hiveTimestampValue), TIMESTAMP_PICOS, new LongTimestamp(timestamp.getEpochMicros(), timestamp.getPicosOfMicros())); + } + @Test public void testTimestampToSmallerVarchar() { @@ -93,11 +111,53 @@ public void testHistoricalLongTimestampToVarchar() { LocalDateTime localDateTime = LocalDateTime.parse("1899-12-31T23:59:59.999999999"); SqlTimestamp timestamp = SqlTimestamp.fromSeconds(TIMESTAMP_PICOS.getPrecision(), localDateTime.toEpochSecond(UTC), localDateTime.get(NANO_OF_SECOND)); - assertThatThrownBy(() -> assertLongTimestampToVarcharCoercions( - TIMESTAMP_PICOS, - new LongTimestamp(timestamp.getEpochMicros(), timestamp.getPicosOfMicros()), + assertThatThrownBy(() -> + assertLongTimestampToVarcharCoercions( + TIMESTAMP_PICOS, + new LongTimestamp(timestamp.getEpochMicros(), timestamp.getPicosOfMicros()), + createUnboundedVarcharType(), + "1899-12-31 23:59:59.999999999")) + .isInstanceOf(TrinoException.class) + .hasMessageContaining("Coercion on historical dates is not supported"); + } + + @Test(dataProvider = "invalidValue") + public void testInvalidVarcharToShortTimestamp(String invalidValue) + { + assertVarcharToShortTimestampCoercions(createUnboundedVarcharType(), utf8Slice(invalidValue), TIMESTAMP_MICROS, null); + } + + @Test(dataProvider = "invalidValue") + public void testInvalidVarcharLongTimestamp(String invalidValue) + { + assertVarcharToLongTimestampCoercions(createUnboundedVarcharType(), utf8Slice(invalidValue), TIMESTAMP_MICROS, null); + } + + @Test + public void testHistoricalVarcharToShortTimestamp() + { + LocalDateTime localDateTime = LocalDateTime.parse("1899-12-31T23:59:59.999999"); + SqlTimestamp timestamp = SqlTimestamp.fromSeconds(TIMESTAMP_MICROS.getPrecision(), localDateTime.toEpochSecond(UTC), localDateTime.get(NANO_OF_SECOND)); + assertThatThrownBy(() -> + assertVarcharToShortTimestampCoercions( + createUnboundedVarcharType(), + utf8Slice("1899-12-31 23:59:59.999999"), + TIMESTAMP_MICROS, + timestamp.getEpochMicros())) + .isInstanceOf(TrinoException.class) + .hasMessageContaining("Coercion on historical dates is not supported"); + } + + @Test + public void testHistoricalVarcharToLongTimestamp() + { + LocalDateTime localDateTime = LocalDateTime.parse("1899-12-31T23:59:59.999999"); + SqlTimestamp timestamp = SqlTimestamp.fromSeconds(TIMESTAMP_PICOS.getPrecision(), localDateTime.toEpochSecond(UTC), localDateTime.get(NANO_OF_SECOND)); + assertThatThrownBy(() -> assertVarcharToShortTimestampCoercions( createUnboundedVarcharType(), - "1899-12-31 23:59:59.999999999")) + utf8Slice("1899-12-31 23:59:59.999999"), + TIMESTAMP_PICOS, + timestamp.getEpochMicros())) .isInstanceOf(TrinoException.class) .hasMessageContaining("Coercion on historical dates is not supported"); } @@ -129,11 +189,36 @@ public Object[][] timestampValuesProvider() }; } + @DataProvider + public Object[][] invalidValue() + { + return new Object[][] { + {"Invalid timestamp"}, // Invalid string + {"2022"}, // Partial timestamp value + {"2001-04-01T00:13:42.000"}, // ISOFormat date + {"2001-14-01 00:13:42.000"}, // Invalid month + {"2001-01-32 00:13:42.000"}, // Invalid day + {"2001-04-01 23:59:60.000"}, // Invalid second + {"2001-04-01 23:60:01.000"}, // Invalid minute + {"2001-04-01 27:01:01.000"}, // Invalid hour + }; + } + public static void assertLongTimestampToVarcharCoercions(TimestampType fromType, LongTimestamp valueToBeCoerced, VarcharType toType, String expectedValue) { assertCoercions(fromType, valueToBeCoerced, toType, utf8Slice(expectedValue), NANOSECONDS); } + public static void assertVarcharToShortTimestampCoercions(Type fromType, Object valueToBeCoerced, Type toType, Object expectedValue) + { + assertCoercions(fromType, valueToBeCoerced, toType, expectedValue, MICROSECONDS); + } + + public static void assertVarcharToLongTimestampCoercions(Type fromType, Object valueToBeCoerced, Type toType, Object expectedValue) + { + assertCoercions(fromType, valueToBeCoerced, toType, expectedValue, NANOSECONDS); + } + public static void assertCoercions(Type fromType, Object valueToBeCoerced, Type toType, Object expectedValue, HiveTimestampPrecision timestampPrecision) { Block coercedValue = createCoercer(TESTING_TYPE_MANAGER, toHiveType(fromType), toHiveType(toType), timestampPrecision).orElseThrow() diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/BaseTestHiveCoercion.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/BaseTestHiveCoercion.java index a47442365ecd..a8e534539f07 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/BaseTestHiveCoercion.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/BaseTestHiveCoercion.java @@ -66,6 +66,7 @@ import static java.sql.JDBCType.REAL; import static java.sql.JDBCType.SMALLINT; import static java.sql.JDBCType.STRUCT; +import static java.sql.JDBCType.TIMESTAMP; import static java.sql.JDBCType.VARCHAR; import static java.util.Collections.nCopies; import static java.util.Locale.ENGLISH; @@ -123,6 +124,8 @@ protected void doTestHiveCoercion(HiveTableDefinition tableDefinition) "timestamp_to_string", "timestamp_to_bounded_varchar", "timestamp_to_smaller_varchar", + "smaller_varchar_to_timestamp", + "varchar_to_timestamp", "id"); Function>> expected = engine -> expectedValuesForEngineProvider(engine, tableName, decimalToFloatVal, floatToDecimalVal); @@ -182,6 +185,8 @@ protected void insertTableRows(String tableName, String floatToDoubleType) " TIMESTAMP '2121-07-15 15:30:12.123', " + " TIMESTAMP '2121-07-15 15:30:12.123', " + " TIMESTAMP '2121-07-15 15:30:12.123', " + + " '2121', " + + " '2019-01-29 23:59:59.123', " + " 1), " + "(" + " CAST(ROW (NULL, 1, -100, -2323, -12345, 2) AS ROW(keep VARCHAR, ti2si TINYINT, si2int SMALLINT, int2bi INTEGER, bi2vc BIGINT, lower2uppercase BIGINT)), " + @@ -215,6 +220,8 @@ protected void insertTableRows(String tableName, String floatToDoubleType) " TIMESTAMP '1970-01-01 00:00:00.123', " + " TIMESTAMP '1970-01-01 00:00:00.123', " + " TIMESTAMP '1970-01-01 00:00:00.123', " + + " '1970', " + + " '1970-01-01 00:00:00.123', " + " 1)", tableName, floatToDoubleType)); @@ -367,6 +374,12 @@ else if (getHiveVersionMajor() == 3 && isFormat.test("orc")) { .put("timestamp_to_smaller_varchar", ImmutableList.of( "2121", "1970")) + .put("smaller_varchar_to_timestamp", Arrays.asList( + null, + null)) + .put("varchar_to_timestamp", Arrays.asList( + Timestamp.valueOf("2019-01-29 23:59:59.123"), + Timestamp.valueOf("1970-01-01 00:00:00.123"))) .put("id", ImmutableList.of( 1, 1)) @@ -383,10 +396,11 @@ protected void doTestHiveCoercionWithDifferentTimestampPrecision(HiveTableDefini """ INSERT INTO %s SELECT - (CAST(ROW (timestamp_value, -1, timestamp_value) AS ROW(keep TIMESTAMP(9), si2i SMALLINT, timestamp2string TIMESTAMP(9)))), - ARRAY [CAST(ROW (timestamp_value, -1, timestamp_value) AS ROW (keep TIMESTAMP(9), si2i SMALLINT, timestamp2string TIMESTAMP(9)))], - MAP (ARRAY [2], ARRAY [CAST(ROW (timestamp_value, -1, timestamp_value) AS ROW (keep TIMESTAMP(9), si2i SMALLINT, timestamp2string TIMESTAMP(9)))]), + (CAST(ROW (timestamp_value, -1, timestamp_value, CAST(timestamp_value AS VARCHAR)) AS ROW(keep TIMESTAMP(9), si2i SMALLINT, timestamp2string TIMESTAMP(9), string2timestamp VARCHAR))), + ARRAY [CAST(ROW (timestamp_value, -1, timestamp_value, CAST(timestamp_value AS VARCHAR)) AS ROW (keep TIMESTAMP(9), si2i SMALLINT, timestamp2string TIMESTAMP(9), string2timestamp VARCHAR))], + MAP (ARRAY [2], ARRAY [CAST(ROW (timestamp_value, -1, timestamp_value, CAST(timestamp_value AS VARCHAR)) AS ROW (keep TIMESTAMP(9), si2i SMALLINT, timestamp2string TIMESTAMP(9), string2timestamp VARCHAR))]), timestamp_value, + CAST(timestamp_value AS VARCHAR), 1 FROM (VALUES (TIMESTAMP '2121-07-15 15:30:12.123499'), @@ -397,18 +411,21 @@ protected void doTestHiveCoercionWithDifferentTimestampPrecision(HiveTableDefini (TIMESTAMP '2121-07-15 15:30:12.123500001')) AS t (timestamp_value) """.formatted(tableName)); - onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN timestamp_row_to_row timestamp_row_to_row struct", tableName)); - onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN timestamp_list_to_list timestamp_list_to_list array>", tableName)); - onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN timestamp_map_to_map timestamp_map_to_map map>", tableName)); + onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN timestamp_row_to_row timestamp_row_to_row struct", tableName)); + onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN timestamp_list_to_list timestamp_list_to_list array>", tableName)); + onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN timestamp_map_to_map timestamp_map_to_map map>", tableName)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN timestamp_to_string timestamp_to_string string", tableName)); + onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN string_to_timestamp string_to_timestamp TIMESTAMP", tableName)); for (HiveTimestampPrecision hiveTimestampPrecision : HiveTimestampPrecision.values()) { + String timestampType = "timestamp(%d)".formatted(hiveTimestampPrecision.getPrecision()); setHiveTimestampPrecision(hiveTimestampPrecision); assertThat(onTrino().executeQuery("SHOW COLUMNS FROM " + tableName).project(1, 2)).containsExactlyInOrder( - row("timestamp_row_to_row", "row(keep timestamp(%d), si2i integer, timestamp2string varchar)".formatted(hiveTimestampPrecision.getPrecision())), - row("timestamp_list_to_list", "array(row(keep timestamp(%d), si2i integer, timestamp2string varchar))".formatted(hiveTimestampPrecision.getPrecision())), - row("timestamp_map_to_map", "map(integer, row(keep timestamp(%d), si2i integer, timestamp2string varchar))".formatted(hiveTimestampPrecision.getPrecision())), + row("timestamp_row_to_row", "row(keep %1$s, si2i integer, timestamp2string varchar, string2timestamp %1$s)".formatted(timestampType)), + row("timestamp_list_to_list", "array(row(keep %1$s, si2i integer, timestamp2string varchar, string2timestamp %1$s))".formatted(timestampType)), + row("timestamp_map_to_map", "map(integer, row(keep %1$s, si2i integer, timestamp2string varchar, string2timestamp %1$s))".formatted(timestampType)), row("timestamp_to_string", "varchar"), + row("string_to_timestamp", timestampType), row("id", "bigint")); List allColumns = ImmutableList.of( @@ -416,6 +433,7 @@ protected void doTestHiveCoercionWithDifferentTimestampPrecision(HiveTableDefini "timestamp_list_to_list", "timestamp_map_to_map", "timestamp_to_string", + "string_to_timestamp", "id"); // For Trino, remove unsupported columns @@ -448,12 +466,12 @@ protected Map> expectedRowsForEngineProvider(Engine engine, "2121-07-15 15:30:12.123500001"); if (engine == Engine.HIVE) { List baseData = ImmutableList.of( - "{\"keep\":\"2121-07-15 15:30:12.123499\",\"si2i\":-1,\"timestamp2string\":\"2121-07-15 15:30:12.123499\"}", - "{\"keep\":\"2121-07-15 15:30:12.1235\",\"si2i\":-1,\"timestamp2string\":\"2121-07-15 15:30:12.1235\"}", - "{\"keep\":\"2121-07-15 15:30:12.123501\",\"si2i\":-1,\"timestamp2string\":\"2121-07-15 15:30:12.123501\"}", - "{\"keep\":\"2121-07-15 15:30:12.123499999\",\"si2i\":-1,\"timestamp2string\":\"2121-07-15 15:30:12.123499999\"}", - "{\"keep\":\"2121-07-15 15:30:12.1235\",\"si2i\":-1,\"timestamp2string\":\"2121-07-15 15:30:12.1235\"}", - "{\"keep\":\"2121-07-15 15:30:12.123500001\",\"si2i\":-1,\"timestamp2string\":\"2121-07-15 15:30:12.123500001\"}"); + "{\"keep\":\"2121-07-15 15:30:12.123499\",\"si2i\":-1,\"timestamp2string\":\"2121-07-15 15:30:12.123499\",\"string2timestamp\":\"2121-07-15 15:30:12.123499\"}", + "{\"keep\":\"2121-07-15 15:30:12.1235\",\"si2i\":-1,\"timestamp2string\":\"2121-07-15 15:30:12.1235\",\"string2timestamp\":\"2121-07-15 15:30:12.1235\"}", + "{\"keep\":\"2121-07-15 15:30:12.123501\",\"si2i\":-1,\"timestamp2string\":\"2121-07-15 15:30:12.123501\",\"string2timestamp\":\"2121-07-15 15:30:12.123501\"}", + "{\"keep\":\"2121-07-15 15:30:12.123499999\",\"si2i\":-1,\"timestamp2string\":\"2121-07-15 15:30:12.123499999\",\"string2timestamp\":\"2121-07-15 15:30:12.123499999\"}", + "{\"keep\":\"2121-07-15 15:30:12.1235\",\"si2i\":-1,\"timestamp2string\":\"2121-07-15 15:30:12.1235\",\"string2timestamp\":\"2121-07-15 15:30:12.1235\"}", + "{\"keep\":\"2121-07-15 15:30:12.123500001\",\"si2i\":-1,\"timestamp2string\":\"2121-07-15 15:30:12.123500001\",\"string2timestamp\":\"2121-07-15 15:30:12.123500001\"}"); return ImmutableMap.>builder() .put("timestamp_row_to_row", baseData) .put("timestamp_list_to_list", baseData.stream() @@ -464,11 +482,15 @@ protected Map> expectedRowsForEngineProvider(Engine engine, .map("{2:%s}"::formatted) .collect(toImmutableList())) .put("timestamp_to_string", timestampAsString) + .put("string_to_timestamp", timestampAsString.stream() + .map(String.class::cast) + .map(Timestamp::valueOf) + .collect(toImmutableList())) .put("id", nCopies(6, 1)) .buildOrThrow(); } - List timestampValue = switch (timestampPrecision) { + List timestampValue = switch (timestampPrecision) { case MILLISECONDS -> ImmutableList.of( Timestamp.valueOf("2121-07-15 15:30:12.123"), Timestamp.valueOf("2121-07-15 15:30:12.124"), @@ -499,6 +521,7 @@ protected Map> expectedRowsForEngineProvider(Engine engine, .addField("keep", timestamp) .addField("si2i", -1) .addField("timestamp2string", timestampCoerced) + .addField("string2timestamp", timestamp) .build()) .collect(toImmutableList()); @@ -511,6 +534,7 @@ protected Map> expectedRowsForEngineProvider(Engine engine, .map(entry -> ImmutableMap.of(2, entry)) .collect(toImmutableList())) .put("timestamp_to_string", timestampAsString) + .put("string_to_timestamp", timestampValue) .put("id", nCopies(6, 1)) .buildOrThrow(); } @@ -745,6 +769,8 @@ private void assertProperAlteredTableSchema(String tableName) row("timestamp_to_string", "varchar"), row("timestamp_to_bounded_varchar", "varchar(30)"), row("timestamp_to_smaller_varchar", "varchar(4)"), + row("smaller_varchar_to_timestamp", "timestamp(3)"), + row("varchar_to_timestamp", "timestamp(3)"), row("id", "bigint")); } @@ -796,10 +822,13 @@ private void assertColumnTypes( .put("timestamp_to_string", VARCHAR) .put("timestamp_to_bounded_varchar", VARCHAR) .put("timestamp_to_smaller_varchar", VARCHAR) + .put("smaller_varchar_to_timestamp", TIMESTAMP) + .put("varchar_to_timestamp", TIMESTAMP) .put("timestamp_to_varchar", VARCHAR) .put("timestamp_row_to_row", engine == Engine.TRINO ? JAVA_OBJECT : STRUCT) // row .put("timestamp_list_to_list", ARRAY) // list .put("timestamp_map_to_map", JAVA_OBJECT) // map + .put("string_to_timestamp", TIMESTAMP) .buildOrThrow(); assertThat(queryResult) @@ -841,6 +870,8 @@ private static void alterTableColumnTypes(String tableName) onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN timestamp_to_string timestamp_to_string string", tableName)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN timestamp_to_bounded_varchar timestamp_to_bounded_varchar varchar(30)", tableName)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN timestamp_to_smaller_varchar timestamp_to_smaller_varchar varchar(4)", tableName)); + onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN smaller_varchar_to_timestamp smaller_varchar_to_timestamp timestamp", tableName)); + onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN varchar_to_timestamp varchar_to_timestamp timestamp", tableName)); } protected static TableInstance mutableTableInstanceOf(TableDefinition tableDefinition) diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnPartitionedTable.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnPartitionedTable.java index ea7cae009979..ec3d901ce2ad 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnPartitionedTable.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnPartitionedTable.java @@ -122,7 +122,9 @@ private static HiveTableDefinition.HiveTableDefinitionBuilder tableDefinitionBui " char_to_smaller_char CHAR(3)," + " timestamp_to_string TIMESTAMP," + " timestamp_to_bounded_varchar TIMESTAMP," + - " timestamp_to_smaller_varchar TIMESTAMP" + + " timestamp_to_smaller_varchar TIMESTAMP," + + " smaller_varchar_to_timestamp VARCHAR(4)," + + " varchar_to_timestamp STRING" + ") " + "PARTITIONED BY (id BIGINT) " + rowFormat.map(s -> format("ROW FORMAT %s ", s)).orElse("") + @@ -135,10 +137,11 @@ private static HiveTableDefinition.HiveTableDefinitionBuilder tableDefinitionFor return HiveTableDefinition.builder(tableName) .setCreateTableDDLTemplate("" + "CREATE TABLE %NAME%(" + - " timestamp_row_to_row STRUCT, " + - " timestamp_list_to_list ARRAY>, " + - " timestamp_map_to_map MAP>," + - " timestamp_to_string TIMESTAMP" + + " timestamp_row_to_row STRUCT, " + + " timestamp_list_to_list ARRAY>, " + + " timestamp_map_to_map MAP>," + + " timestamp_to_string TIMESTAMP," + + " string_to_timestamp STRING" + ") " + "PARTITIONED BY (id BIGINT) " + rowFormat.map(s -> format("ROW FORMAT %s ", s)).orElse("") + diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnUnpartitionedTable.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnUnpartitionedTable.java index 82ec2347b987..358fb5836fb4 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnUnpartitionedTable.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnUnpartitionedTable.java @@ -80,6 +80,8 @@ char_to_smaller_char CHAR(3), timestamp_to_string TIMESTAMP, timestamp_to_bounded_varchar TIMESTAMP, timestamp_to_smaller_varchar TIMESTAMP, + smaller_varchar_to_timestamp VARCHAR(4), + varchar_to_timestamp STRING, id BIGINT) STORED AS\s""" + fileFormat); } @@ -90,10 +92,11 @@ private static HiveTableDefinition.HiveTableDefinitionBuilder tableDefinitionFor return HiveTableDefinition.builder(tableName) .setCreateTableDDLTemplate(""" CREATE TABLE %NAME%( - timestamp_row_to_row STRUCT, - timestamp_list_to_list ARRAY>, - timestamp_map_to_map MAP>, + timestamp_row_to_row STRUCT, + timestamp_list_to_list ARRAY>, + timestamp_map_to_map MAP>, timestamp_to_string TIMESTAMP, + string_to_timestamp STRING, id BIGINT) STORED AS\s""" + fileFormat); }