From 55656b77d8ff2fbac73fa2244005e12998bd9803 Mon Sep 17 00:00:00 2001 From: tangjiangling Date: Tue, 16 Aug 2022 14:39:16 +0800 Subject: [PATCH] Add support for reading DateTime(timezone) from ClickHouse Previously Trino only supported reading `DateTime` from ClickHouse (no timezone specified) and for `DateTime(timezone)` users had to configure the parameter `unsupported-type-handling` or `jdbc-types-mapped-to-varchar` to support reading of this type. This commit supports reading `DateTime(timezone)` from ClickHouse and maps it to Trino's `TIMESTAMP(0) WITH TIME ZONE`. NOTE: writing data from Trino to ClickHouse `DateTime(timezone)` is not supported. --- docs/src/main/sphinx/connector/clickhouse.rst | 4 +- .../plugin/clickhouse/ClickHouseClient.java | 36 +++++++++++ .../clickhouse/BaseClickHouseTypeMapping.java | 64 +++++++++++++++++++ 3 files changed, 102 insertions(+), 2 deletions(-) diff --git a/docs/src/main/sphinx/connector/clickhouse.rst b/docs/src/main/sphinx/connector/clickhouse.rst index e2c95f8793de..3b6a225f1e96 100644 --- a/docs/src/main/sphinx/connector/clickhouse.rst +++ b/docs/src/main/sphinx/connector/clickhouse.rst @@ -238,8 +238,8 @@ to the following table: * - ``Date`` - ``DATE`` - - * - ``DateTime`` - - ``TIMESTAMP(0)`` + * - ``DateTime[(timezone)]`` + - ``TIMESTAMP(0) [WITH TIME ZONE]`` - * - ``IPv4`` - ``IPADDRESS`` diff --git a/plugin/trino-clickhouse/src/main/java/io/trino/plugin/clickhouse/ClickHouseClient.java b/plugin/trino-clickhouse/src/main/java/io/trino/plugin/clickhouse/ClickHouseClient.java index 4aebe91199d4..c72def7b21a8 100644 --- a/plugin/trino-clickhouse/src/main/java/io/trino/plugin/clickhouse/ClickHouseClient.java +++ b/plugin/trino-clickhouse/src/main/java/io/trino/plugin/clickhouse/ClickHouseClient.java @@ -35,6 +35,7 @@ import io.trino.plugin.jdbc.JdbcExpression; import io.trino.plugin.jdbc.JdbcTableHandle; import io.trino.plugin.jdbc.JdbcTypeHandle; +import io.trino.plugin.jdbc.LongReadFunction; import 
io.trino.plugin.jdbc.LongWriteFunction; import io.trino.plugin.jdbc.ObjectWriteFunction; import io.trino.plugin.jdbc.QueryBuilder; @@ -59,6 +60,7 @@ import io.trino.spi.type.Decimals; import io.trino.spi.type.Int128; import io.trino.spi.type.StandardTypes; +import io.trino.spi.type.TimeZoneKey; import io.trino.spi.type.Type; import io.trino.spi.type.TypeManager; import io.trino.spi.type.TypeSignature; @@ -79,8 +81,10 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Types; +import java.time.Instant; import java.time.LocalDate; import java.time.LocalDateTime; +import java.time.ZonedDateTime; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -143,6 +147,9 @@ import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; import static io.trino.spi.type.BigintType.BIGINT; import static io.trino.spi.type.BooleanType.BOOLEAN; +import static io.trino.spi.type.DateTimeEncoding.packDateTimeWithZone; +import static io.trino.spi.type.DateTimeEncoding.unpackMillisUtc; +import static io.trino.spi.type.DateTimeEncoding.unpackZoneKey; import static io.trino.spi.type.DateType.DATE; import static io.trino.spi.type.DecimalType.createDecimalType; import static io.trino.spi.type.DoubleType.DOUBLE; @@ -151,6 +158,7 @@ import static io.trino.spi.type.SmallintType.SMALLINT; import static io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS; import static io.trino.spi.type.TimestampType.TIMESTAMP_SECONDS; +import static io.trino.spi.type.TimestampWithTimeZoneType.TIMESTAMP_TZ_SECONDS; import static io.trino.spi.type.Timestamps.MICROSECONDS_PER_SECOND; import static io.trino.spi.type.Timestamps.NANOSECONDS_PER_MICROSECOND; import static io.trino.spi.type.TinyintType.TINYINT; @@ -589,6 +597,7 @@ public Optional toColumnMapping(ConnectorSession session, Connect case Types.TIMESTAMP: if (columnDataType == ClickHouseDataType.DateTime) { + // ClickHouse DateTime does not have sub-second precision verify(typeHandle.getRequiredDecimalDigits() == 0, 
"Expected 0 as timestamp precision, but got %s", typeHandle.getRequiredDecimalDigits()); return Optional.of(ColumnMapping.longMapping( TIMESTAMP_SECONDS, @@ -597,6 +606,16 @@ public Optional toColumnMapping(ConnectorSession session, Connect } // TODO (https://github.com/trinodb/trino/issues/10537) Add support for Datetime64 type return Optional.of(timestampColumnMappingUsingSqlTimestampWithRounding(TIMESTAMP_MILLIS)); + + case Types.TIMESTAMP_WITH_TIMEZONE: + if (columnDataType == ClickHouseDataType.DateTime) { + // ClickHouse DateTime does not have sub-second precision + verify(typeHandle.getRequiredDecimalDigits() == 0, "Expected 0 as timestamp with time zone precision, but got %s", typeHandle.getRequiredDecimalDigits()); + return Optional.of(ColumnMapping.longMapping( + TIMESTAMP_TZ_SECONDS, + shortTimestampWithTimeZoneReadFunction(), + shortTimestampWithTimeZoneWriteFunction())); + } } if (getUnsupportedTypeHandling(session) == CONVERT_TO_VARCHAR) { @@ -769,6 +788,23 @@ private static LongWriteFunction timestampSecondsWriteFunction(ClickHouseVersion }; } + private static LongReadFunction shortTimestampWithTimeZoneReadFunction() + { + return (resultSet, columnIndex) -> { + ZonedDateTime zonedDateTime = resultSet.getObject(columnIndex, ZonedDateTime.class); + return packDateTimeWithZone(zonedDateTime.toInstant().toEpochMilli(), zonedDateTime.getZone().getId()); + }; + } + + private static LongWriteFunction shortTimestampWithTimeZoneWriteFunction() + { + return (statement, index, value) -> { + long millisUtc = unpackMillisUtc(value); + TimeZoneKey timeZoneKey = unpackZoneKey(value); + statement.setObject(index, Instant.ofEpochMilli(millisUtc).atZone(timeZoneKey.getZoneId())); + }; + } + private ColumnMapping ipAddressColumnMapping(String writeBindExpression) { return ColumnMapping.sliceMapping( diff --git a/plugin/trino-clickhouse/src/test/java/io/trino/plugin/clickhouse/BaseClickHouseTypeMapping.java 
b/plugin/trino-clickhouse/src/test/java/io/trino/plugin/clickhouse/BaseClickHouseTypeMapping.java index b0a358356d72..263c05870b98 100644 --- a/plugin/trino-clickhouse/src/test/java/io/trino/plugin/clickhouse/BaseClickHouseTypeMapping.java +++ b/plugin/trino-clickhouse/src/test/java/io/trino/plugin/clickhouse/BaseClickHouseTypeMapping.java @@ -33,6 +33,7 @@ import java.time.LocalDate; import java.time.LocalDateTime; import java.time.ZoneId; +import java.util.function.Function; import static com.google.common.base.Preconditions.checkState; import static com.google.common.base.Verify.verify; @@ -45,6 +46,7 @@ import static io.trino.spi.type.RealType.REAL; import static io.trino.spi.type.SmallintType.SMALLINT; import static io.trino.spi.type.TimestampType.createTimestampType; +import static io.trino.spi.type.TimestampWithTimeZoneType.TIMESTAMP_TZ_SECONDS; import static io.trino.spi.type.TinyintType.TINYINT; import static io.trino.spi.type.VarbinaryType.VARBINARY; import static io.trino.spi.type.VarcharType.VARCHAR; @@ -778,6 +780,63 @@ public Object[][] unsupportedTimestampDataProvider() }; } + @Test(dataProvider = "sessionZonesDataProvider") + public void testClickHouseDateTimeWithTimeZone(ZoneId sessionZone) + { + Session session = Session.builder(getSession()) + .setTimeZoneKey(TimeZoneKey.getTimeZoneKey(sessionZone.getId())) + .build(); + + dateTimeWithTimeZoneTest(clickhouseDateTimeInputTypeFactory("datetime")) + .execute(getQueryRunner(), session, clickhouseCreateAndInsert("tpch.datetime_tz")); + } + + private SqlDataTypeTest dateTimeWithTimeZoneTest(Function<ZoneId, String> inputTypeFactory) + { + ZoneId utc = ZoneId.of("UTC"); + SqlDataTypeTest tests = SqlDataTypeTest.create() + .addRoundTrip(format("Nullable(%s)", inputTypeFactory.apply(utc)), "NULL", TIMESTAMP_TZ_SECONDS, "CAST(NULL AS TIMESTAMP(0) WITH TIME ZONE)") + + // Since ClickHouse datetime(timezone) does not support values before epoch, we do not test this here. 
+ + // epoch + .addRoundTrip(inputTypeFactory.apply(utc), "0", TIMESTAMP_TZ_SECONDS, "TIMESTAMP '1970-01-01 00:00:00 Z'") + .addRoundTrip(inputTypeFactory.apply(utc), "'1970-01-01 00:00:00'", TIMESTAMP_TZ_SECONDS, "TIMESTAMP '1970-01-01 00:00:00 Z'") + .addRoundTrip(inputTypeFactory.apply(kathmandu), "'1970-01-01 00:00:00'", TIMESTAMP_TZ_SECONDS, "TIMESTAMP '1970-01-01 05:30:00 +05:30'") + + // after epoch + .addRoundTrip(inputTypeFactory.apply(utc), "'2019-03-18 10:01:17'", TIMESTAMP_TZ_SECONDS, "TIMESTAMP '2019-03-18 10:01:17 Z'") + .addRoundTrip(inputTypeFactory.apply(kathmandu), "'2019-03-18 10:01:17'", TIMESTAMP_TZ_SECONDS, "TIMESTAMP '2019-03-18 10:01:17 +05:45'") + .addRoundTrip(inputTypeFactory.apply(ZoneId.of("GMT")), "'2019-03-18 10:01:17'", TIMESTAMP_TZ_SECONDS, "TIMESTAMP '2019-03-18 10:01:17 Z'") + .addRoundTrip(inputTypeFactory.apply(ZoneId.of("UTC+00:00")), "'2019-03-18 10:01:17'", TIMESTAMP_TZ_SECONDS, "TIMESTAMP '2019-03-18 10:01:17 Z'") + + // time doubled in JVM zone + .addRoundTrip(inputTypeFactory.apply(utc), "'2018-10-28 01:33:17'", TIMESTAMP_TZ_SECONDS, "TIMESTAMP '2018-10-28 01:33:17 Z'") + .addRoundTrip(inputTypeFactory.apply(jvmZone), "'2018-10-28 01:33:17'", TIMESTAMP_TZ_SECONDS, "TIMESTAMP '2018-10-28 01:33:17 -05:00'") + .addRoundTrip(inputTypeFactory.apply(kathmandu), "'2018-10-28 01:33:17'", TIMESTAMP_TZ_SECONDS, "TIMESTAMP '2018-10-28 01:33:17 +05:45'") + + // time doubled in Vilnius + .addRoundTrip(inputTypeFactory.apply(utc), "'2018-10-28 03:33:33'", TIMESTAMP_TZ_SECONDS, "TIMESTAMP '2018-10-28 03:33:33 Z'") + .addRoundTrip(inputTypeFactory.apply(vilnius), "'2018-10-28 03:33:33'", TIMESTAMP_TZ_SECONDS, "TIMESTAMP '2018-10-28 03:33:33 +03:00'") + .addRoundTrip(inputTypeFactory.apply(kathmandu), "'2018-10-28 03:33:33'", TIMESTAMP_TZ_SECONDS, "TIMESTAMP '2018-10-28 03:33:33 +05:45'") + + // time gap in JVM zone + .addRoundTrip(inputTypeFactory.apply(utc), "'1970-01-01 00:13:42'", TIMESTAMP_TZ_SECONDS, "TIMESTAMP '1970-01-01 00:13:42 
Z'") + // TODO: Check the range of DateTime(timezone) values written from Trino to ClickHouse to prevent ClickHouse from storing incorrect results. + // e.g. 1970-01-01 00:13:42 will become 1970-01-01 05:30:00 + // .addRoundTrip(inputTypeFactory.apply(kathmandu), "'1970-01-01 00:13:42'", TIMESTAMP_TZ_SECONDS, "TIMESTAMP '1970-01-01 00:13:42 +05:30'") + .addRoundTrip(inputTypeFactory.apply(utc), "'2018-04-01 02:13:55'", TIMESTAMP_TZ_SECONDS, "TIMESTAMP '2018-04-01 02:13:55 Z'") + .addRoundTrip(inputTypeFactory.apply(kathmandu), "'2018-04-01 02:13:55'", TIMESTAMP_TZ_SECONDS, "TIMESTAMP '2018-04-01 02:13:55 +05:45'") + + // time gap in Vilnius + .addRoundTrip(inputTypeFactory.apply(kathmandu), "'2018-03-25 03:17:17'", TIMESTAMP_TZ_SECONDS, "TIMESTAMP '2018-03-25 03:17:17 +05:45'") + + // time gap in Kathmandu + .addRoundTrip(inputTypeFactory.apply(vilnius), "'1986-01-01 00:13:07'", TIMESTAMP_TZ_SECONDS, "TIMESTAMP '1986-01-01 00:13:07 +03:00'"); + + return tests; + } + @DataProvider public Object[][] sessionZonesDataProvider() { @@ -884,4 +943,9 @@ protected SqlExecutor onRemoteDatabase() { return clickhouseServer::execute; } + + private static Function<ZoneId, String> clickhouseDateTimeInputTypeFactory(String inputTypePrefix) + { + return zone -> format("%s('%s')", inputTypePrefix, zone); + } }