Support for reading TIMESTAMP WITH LOCAL TIME ZONE in Hive connector #13595

Merged (2 commits) on Jul 10, 2023
6 changes: 3 additions & 3 deletions docs/src/main/sphinx/connector/hive.rst
@@ -1673,9 +1673,9 @@ Hive 3-related limitations

* For security reasons, the ``sys`` system catalog is not accessible.

-* Hive's ``timestamp with local zone`` data type is not supported.
-  It is possible to read from a table with a column of this type, but the column
-  data is not accessible. Writing to such a table is not supported.
+* Hive's ``timestamp with local zone`` data type is mapped to
+  ``timestamp with time zone`` with UTC timezone. It only supports reading

Member

Why would writing not work? If the physical representation is the same, then writing should just work, I think. Have you tried it?

Member Author

It requires some additional changes, like adding cases to isWritablePrimitiveType and ParquetSchemaConverter.getPrimitiveType. We'll need support for writing eventually too - I can add it in this PR or in a following one.

+  values - writing to tables with columns of this type is not supported.

* Due to Hive issues `HIVE-21002 <https://issues.apache.org/jira/browse/HIVE-21002>`_
and `HIVE-22167 <https://issues.apache.org/jira/browse/HIVE-22167>`_, Trino does
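To put the documented behavior in user terms, here is a minimal read-side JDBC sketch. It assumes the Trino JDBC driver is on the classpath; the coordinator address, catalog, schema, table name, and column name are illustrative assumptions, not taken from this PR.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class ReadLocalTzColumn
{
    public static void main(String[] args)
            throws Exception
    {
        // Hypothetical coordinator, catalog, and schema.
        String url = "jdbc:trino://localhost:8080/hive/default";
        try (Connection connection = DriverManager.getConnection(url, "admin", null);
                Statement statement = connection.createStatement();
                // "events" and "created_at" are made-up names for a Hive table whose column
                // is declared as TIMESTAMP WITH LOCAL TIME ZONE.
                ResultSet resultSet = statement.executeQuery("SELECT created_at FROM events")) {
            while (resultSet.next()) {
                // After this change the column reads as timestamp with time zone, rendered in UTC.
                System.out.println(resultSet.getString(1));
            }
        }
        // Writing remains unsupported: an INSERT targeting such a column is rejected.
    }
}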
@@ -55,6 +55,7 @@
import static io.trino.plugin.hive.util.SerdeConstants.INT_TYPE_NAME;
import static io.trino.plugin.hive.util.SerdeConstants.SMALLINT_TYPE_NAME;
import static io.trino.plugin.hive.util.SerdeConstants.STRING_TYPE_NAME;
import static io.trino.plugin.hive.util.SerdeConstants.TIMESTAMPLOCALTZ_TYPE_NAME;
import static io.trino.plugin.hive.util.SerdeConstants.TIMESTAMP_TYPE_NAME;
import static io.trino.plugin.hive.util.SerdeConstants.TINYINT_TYPE_NAME;
import static java.util.Objects.requireNonNull;
@@ -72,6 +73,7 @@ public final class HiveType
public static final HiveType HIVE_DOUBLE = new HiveType(getPrimitiveTypeInfo(DOUBLE_TYPE_NAME));
public static final HiveType HIVE_STRING = new HiveType(getPrimitiveTypeInfo(STRING_TYPE_NAME));
public static final HiveType HIVE_TIMESTAMP = new HiveType(getPrimitiveTypeInfo(TIMESTAMP_TYPE_NAME));
public static final HiveType HIVE_TIMESTAMPLOCALTZ = new HiveType(getPrimitiveTypeInfo(TIMESTAMPLOCALTZ_TYPE_NAME));
public static final HiveType HIVE_DATE = new HiveType(getPrimitiveTypeInfo(DATE_TYPE_NAME));
public static final HiveType HIVE_BINARY = new HiveType(getPrimitiveTypeInfo(BINARY_TYPE_NAME));

@@ -197,10 +199,10 @@ private static boolean isSupported(PrimitiveTypeInfo typeInfo)
CHAR,
DATE,
TIMESTAMP,
+TIMESTAMPLOCALTZ,
BINARY,
DECIMAL -> true;
-case TIMESTAMPLOCALTZ,
-INTERVAL_YEAR_MONTH,
+case INTERVAL_YEAR_MONTH,
INTERVAL_DAY_TIME,
VOID,
UNKNOWN -> false;
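The review thread above asks why writing would not work; the author's reply points at missing cases in isWritablePrimitiveType and ParquetSchemaConverter.getPrimitiveType. The following is a purely hypothetical, self-contained sketch of the kind of case that would be involved. It only mirrors the shape of the isSupported switch in this hunk; it is not Trino code and not part of this PR.

public class WritabilitySketch
{
    // Stand-in enum, declared here only to keep the sketch self-contained;
    // the real code switches on Hive's primitive category instead.
    enum HivePrimitiveKind
    {
        BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE,
        STRING, VARCHAR, CHAR, DATE, TIMESTAMP, TIMESTAMPLOCALTZ,
        BINARY, DECIMAL, INTERVAL_YEAR_MONTH, INTERVAL_DAY_TIME, VOID, UNKNOWN
    }

    // Hypothetical writability check: enabling writes would amount to moving
    // TIMESTAMPLOCALTZ into the "true" branch of a check like this, plus the
    // Parquet schema mapping the reply mentions.
    static boolean isWritable(HivePrimitiveKind kind)
    {
        return switch (kind) {
            case BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE,
                    STRING, VARCHAR, CHAR, DATE, TIMESTAMP,
                    TIMESTAMPLOCALTZ, // the case the reply says is still missing
                    BINARY, DECIMAL -> true;
            case INTERVAL_YEAR_MONTH, INTERVAL_DAY_TIME, VOID, UNKNOWN -> false;
        };
    }

    public static void main(String[] args)
    {
        // Prints true here only because this sketch includes the hypothetical case.
        System.out.println(isWritable(HivePrimitiveKind.TIMESTAMPLOCALTZ));
    }
}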
@@ -64,6 +64,7 @@
import io.trino.spi.type.MapType;
import io.trino.spi.type.RowType;
import io.trino.spi.type.TimestampType;
import io.trino.spi.type.TimestampWithTimeZoneType;
import io.trino.spi.type.Type;
import io.trino.spi.type.VarcharType;

@@ -943,7 +944,7 @@ public static Set<HiveColumnStatisticType> getSupportedColumnStatistics(Type type)
if (isNumericType(type) || type.equals(DATE)) {
return ImmutableSet.of(MIN_VALUE, MAX_VALUE, NUMBER_OF_DISTINCT_VALUES, NUMBER_OF_NON_NULL_VALUES);
}
-if (type instanceof TimestampType) {
+if (type instanceof TimestampType || type instanceof TimestampWithTimeZoneType) {
// TODO (https://github.com/trinodb/trino/issues/5859) Add support for timestamp MIN_VALUE, MAX_VALUE
return ImmutableSet.of(NUMBER_OF_DISTINCT_VALUES, NUMBER_OF_NON_NULL_VALUES);
}
@@ -75,6 +75,7 @@
import static io.trino.spi.type.RealType.REAL;
import static io.trino.spi.type.SmallintType.SMALLINT;
import static io.trino.spi.type.TimestampType.createTimestampType;
import static io.trino.spi.type.TimestampWithTimeZoneType.createTimestampWithTimeZoneType;
import static io.trino.spi.type.TinyintType.TINYINT;
import static io.trino.spi.type.TypeSignature.arrayType;
import static io.trino.spi.type.TypeSignature.mapType;
@@ -261,6 +262,8 @@ private static Type fromPrimitiveType(PrimitiveTypeInfo typeInfo, HiveTimestampPrecision timestampPrecision)
return DATE;
case TIMESTAMP:
return createTimestampType(timestampPrecision.getPrecision());
case TIMESTAMPLOCALTZ:
return createTimestampWithTimeZoneType(timestampPrecision.getPrecision());
case BINARY:
return VARBINARY;
case DECIMAL:
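A small illustration of what the new TIMESTAMPLOCALTZ branch above produces, assuming the connector's hive.timestamp-precision setting is left at its MILLISECONDS default. The snippet only uses the Trino SPI factory already imported in this hunk; it is not part of the PR.

import io.trino.spi.type.TimestampWithTimeZoneType;

import static io.trino.spi.type.TimestampWithTimeZoneType.createTimestampWithTimeZoneType;

public class MappingIllustration
{
    public static void main(String[] args)
    {
        // Precision 3 corresponds to the default MILLISECONDS setting.
        TimestampWithTimeZoneType mapped = createTimestampWithTimeZoneType(3);
        // Prints "timestamp(3) with time zone", matching the '... .000 UTC' value
        // asserted in the test further down.
        System.out.println(mapped.getDisplayName());
    }
}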
@@ -29,6 +29,12 @@
import io.trino.metadata.QualifiedObjectName;
import io.trino.metadata.TableHandle;
import io.trino.metadata.TableMetadata;
import io.trino.plugin.hive.metastore.Column;
import io.trino.plugin.hive.metastore.PrincipalPrivileges;
import io.trino.plugin.hive.metastore.Storage;
import io.trino.plugin.hive.metastore.StorageFormat;
import io.trino.plugin.hive.metastore.Table;
import io.trino.plugin.hive.metastore.file.FileHiveMetastore;
import io.trino.spi.connector.CatalogSchemaTableName;
import io.trino.spi.connector.ColumnHandle;
import io.trino.spi.connector.ColumnMetadata;
@@ -83,6 +89,7 @@
import java.util.Map;
import java.util.Optional;
import java.util.OptionalInt;
import java.util.OptionalLong;
import java.util.Set;
import java.util.StringJoiner;
import java.util.function.BiConsumer;
@@ -136,6 +143,7 @@
import static io.trino.plugin.hive.HiveTableProperties.PARTITIONED_BY_PROPERTY;
import static io.trino.plugin.hive.HiveTableProperties.STORAGE_FORMAT_PROPERTY;
import static io.trino.plugin.hive.HiveType.toHiveType;
import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore;
import static io.trino.plugin.hive.util.HiveUtil.columnExtraInfo;
import static io.trino.spi.security.Identity.ofUser;
import static io.trino.spi.security.SelectedRole.Type.ROLE;
@@ -8261,6 +8269,45 @@ public void testSelectFromPrestoViewReferencingHiveTableWithTimestamps()
assertThat(query(nanosSessions, "SELECT ts FROM hive_timestamp_nanos.tpch." + prestoViewNameNanos)).matches("VALUES TIMESTAMP '1990-01-02 12:13:14.123000000'");
}

@Test
public void testTimestampWithTimeZone()
{
assertUpdate("CREATE TABLE test_timestamptz_base (t timestamp) WITH (format = 'PARQUET')");
assertUpdate("INSERT INTO test_timestamptz_base (t) VALUES" +
"(timestamp '2022-07-26 12:13')", 1);

// Writing TIMESTAMP WITH LOCAL TIME ZONE is not supported, so we first create a Parquet file by writing an unzoned
// timestamp (which is converted to UTC using the default time zone) and then create another table that reads from the same file.
String tableLocation = getTableLocation("test_timestamptz_base");

// No Trino type maps to TIMESTAMP WITH LOCAL TIME ZONE on table creation, so we need to create the metastore entry manually
FileHiveMetastore metastore = createTestingFileHiveMetastore(new File(getDistributedQueryRunner().getCoordinator().getBaseDataDir().toFile(), "hive_data"));
metastore.createTable(
new Table(
"tpch",
"test_timestamptz",
Optional.of("hive"),
"EXTERNAL_TABLE",
new Storage(
StorageFormat.fromHiveStorageFormat(HiveStorageFormat.PARQUET),
Optional.of(tableLocation),
Optional.empty(),
false,
Collections.emptyMap()),
List.of(new Column("t", HiveType.HIVE_TIMESTAMPLOCALTZ, Optional.empty())),
List.of(),
Collections.emptyMap(),
Optional.empty(),
Optional.empty(),
OptionalLong.empty()),
PrincipalPrivileges.fromHivePrivilegeInfos(Collections.emptySet()));
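// With the test environment's default time zone (UTC-5 on that date), the unzoned 12:13 inserted
// above corresponds to 17:13 UTC, which is what the read below should return.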

assertThat(query("SELECT * FROM test_timestamptz"))
.matches("VALUES TIMESTAMP '2022-07-26 17:13:00.000 UTC'");

assertUpdate("DROP TABLE test_timestamptz");
}

@Test(dataProvider = "legalUseColumnNamesProvider")
public void testUseColumnNames(HiveStorageFormat format, boolean formatUseColumnNames)
{