From 7601edf9b6027c8b87304f455f4906728f4a368f Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Mon, 13 May 2024 07:51:18 -0400 Subject: [PATCH] fix(snowflake): ensure that timestamp conversion from parquet files is correct --- ibis/backends/snowflake/__init__.py | 41 +++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/ibis/backends/snowflake/__init__.py b/ibis/backends/snowflake/__init__.py index 6627600337626..d8ed2814f7687 100644 --- a/ibis/backends/snowflake/__init__.py +++ b/ibis/backends/snowflake/__init__.py @@ -1057,22 +1057,41 @@ def read_parquet( # # see # https://community.snowflake.com/s/article/How-to-load-logical-type-TIMESTAMP-data-from-Parquet-files-into-Snowflake + type_mapper = self.compiler.type_mapper names_types = [ - ( - name, - self.compiler.type_mapper.to_string(typ), - typ.nullable, - typ.is_timestamp(), - ) - for name, typ in schema.items() + (name, type_mapper.to_string(typ), typ) for name, typ in schema.items() ] + snowflake_schema = ", ".join( - f"{sg.to_identifier(col, quoted=quoted)} {typ}{' NOT NULL' * (not nullable)}" - for col, typ, nullable, _ in names_types + f"{sg.to_identifier(col, quoted=quoted)} {typ}{' NOT NULL' * (not dtype.nullable)}" + for col, typ, dtype in names_types ) + + def make_column(value, typ, dtype): + # don't do anything special if we're not handling a timestamp + if not dtype.is_timestamp(): + return f"{value}::{typ}" + + comp = self.compiler + f = comp.f + + # apparently Snowflake sometimes treats Parquet timestamps as + # variant strings or variant numbers, so check for which one we + # have using is_integer + value = sge.Var(this=value) + return comp.if_( + sg.or_(f.is_integer(value), f.is_double(value), f.is_decimal(value)), + f.to_timestamp( + comp.cast(value, dt.int), + # seconds (dtype.scale == 0) is the snowflake default + dtype.scale or 0, + ), + f.try_to_timestamp(comp.cast(value, dt.string)), + ).sql(self.dialect) + cols = ", ".join( - f"$1:{col}{'::VARCHAR' * is_timestamp}::{typ}" - for col, typ, _, is_timestamp in names_types + make_column(f"$1:{col}", typ=typ, dtype=dtype) + for col, typ, dtype in names_types ) stmts = [