Skip to content

Commit

Permalink
fix(snowflake): ensure that timestamp conversion from parquet files i…
Browse files Browse the repository at this point in the history
…s correct (#9181)

Fixes a bug where the scale was incorrectly left up to Snowflake, which
can produce incorrectly scaled values if a timestamp is read in as a
variant integer and then cast to string.
  • Loading branch information
cpcloud authored May 13, 2024
1 parent b3abc9a commit 1ba4c32
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 19 deletions.
29 changes: 10 additions & 19 deletions ibis/backends/snowflake/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1041,35 +1041,26 @@ def read_parquet(
qtable = sg.to_identifier(table, quoted=quoted)
threads = min((os.cpu_count() or 2) // 2, 99)

options = " " * bool(kwargs) + " ".join(
kwargs.setdefault("USE_LOGICAL_TYPE", True)
options = " ".join(
f"{name.upper()} = {value!r}" for name, value in kwargs.items()
)

# we can't infer the schema from the format alone because snowflake
# doesn't support logical timestamp types in parquet files
#
# see
# https://community.snowflake.com/s/article/How-to-load-logical-type-TIMESTAMP-data-from-Parquet-files-into-Snowflake
type_mapper = self.compiler.type_mapper
names_types = [
(
name,
self.compiler.type_mapper.to_string(typ),
typ.nullable,
typ.is_timestamp(),
)
(name, type_mapper.to_string(typ), typ.nullable)
for name, typ in schema.items()
]

snowflake_schema = ", ".join(
f"{sg.to_identifier(col, quoted=quoted)} {typ}{' NOT NULL' * (not nullable)}"
for col, typ, nullable, _ in names_types
)
cols = ", ".join(
f"$1:{col}{'::VARCHAR' * is_timestamp}::{typ}"
for col, typ, _, is_timestamp in names_types
f"{sg.to_identifier(col, quoted=quoted)} {typ}{' NOT NULL' * (not is_nullable)}"
for col, typ, is_nullable in names_types
)

cols = ", ".join(f"$1:{col}::{typ}" for col, typ, _ in names_types)

stmts = [
f"CREATE TEMP STAGE {stage} FILE_FORMAT = (TYPE = PARQUET{options})",
f"CREATE TEMP STAGE {stage} FILE_FORMAT = (TYPE = PARQUET {options})",
f"CREATE TEMP TABLE {qtable} ({snowflake_schema})",
]

Expand Down
16 changes: 16 additions & 0 deletions ibis/backends/snowflake/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,3 +355,19 @@ def test_list_tables_schema_warning_refactor(con):
con.list_tables(database=("IBIS_TESTING", "INFORMATION_SCHEMA"), like="TABLE")
== like_table
)


def test_timestamp_memtable():
con = ibis.snowflake.connect()
df = pd.DataFrame(
{
"ts": [
pd.Timestamp("1970-01-01 00:00:00"),
pd.Timestamp("1970-01-01 00:00:01"),
pd.Timestamp("1970-01-01 00:00:02"),
]
}
)
t = ibis.memtable(df)
result = con.to_pandas(t)
tm.assert_frame_equal(result, df)

0 comments on commit 1ba4c32

Please sign in to comment.