Skip to content

Commit

Permalink
perf(duckdb): speedup timestamp conversion by avoiding conversion to …
Browse files Browse the repository at this point in the history
…object (#9556)
  • Loading branch information
cpcloud authored Jul 13, 2024
1 parent 62a1864 commit 5923e1e
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 4 deletions.
2 changes: 1 addition & 1 deletion ibis/backends/duckdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1393,7 +1393,7 @@ def execute(
# but calling `to_pylist()` will render it as None
col.null_count
)
else col.to_pandas(timestamp_as_object=True)
else col.to_pandas()
)
for name, col in zip(table.column_names, table.columns)
}
Expand Down
3 changes: 2 additions & 1 deletion ibis/backends/tests/test_asof_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@ def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op
result = result.sort_values(["group", "time"]).reset_index(drop=True)
expected = expected.sort_values(["group", "time"]).reset_index(drop=True)

tm.assert_frame_equal(result[expected.columns], expected)
# duckdb returns datetime64[us], whereas pandas defaults to datetime64[ns]
tm.assert_frame_equal(result[expected.columns], expected, check_dtype=False)
with pytest.raises(AssertionError):
tm.assert_series_equal(result["time"], result["time_right"])

Expand Down
6 changes: 4 additions & 2 deletions ibis/formats/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,10 @@ def convert_Boolean(cls, s, dtype, pandas_type):

@classmethod
def convert_Timestamp(cls, s, dtype, pandas_type):
if isinstance(dtype, pd.DatetimeTZDtype):
return s.dt.tz_convert(dtype.timezone)
if isinstance(pandas_type, pd.DatetimeTZDtype) and isinstance(
s.dtype, pd.DatetimeTZDtype
):
return s if s.dtype == pandas_type else s.dt.tz_convert(dtype.timezone)
elif pdt.is_datetime64_dtype(s.dtype):
return s.dt.tz_localize(dtype.timezone)
else:
Expand Down
14 changes: 14 additions & 0 deletions ibis/tests/benchmarks/test_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@
import os
import random
import string
from datetime import datetime
from operator import attrgetter, itemgetter

import numpy as np
import pandas as pd
import pytest
import pytz
from packaging.version import parse as vparse
from pytest import param

Expand Down Expand Up @@ -914,3 +916,15 @@ def test_wide_drop_compile(benchmark, wide_table, cols_to_drop):
benchmark(
lambda expr: ibis.to_sql(expr, dialect="duckdb"), wide_table.drop(*cols_to_drop)
)


def test_duckdb_timestamp_conversion(benchmark):
    """Benchmark executing a per-second UTC timestamp range on DuckDB.

    The expression unnests a range from 2000-01-01 to 2000-02-01 (UTC) with a
    one-second step; the benchmarked call is ``con.execute`` on a fresh DuckDB
    connection.  The test is skipped when ``duckdb`` cannot be imported.
    """
    pytest.importorskip("duckdb")

    utc = pytz.UTC
    start = datetime(2000, 1, 1, tzinfo=utc)
    stop = datetime(2000, 2, 1, tzinfo=utc)
    step = ibis.interval(seconds=1)
    expr = ibis.range(start, stop, step).unnest()

    con = ibis.duckdb.connect()
    series = benchmark(con.execute, expr)

    # The result is expected to hold one row per second between start and stop.
    expected_rows = (stop - start).total_seconds()
    assert series.size == expected_rows

0 comments on commit 5923e1e

Please sign in to comment.