Skip to content

Commit

Permalink
perf(duckdb): improve to_pyarrow performance
Browse files Browse the repository at this point in the history
  • Loading branch information
jcrist authored and cpcloud committed Aug 17, 2023
1 parent b21d351 commit 5970cfe
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 3 deletions.
15 changes: 12 additions & 3 deletions ibis/backends/duckdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -862,11 +862,20 @@ def to_pyarrow(
) -> pa.Table:
self._run_pre_execute_hooks(expr)
query_ast = self.compiler.to_ast_ensure_limit(expr, limit, params=params)
sql = query_ast.compile()

# We use `.sql` instead of `.execute` below for performance - in
# certain cases duckdb query -> arrow table can be significantly faster
# in this configuration. Currently `.sql` doesn't support parametrized
# queries, so we need to compile with literal_binds for now.
sql = str(
query_ast.compile().compile(
dialect=self.con.dialect, compile_kwargs={"literal_binds": True}
)
)

with self.begin() as con:
cursor = con.execute(sql)
table = cursor.cursor.fetch_arrow_table()
cursor = con.connection.cursor()
table = cursor.sql(sql).to_arrow_table()

return expr.__pyarrow_result__(table)

Expand Down
1 change: 1 addition & 0 deletions ibis/backends/tests/test_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,7 @@ def test_table_to_csv(tmp_path, backend, awards_players):
id="decimal256",
marks=[
pytest.mark.notyet(["impala"], reason="precision not supported"),
pytest.mark.notyet(["duckdb"], reason="precision is out of range"),
pytest.mark.notyet(
["druid", "duckdb", "snowflake", "trino"],
raises=sa.exc.ProgrammingError,
Expand Down

0 comments on commit 5970cfe

Please sign in to comment.