Skip to content

Commit

Permalink
perf(duckdb): speed up memtable registration (#9419)
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud authored Jun 21, 2024
1 parent 5235a4b commit 7878d8c
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 2 deletions.
8 changes: 6 additions & 2 deletions ibis/backends/duckdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1552,8 +1552,12 @@ def _get_schema_using_query(self, query: str) -> sch.Schema:
)

def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
    """Register the data behind an in-memory table with DuckDB, at most once.

    Whether the name is already known is decided by a single lookup of that
    name on the connection, not by a membership test against
    ``self.list_tables()``.

    Parameters
    ----------
    op
        In-memory table operation. ``op.name`` is the name to register
        under and ``op.data.to_pyarrow(op.schema)`` supplies the data.
    """
    name = op.name
    try:
        # this handles tables _and_ views
        self.con.table(name)
    except (duckdb.CatalogException, duckdb.InvalidInputException):
        # only register if we haven't already done so
        self.con.register(name, op.data.to_pyarrow(op.schema))

def _register_udfs(self, expr: ir.Expr) -> None:
Expand Down
19 changes: 19 additions & 0 deletions ibis/tests/benchmarks/test_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import functools
import inspect
import itertools
import math
import os
import string
from operator import attrgetter, itemgetter
Expand Down Expand Up @@ -865,3 +866,21 @@ def test_large_union_construct(benchmark, many_tables):
def test_large_union_compile(benchmark, many_tables):
    """Benchmark compiling the union of ``many_tables`` to SQL."""
    union_expr = ibis.union(*many_tables)
    compiled = benchmark(ibis.to_sql, union_expr)
    assert compiled is not None


@pytest.fixture(scope="session")
def lots_of_tables(tmp_path_factory):
    """Return an ibis DuckDB connection to a database file with many tables.

    The database file is created in a temporary directory and populated with
    one-column tables through a plain ``duckdb`` connection, then reopened
    through ibis. The fixture is skipped when ``duckdb`` cannot be imported.
    """
    duckdb = pytest.importorskip("duckdb")

    data_dir = tmp_path_factory.mktemp("data")
    db_path = str(data_dir / "lots_of_tables.ddb")

    num_tables = 100_000
    # zero-pad width for the numeric suffix of each table name
    d = int(math.log10(num_tables))
    statements = [
        f"CREATE TABLE t{i:0>{d}} (x TINYINT)" for i in range(num_tables)
    ]

    # create every table with one multi-statement execute call
    with duckdb.connect(db_path) as raw_con:
        raw_con.execute(";".join(statements))

    return ibis.duckdb.connect(db_path)


def test_memtable_register(lots_of_tables, benchmark):
    """Benchmark executing a three-row memtable on the many-table connection."""
    memtable = ibis.memtable({"x": [1, 2, 3]})
    executed = benchmark(lots_of_tables.execute, memtable)
    assert len(executed) == 3

0 comments on commit 7878d8c

Please sign in to comment.