Skip to content

Commit

Permalink
refactor(datafusion): avoid reinitializing memtables on every execute call (#10057)
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud authored Sep 9, 2024
1 parent 9488115 commit 43e5f12
Showing 1 changed file with 9 additions and 8 deletions.
17 changes: 9 additions & 8 deletions ibis/backends/datafusion/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

import datafusion as df
import pyarrow as pa
import pyarrow.dataset as ds
import pyarrow_hotfix # noqa: F401
import sqlglot as sg
import sqlglot.expressions as sge
Expand Down Expand Up @@ -418,14 +417,16 @@ def _register_failure(self):

def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
# Registers the in-memory table described by `op` on `self.con` under
# `op.name`.
#
# NOTE(review): this span is a rendered diff hunk whose +/- markers and
# indentation were stripped, so the removed and the added statements appear
# back to back. Judging by the hunk header (14 old lines -> 16 new lines) and
# the "9 additions & 8 deletions" summary, `schema = op.schema` and the
# deregister/register statements look like the removed side, and everything
# from `db = ...` onward looks like the added side -- confirm against the
# actual file before relying on this.
name = op.name
schema = op.schema

# Presumably the pre-commit body: deregister_table is called for `name`
# unconditionally and the data is registered again on every call, which is
# what the commit title says it wants to avoid.
self.con.deregister_table(name)
# Non-empty data is registered as record batches; the walrus keeps the batch
# list only when to_batches() returned something truthy.
if batches := op.data.to_pyarrow(schema).to_batches():
self.con.register_record_batches(name, [batches])
else:
# No batches: register an empty pyarrow dataset built from the schema
# instead.
empty_dataset = ds.dataset([], schema=schema.to_pyarrow())
self.con.register_dataset(name=name, dataset=empty_dataset)
# Presumably the post-commit body: look the table up via the database object
# returned by self.con.catalog().database().
db = self.con.catalog().database()

# The lookup result is discarded; only whether db.table(name) raises matters.
# Any exception is treated as "not registered yet" and triggers registration;
# if no exception is raised, nothing else is done.
try:
db.table(name)
except Exception: # noqa: BLE001 because datafusion doesn't have anything better
# self.con.register_table is broken, so we do this roundabout thing
# of constructing a datafusion DataFrame, which has a side effect
# of registering the table
self.con.from_arrow_table(op.data.to_pyarrow(op.schema), name)

def read_csv(
self, path: str | Path, table_name: str | None = None, **kwargs: Any
Expand Down

0 comments on commit 43e5f12

Please sign in to comment.