Skip to content

Commit

Permalink
fix(duckdb): run pre-execute-hooks in duckdb before file export
Browse files Browse the repository at this point in the history
We weren't running these hooks before, which meant that a `memtable`
that hadn't been executed yet (via interactive mode or otherwise) would
not be registered with DuckDB at export time.
  • Loading branch information
gforsyth authored and jcrist committed Jul 18, 2023
1 parent 5020bdb commit 5bdaa1d
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 0 deletions.
2 changes: 2 additions & 0 deletions ibis/backends/duckdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -821,6 +821,7 @@ def to_parquet(
>>> # partition on multiple columns
>>> con.to_parquet(penguins, "penguins_hive_dir", partition_by=("year", "island")) # doctest: +SKIP
"""
self._run_pre_execute_hooks(expr)
query = self._to_sql(expr, params=params)
args = ["FORMAT 'parquet'", *(f"{k.upper()} {v!r}" for k, v in kwargs.items())]
copy_cmd = f"COPY ({query}) TO {str(path)!r} ({', '.join(args)})"
Expand Down Expand Up @@ -855,6 +856,7 @@ def to_csv(
**kwargs
DuckDB CSV writer arguments. https://duckdb.org/docs/data/csv.html#parameters
"""
self._run_pre_execute_hooks(expr)
query = self._to_sql(expr, params=params)
args = [
"FORMAT 'csv'",
Expand Down
29 changes: 29 additions & 0 deletions ibis/backends/tests/test_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,35 @@ def test_roundtrip_partitioned_parquet(tmp_path, con, backend, awards_players):
backend.assert_frame_equal(awards_players.to_pandas(), awards_players.to_pandas())


@pytest.mark.notimpl(
    ["dask", "druid", "impala", "pyspark"], reason="No support for exporting files"
)
@pytest.mark.notimpl(
    ["datafusion"],
    reason="No memtable support",
)
@pytest.mark.parametrize("ftype", ["csv", "parquet"])
def test_memtable_to_file(tmp_path, con, ftype):
    """Export a freshly created ``memtable`` directly to a file.

    Regression test for #6091: a ``memtable`` that is created and then
    immediately exported to ``parquet`` (or ``csv``) used to error because
    the in-memory table was not registered with the backend before the
    export was attempted.

    Parameters
    ----------
    tmp_path
        Per-test temporary directory that receives the exported file.
    con
        Backend connection under test.
    ftype
        Export format; selects the ``to_<ftype>`` method on ``con``.
    """
    outfile = tmp_path / f"memtable.{ftype}"
    assert not outfile.is_file()

    ibis.set_backend(con)
    try:
        memtable = ibis.memtable({"col": [1, 2, 3, 4]})

        # Dispatch to ``con.to_csv`` / ``con.to_parquet`` based on the
        # parametrized file type.
        getattr(con, f"to_{ftype}")(memtable, outfile)

        assert outfile.is_file()
    finally:
        # Reset the global default backend even when the export or the
        # assertion fails, so a failure here cannot leak state into
        # subsequent tests.
        ibis.options.default_backend = None


@pytest.mark.notimpl(["dask", "impala", "pyspark"])
def test_table_to_csv(tmp_path, backend, awards_players):
outcsv = tmp_path / "out.csv"
Expand Down

0 comments on commit 5bdaa1d

Please sign in to comment.