Skip to content

Commit

Permalink
fix(duckdb): free memtables based on operation lifetime (#10042)
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud authored Sep 9, 2024
1 parent 3a37626 commit a121ab3
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 0 deletions.
7 changes: 7 additions & 0 deletions ibis/backends/duckdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import os
import urllib
import warnings
import weakref
from operator import itemgetter
from pathlib import Path
from typing import TYPE_CHECKING, Any
Expand Down Expand Up @@ -1605,6 +1606,12 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
# only register if we haven't already done so
self.con.register(name, op.data.to_pyarrow(op.schema))

# if we don't aggressively unregister tables duckdb will keep a
# reference to every memtable ever registered, even if there's no
# way for a user to access the operation anymore, resulting in a
# memory leak
weakref.finalize(op, self.con.unregister, name)

def _register_udfs(self, expr: ir.Expr) -> None:
con = self.con

Expand Down
9 changes: 9 additions & 0 deletions ibis/backends/duckdb/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,3 +417,12 @@ def test_read_csv_with_types(tmp_path, input, all_varchar):
path.write_bytes(data)
t = con.read_csv(path, all_varchar=all_varchar, **input)
assert t.schema()["geom"].is_geospatial()


def test_memtable_doesnt_leak(con, monkeypatch):
monkeypatch.setattr(ibis.options, "default_backend", con)
name = "memtable_doesnt_leak"
assert name not in con.list_tables()
df = ibis.memtable({"a": [1, 2, 3]}, name=name).execute()
assert name not in con.list_tables()
assert len(df) == 3

0 comments on commit a121ab3

Please sign in to comment.