From 8ccb81d49e252b57310bdb3a97eeb77ef1d28bac Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Mon, 12 Sep 2022 16:01:29 -0400 Subject: [PATCH] feat(ux): add duckdb as the default backend --- ibis/backends/dask/core.py | 2 +- ibis/backends/pandas/core.py | 2 +- ibis/backends/tests/test_client.py | 50 ++++++++++++++++++++++++++++++ ibis/config.py | 32 ++++++++++++++++++- ibis/expr/types/core.py | 44 ++++++++++++++++++++------ ibis/tests/expr/test_table.py | 11 +++++++ 6 files changed, 129 insertions(+), 12 deletions(-) diff --git a/ibis/backends/dask/core.py b/ibis/backends/dask/core.py index 4b93d6ab074c..471b728a0875 100644 --- a/ibis/backends/dask/core.py +++ b/ibis/backends/dask/core.py @@ -169,7 +169,7 @@ def execute_with_scope( # computing anything *and* before associating leaf nodes with data. This # allows clients to provide their own data for each leaf. if clients is None: - clients = expr._find_backends() + clients, _ = expr._find_backends() if aggcontext is None: aggcontext = agg_ctx.Summarize() diff --git a/ibis/backends/pandas/core.py b/ibis/backends/pandas/core.py index dd523c9bcb7f..16d548ffee57 100644 --- a/ibis/backends/pandas/core.py +++ b/ibis/backends/pandas/core.py @@ -205,7 +205,7 @@ def execute_with_scope( # computing anything *and* before associating leaf nodes with data. This # allows clients to provide their own data for each leaf. 
if clients is None: - clients = expr._find_backends() + clients, _ = expr._find_backends() if aggcontext is None: aggcontext = agg_ctx.Summarize() diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py index 8f59a3369c00..6f9b189c2dbe 100644 --- a/ibis/backends/tests/test_client.py +++ b/ibis/backends/tests/test_client.py @@ -1,4 +1,5 @@ import platform +import re import pandas as pd import pandas.testing as tm @@ -7,6 +8,7 @@ from pytest import mark, param import ibis +import ibis.common.exceptions as com import ibis.expr.datatypes as dt from ibis.util import guid @@ -661,3 +663,51 @@ def test_create_from_in_memory_table(con, t): finally: con.drop_table(tmp_name) assert tmp_name not in con.list_tables() + + +def test_default_backend_no_duckdb(backend): + # backend is used to ensure that this test runs in CI in the setting + # where only the dependencies for a given backend are installed + + # if duckdb is available then this test won't fail and so we skip it + try: + import duckdb # noqa: F401 + + pytest.skip( + "duckdb is installed; it will be used as the default backend" + ) + except ImportError: + pass + + df = pd.DataFrame({'a': [1, 2, 3]}) + t = ibis.memtable(df) + expr = t.a.sum() + + # run this twice to ensure that we hit the optimizations in + # `_default_backend` + for _ in range(2): + with pytest.raises( + com.IbisError, + match="Expression depends on no backends", + ): + expr.execute() + + +@pytest.mark.duckdb +def test_default_backend(): + pytest.importorskip("duckdb") + + df = pd.DataFrame({'a': [1, 2, 3]}) + t = ibis.memtable(df) + expr = t.a.sum() + # run this twice to ensure that we hit the optimizations in + # `_default_backend` + for _ in range(2): + assert expr.execute() == df.a.sum() + + sql = ibis.to_sql(expr) + rx = """\ +SELECT + SUM\\((\\w+)\\.a\\) AS sum +FROM \\w+ AS \\1""" + assert re.match(rx, sql) is not None diff --git a/ibis/config.py b/ibis/config.py index 96acfb92c213..18a49c197c64 100644 --- 
a/ibis/config.py +++ b/ibis/config.py @@ -86,6 +86,31 @@ def query_text_length_ge_zero(cls, query_text_length: int) -> int: return query_text_length +_HAS_DUCKDB = True +_DUCKDB_CON = None + + +def _default_backend() -> Any: + global _HAS_DUCKDB, _DUCKDB_CON + + if not _HAS_DUCKDB: + return None + + if _DUCKDB_CON is not None: + return _DUCKDB_CON + + try: + import duckdb as _ # noqa: F401 + except ImportError: + _HAS_DUCKDB = False + return None + + import ibis + + _DUCKDB_CON = ibis.duckdb.connect(":memory:") + return _DUCKDB_CON + + class Options(BaseSettings): """Ibis configuration options.""" @@ -106,10 +131,15 @@ class Options(BaseSettings): default=False, description="Render expressions as GraphViz PNGs when running in a Jupyter notebook.", # noqa: E501 ) + default_backend: Any = Field( default=None, - description="The default backend to use for execution.", + description=( + "The default backend to use for execution. " + "Defaults to DuckDB if not set." + ), ) + context_adjustment: ContextAdjustment = Field( default=ContextAdjustment(), description=ContextAdjustment.__doc__, diff --git a/ibis/expr/types/core.py b/ibis/expr/types/core.py index 54c6f51793ec..af05e8e934f8 100644 --- a/ibis/expr/types/core.py +++ b/ibis/expr/types/core.py @@ -7,13 +7,13 @@ from public import public -from ibis import config from ibis.common.exceptions import ( ExpressionError, IbisError, IbisTypeError, TranslationError, ) +from ibis.config import _default_backend, options from ibis.expr.typing import TimeContext from ibis.util import UnnamedMarker, deprecated @@ -33,7 +33,7 @@ def __init__(self, arg: ops.Node) -> None: self._arg = arg def __repr__(self) -> str: - if not config.options.interactive: + if not options.interactive: return self._repr() try: @@ -103,7 +103,7 @@ def _key(self) -> tuple[Hashable, ...]: return type(self), self._safe_name, self.op() def _repr_png_(self) -> bytes | None: - if config.options.interactive or not config.options.graphviz_repr: + if 
options.interactive or not options.graphviz_repr: return None try: import ibis.expr.visualize as viz @@ -189,7 +189,7 @@ def pipe(self, f, *args: Any, **kwargs: Any) -> Expr: def op(self) -> ops.Node: return self._arg - def _find_backends(self) -> list[BaseBackend]: + def _find_backends(self) -> tuple[list[BaseBackend], bool]: """Return the possible backends for an expression. Returns @@ -197,6 +197,7 @@ def _find_backends(self) -> list[BaseBackend]: list[BaseBackend] A list of the backends found. """ + import ibis.expr.operations as ops from ibis.backends.base import BaseBackend seen_backends: dict[ @@ -205,11 +206,13 @@ def _find_backends(self) -> list[BaseBackend]: stack = [self.op()] seen = set() + has_unbound = False while stack: node = stack.pop() if node not in seen: + has_unbound |= isinstance(node, ops.UnboundTable) seen.add(node) for arg in node.flat_args(): @@ -219,13 +222,36 @@ def _find_backends(self) -> list[BaseBackend]: elif isinstance(arg, Expr): stack.append(arg.op()) - return list(seen_backends.values()) + return list(seen_backends.values()), has_unbound - def _find_backend(self) -> BaseBackend: - backends = self._find_backends() + def _find_backend(self, *, use_default: bool = False) -> BaseBackend: + """Find the backend attached to an expression. + + Parameters + ---------- + use_default + If [`True`][True] and the default backend isn't set, initialize the + default backend and use that. This should only be set to `True` for + `.execute()`. For other contexts such as compilation, this option + doesn't make sense so the default value is [`False`][False]. + + Returns + ------- + BaseBackend + A backend that is attached to the expression + """ + backends, has_unbound = self._find_backends() if not backends: - default = config.options.default_backend + if has_unbound: + raise IbisError( + "Expression contains unbound tables and therefore cannot " + "be executed. 
Use ibis.<backend>.execute(expr) or " + "assign a backend instance to " + "`ibis.options.default_backend`." + ) + if (default := options.default_backend) is None and use_default: + default = _default_backend() if default is None: raise IbisError( 'Expression depends on no backends, and found no default' @@ -262,7 +288,7 @@ def execute( params Mapping of scalar parameter expressions to value """ - return self._find_backend().execute( + return self._find_backend(use_default=True).execute( self, limit=limit, timecontext=timecontext, params=params, **kwargs ) diff --git a/ibis/tests/expr/test_table.py b/ibis/tests/expr/test_table.py index 549e3397055c..aa59ebf417ac 100644 --- a/ibis/tests/expr/test_table.py +++ b/ibis/tests/expr/test_table.py @@ -1551,3 +1551,14 @@ def test_memtable_filter(): t = ibis.memtable([(1, 2), (3, 4), (5, 6)], columns=["x", "y"]) expr = t.filter(t.x > 1) assert expr.columns == ["x", "y"] + + +def test_default_backend_with_unbound_table(): + t = ibis.table(dict(a="int"), name="t") + expr = t.a.sum() + + with pytest.raises( + com.IbisError, + match="Expression contains unbound tables", + ): + assert expr.execute()