Skip to content

Commit

Permalink
feat(ux): add duckdb as the default backend
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud committed Sep 14, 2022
1 parent fa59d10 commit 8ccb81d
Show file tree
Hide file tree
Showing 6 changed files with 129 additions and 12 deletions.
2 changes: 1 addition & 1 deletion ibis/backends/dask/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ def execute_with_scope(
# computing anything *and* before associating leaf nodes with data. This
# allows clients to provide their own data for each leaf.
if clients is None:
clients = expr._find_backends()
clients, _ = expr._find_backends()

if aggcontext is None:
aggcontext = agg_ctx.Summarize()
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/pandas/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def execute_with_scope(
# computing anything *and* before associating leaf nodes with data. This
# allows clients to provide their own data for each leaf.
if clients is None:
clients = expr._find_backends()
clients, _ = expr._find_backends()

if aggcontext is None:
aggcontext = agg_ctx.Summarize()
Expand Down
50 changes: 50 additions & 0 deletions ibis/backends/tests/test_client.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import platform
import re

import pandas as pd
import pandas.testing as tm
Expand All @@ -7,6 +8,7 @@
from pytest import mark, param

import ibis
import ibis.common.exceptions as com
import ibis.expr.datatypes as dt
from ibis.util import guid

Expand Down Expand Up @@ -661,3 +663,51 @@ def test_create_from_in_memory_table(con, t):
finally:
con.drop_table(tmp_name)
assert tmp_name not in con.list_tables()


def test_default_backend_no_duckdb(backend):
    """Check that executing without any backend fails when duckdb is absent.

    The ``backend`` fixture is requested only so that this test runs in CI
    in the setting where only the dependencies for a given backend are
    installed.
    """
    # With duckdb importable the default backend would kick in and the
    # execution below would succeed, so there is nothing to check: skip.
    try:
        import duckdb  # noqa: F401
    except ImportError:
        pass
    else:
        pytest.skip(
            "duckdb is installed; it will be used as the default backend"
        )

    frame = pd.DataFrame({'a': [1, 2, 3]})
    expr = ibis.memtable(frame).a.sum()

    # Execute twice so that the second call goes through the cached
    # "duckdb is unavailable" path in `_default_backend`.
    for _attempt in range(2):
        with pytest.raises(
            com.IbisError,
            match="Expression depends on no backends",
        ):
            expr.execute()


@pytest.mark.duckdb
def test_default_backend():
    """Check execution and SQL compilation of a backend-less expression.

    The test is skipped when duckdb cannot be imported.
    """
    pytest.importorskip("duckdb")

    df = pd.DataFrame({'a': [1, 2, 3]})
    t = ibis.memtable(df)
    expr = t.a.sum()
    # run this twice to ensure that we hit the optimizations in
    # `_default_backend`
    for _ in range(2):
        assert expr.execute() == df.a.sum()

    sql = ibis.to_sql(expr)
    # The pattern captures the table alias with a group and uses a
    # backreference to require the same alias after `AS` in the FROM clause.
    # NOTE(review): leading whitespace inside this literal is not visible in
    # this view -- confirm it against the SQL actually generated.
    rx = """\
SELECT
SUM\\((\\w+)\\.a\\) AS sum
FROM \\w+ AS \\1"""
    assert re.match(rx, sql) is not None
32 changes: 31 additions & 1 deletion ibis/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,31 @@ def query_text_length_ge_zero(cls, query_text_length: int) -> int:
return query_text_length


# Module-level cache for `_default_backend`.
# `_HAS_DUCKDB` flips to False after the first failed `import duckdb`, so the
# import is not retried on later calls.
_HAS_DUCKDB = True
# Connection created on first successful use; None until then.
_DUCKDB_CON = None


def _default_backend() -> Any:
    """Return the cached in-memory DuckDB connection, creating it on demand.

    Returns
    -------
    Any
        The connection returned by ``ibis.duckdb.connect(":memory:")``, or
        ``None`` when ``duckdb`` cannot be imported.
    """
    global _HAS_DUCKDB, _DUCKDB_CON

    # A previous call already found that duckdb is not importable.
    if not _HAS_DUCKDB:
        return None

    if _DUCKDB_CON is None:
        try:
            import duckdb as _  # noqa: F401
        except ImportError:
            # Remember the failure; the connection stays None.
            _HAS_DUCKDB = False
        else:
            # Imported here rather than at module scope.
            # NOTE(review): presumably to avoid a circular import between
            # `ibis` and `ibis.config` -- confirm.
            import ibis

            _DUCKDB_CON = ibis.duckdb.connect(":memory:")

    return _DUCKDB_CON


class Options(BaseSettings):
"""Ibis configuration options."""

Expand All @@ -106,10 +131,15 @@ class Options(BaseSettings):
default=False,
description="Render expressions as GraphViz PNGs when running in a Jupyter notebook.", # noqa: E501
)

default_backend: Any = Field(
default=None,
description="The default backend to use for execution.",
description=(
"The default backend to use for execution. "
"Defaults to DuckDB if not set."
),
)

context_adjustment: ContextAdjustment = Field(
default=ContextAdjustment(),
description=ContextAdjustment.__doc__,
Expand Down
44 changes: 35 additions & 9 deletions ibis/expr/types/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@

from public import public

from ibis import config
from ibis.common.exceptions import (
ExpressionError,
IbisError,
IbisTypeError,
TranslationError,
)
from ibis.config import _default_backend, options
from ibis.expr.typing import TimeContext
from ibis.util import UnnamedMarker, deprecated

Expand All @@ -33,7 +33,7 @@ def __init__(self, arg: ops.Node) -> None:
self._arg = arg

def __repr__(self) -> str:
if not config.options.interactive:
if not options.interactive:
return self._repr()

try:
Expand Down Expand Up @@ -103,7 +103,7 @@ def _key(self) -> tuple[Hashable, ...]:
return type(self), self._safe_name, self.op()

def _repr_png_(self) -> bytes | None:
if config.options.interactive or not config.options.graphviz_repr:
if options.interactive or not options.graphviz_repr:
return None
try:
import ibis.expr.visualize as viz
Expand Down Expand Up @@ -189,14 +189,15 @@ def pipe(self, f, *args: Any, **kwargs: Any) -> Expr:
def op(self) -> ops.Node:
return self._arg

def _find_backends(self) -> list[BaseBackend]:
def _find_backends(self) -> tuple[list[BaseBackend], bool]:
"""Return the possible backends for an expression.
Returns
-------
tuple[list[BaseBackend], bool]
The backends found, and whether the expression contains an unbound table.
"""
import ibis.expr.operations as ops
from ibis.backends.base import BaseBackend

seen_backends: dict[
Expand All @@ -205,11 +206,13 @@ def _find_backends(self) -> list[BaseBackend]:

stack = [self.op()]
seen = set()
has_unbound = False

while stack:
node = stack.pop()

if node not in seen:
has_unbound |= isinstance(node, ops.UnboundTable)
seen.add(node)

for arg in node.flat_args():
Expand All @@ -219,13 +222,36 @@ def _find_backends(self) -> list[BaseBackend]:
elif isinstance(arg, Expr):
stack.append(arg.op())

return list(seen_backends.values())
return list(seen_backends.values()), has_unbound

def _find_backend(self) -> BaseBackend:
backends = self._find_backends()
def _find_backend(self, *, use_default: bool = False) -> BaseBackend:
"""Find the backend attached to an expression.
Parameters
----------
use_default
If [`True`][True] and the default backend isn't set, initialize the
default backend and use that. This should only be set to `True` for
`.execute()`. For other contexts such as compilation, this option
doesn't make sense so the default value is [`False`][False].
Returns
-------
BaseBackend
A backend that is attached to the expression
"""
backends, has_unbound = self._find_backends()

if not backends:
default = config.options.default_backend
if has_unbound:
raise IbisError(
"Expression contains unbound tables and therefore cannot "
"be executed. Use ibis.<backend>.execute(expr) or "
"assign a backend instance to "
"`ibis.options.default_backend`."
)
if (default := options.default_backend) is None and use_default:
default = _default_backend()
if default is None:
raise IbisError(
'Expression depends on no backends, and found no default'
Expand Down Expand Up @@ -262,7 +288,7 @@ def execute(
params
Mapping of scalar parameter expressions to value
"""
return self._find_backend().execute(
return self._find_backend(use_default=True).execute(
self, limit=limit, timecontext=timecontext, params=params, **kwargs
)

Expand Down
11 changes: 11 additions & 0 deletions ibis/tests/expr/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1551,3 +1551,14 @@ def test_memtable_filter():
t = ibis.memtable([(1, 2), (3, 4), (5, 6)], columns=["x", "y"])
expr = t.filter(t.x > 1)
assert expr.columns == ["x", "y"]


def test_default_backend_with_unbound_table():
    """Executing an unbound-table expression raises ``IbisError``."""
    t = ibis.table({"a": "int"}, name="t")
    expr = t.a.sum()

    with pytest.raises(
        com.IbisError,
        match="Expression contains unbound tables",
    ):
        # Deliberately no `assert`: the call itself must raise, so its
        # return value is never checked.
        expr.execute()

0 comments on commit 8ccb81d

Please sign in to comment.