Skip to content

Commit

Permalink
refactor(ir): glue patterns and rules together
Browse files Browse the repository at this point in the history
Replace the previous rules-based validation system with the new pattern
matching system. This enables the use of type annotations for operation
definitions as well as better error handling. It also lays the groundwork
for enabling static type checking in the future.

BREAKING CHANGE: the `ibis.common.validators` module has been removed,
as have all validation rules from `ibis.expr.rules`; use either type hints
or patterns from `ibis.common.patterns` instead
  • Loading branch information
kszucs authored and cpcloud committed Aug 7, 2023
1 parent 1ee60b8 commit c20ba7f
Show file tree
Hide file tree
Showing 94 changed files with 2,318 additions and 2,981 deletions.
7 changes: 4 additions & 3 deletions docs/how_to/extending/elementwise.ipynb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 6 additions & 4 deletions docs/how_to/extending/reduction.ipynb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions docs/tutorial/ibis-for-sql-users.ipynb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion ibis/backends/bigquery/tests/unit/test_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
from ibis import _
from ibis.common.patterns import ValidationError

to_sql = ibis.bigquery.compile

Expand Down Expand Up @@ -557,7 +558,7 @@ def test_cov(alltypes, how, snapshot):
def test_cov_invalid_how(alltypes):
d = alltypes.double_col

with pytest.raises(ValueError):
with pytest.raises(ValidationError):
d.cov(d, how="error")


Expand Down
8 changes: 5 additions & 3 deletions ibis/backends/bigquery/udf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,10 @@ def js(
if libraries is None:
libraries = []

udf_node_fields = {name: rlz.value(type_) for name, type_ in params.items()}
udf_node_fields = {
name: rlz.ValueOf(None if type_ == "ANY TYPE" else type_)
for name, type_ in params.items()
}

udf_node_fields["output_dtype"] = output_type
udf_node_fields["output_shape"] = rlz.shape_like("args")
Expand Down Expand Up @@ -362,10 +365,9 @@ def sql(
"""
validate_output_type(output_type)
udf_node_fields = {
name: rlz.any if type_ == "ANY TYPE" else rlz.value(type_)
name: rlz.ValueOf(None if type_ == "ANY TYPE" else type_)
for name, type_ in params.items()
}

return_type = BigQueryType.from_ibis(dt.dtype(output_type))

udf_node_fields["output_dtype"] = output_type
Expand Down
6 changes: 1 addition & 5 deletions ibis/backends/clickhouse/compiler/values.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import ibis.expr.analysis as an
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
import ibis.expr.rules as rlz
from ibis.backends.base.sql.registry import helpers
from ibis.backends.clickhouse.datatypes import serialize

Expand Down Expand Up @@ -850,10 +849,7 @@ def tr(op, *, cache, **kw):
left_arg = helpers.parenthesize(left_arg)

# special case non-foreign isin/notin expressions
if (
not isinstance(options, tuple)
and options.output_shape is rlz.Shape.COLUMNAR
):
if not isinstance(options, tuple) and options.output_shape.is_columnar():
# this will fail to execute if there's a correlation, but it's too
# annoying to detect so we let it through to enable the
# uncorrelated use case (pandas-style `.isin`)
Expand Down
7 changes: 4 additions & 3 deletions ibis/backends/clickhouse/tests/test_aggregations.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
import pandas.testing as tm
import pytest

from ibis import literal as L
import ibis
from ibis.common.patterns import ValidationError

pytest.importorskip("clickhouse_connect")

Expand All @@ -33,9 +34,9 @@ def test_std_var_pop(con, alltypes, method, translate, snapshot):

@pytest.mark.parametrize('reduction', ['sum', 'count', 'max', 'min'])
def test_reduction_invalid_where(alltypes, reduction):
condbad_literal = L('T')
condbad_literal = ibis.literal('T')

with pytest.raises(TypeError):
with pytest.raises(ValidationError):
fn = methodcaller(reduction, where=condbad_literal)
fn(alltypes.double_col)

Expand Down
3 changes: 2 additions & 1 deletion ibis/backends/dask/tests/execution/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import ibis
import ibis.expr.datatypes as dt
from ibis.common.exceptions import OperationNotDefinedError
from ibis.common.patterns import ValidationError

dd = pytest.importorskip("dask.dataframe")
from dask.dataframe.utils import tm # noqa: E402
Expand Down Expand Up @@ -196,7 +197,7 @@ def test_quantile_scalar(t, df, ibis_func, dask_func):
# out of range on quantile
(lambda x: x.quantile(5.0), ValueError),
# invalid interpolation arg
(lambda x: x.quantile(0.5, interpolation='foo'), ValueError),
(lambda x: x.quantile(0.5, interpolation='foo'), ValidationError),
],
)
def test_arraylike_functions_transform_errors(t, df, ibis_func, exc):
Expand Down
4 changes: 2 additions & 2 deletions ibis/backends/impala/tests/test_udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import ibis.expr.types as ir
from ibis import util
from ibis.backends.impala import ddl
from ibis.common.exceptions import IbisTypeError
from ibis.common.patterns import ValidationError
from ibis.expr import rules

pytest.importorskip("impala")
Expand Down Expand Up @@ -223,7 +223,7 @@ def test_udf_invalid_typecasting(ty, valid_cast_indexer, all_cols):
func = _register_udf([ty], 'int32', 'typecast')

for expr in all_cols[valid_cast_indexer]:
with pytest.raises(IbisTypeError):
with pytest.raises(ValidationError):
func(expr)


Expand Down
7 changes: 4 additions & 3 deletions ibis/backends/impala/tests/test_unary_builtins.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@

import pytest

import ibis
import ibis.expr.types as ir
from ibis import literal as L
from ibis.backends.impala.tests.conftest import translate
from ibis.common.patterns import ValidationError


@pytest.fixture(scope="module")
Expand Down Expand Up @@ -99,8 +100,8 @@ def test_reduction_where(table, expr_fn, snapshot):

@pytest.mark.parametrize("method_name", ["sum", "count", "mean", "max", "min"])
def test_reduction_invalid_where(table, method_name):
condbad_literal = L('T')
condbad_literal = ibis.literal('T')
reduction = getattr(table.double_col, method_name)

with pytest.raises(TypeError):
with pytest.raises(ValidationError):
reduction(where=condbad_literal)
14 changes: 5 additions & 9 deletions ibis/backends/impala/udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
import ibis.expr.rules as rlz
import ibis.legacy.udf.validate as v
from ibis import util
from ibis.backends.base.sql.registry import fixed_arity, sql_type_names
from ibis.backends.impala.compiler import ImpalaExprTranslator
from ibis.legacy.udf.validate import validate_output_type

__all__ = [
'add_operation',
Expand Down Expand Up @@ -67,15 +67,15 @@ def register(self, name: str, database: str) -> None:

class ScalarFunction(Function):
def _create_operation_class(self):
fields = {f'_{i}': rlz.value(dtype) for i, dtype in enumerate(self.inputs)}
fields = {f'_{i}': rlz.ValueOf(dtype) for i, dtype in enumerate(self.inputs)}
fields['output_dtype'] = self.output
fields['output_shape'] = rlz.shape_like('args')
return type(f"UDF_{self.name}", (ops.Value,), fields)


class AggregateFunction(Function):
def _create_operation_class(self):
fields = {f'_{i}': rlz.value(dtype) for i, dtype in enumerate(self.inputs)}
fields = {f'_{i}': rlz.ValueOf(dtype) for i, dtype in enumerate(self.inputs)}
fields['output_dtype'] = self.output
return type(f"UDA_{self.name}", (ops.Reduction,), fields)

Expand All @@ -101,7 +101,7 @@ class ImpalaUDF(ScalarFunction, ImpalaFunction):
"""Feel free to customize my __doc__ or wrap in a nicer user API."""

def __init__(self, inputs, output, so_symbol=None, lib_path=None, name=None):
v.validate_output_type(output)
validate_output_type(output)
self.so_symbol = so_symbol
ImpalaFunction.__init__(self, name=name, lib_path=lib_path)
ScalarFunction.__init__(self, inputs, output, name=self.name)
Expand Down Expand Up @@ -136,7 +136,7 @@ def __init__(
self.finalize_fn = finalize_fn
self.serialize_fn = serialize_fn

v.validate_output_type(output)
validate_output_type(output)

ImpalaFunction.__init__(self, name=name, lib_path=lib_path)
AggregateFunction.__init__(self, inputs, output, name=self.name)
Expand Down Expand Up @@ -268,10 +268,6 @@ def add_operation(op, func_name, db):
database the relevant operator is registered to
"""
full_name = f'{db}.{func_name}'
# TODO
# if op.input_type is rlz.listof:
# translator = comp.varargs(full_name)
# else:
arity = len(op.__signature__.parameters)
translator = fixed_arity(full_name, arity)

Expand Down
3 changes: 2 additions & 1 deletion ibis/backends/pandas/tests/execution/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from ibis.backends.pandas.execution import execute
from ibis.backends.pandas.tests.conftest import TestConf as tm
from ibis.backends.pandas.udf import udf
from ibis.common.patterns import ValidationError


@pytest.mark.parametrize(
Expand Down Expand Up @@ -172,7 +173,7 @@ def test_quantile_multi(t, df, ibis_func, pandas_func, column):
# out of range on quantile
(lambda x: x.quantile(5.0), ValueError),
# invalid interpolation arg
(lambda x: x.quantile(0.5, interpolation='foo'), ValueError),
(lambda x: x.quantile(0.5, interpolation='foo'), ValidationError),
],
)
def test_arraylike_functions_transform_errors(t, ibis_func, exc):
Expand Down
4 changes: 2 additions & 2 deletions ibis/backends/pandas/tests/execution/test_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@
from packaging.version import parse as vparse

import ibis
import ibis.common.exceptions as com
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
from ibis.backends.base.df.scope import Scope
from ibis.backends.pandas import Backend
from ibis.backends.pandas.dispatch import pre_execute
from ibis.backends.pandas.execution import execute
from ibis.backends.pandas.tests.conftest import TestConf as tm
from ibis.common.patterns import ValidationError
from ibis.legacy.udf.vectorized import reduction


Expand Down Expand Up @@ -502,7 +502,7 @@ def test_window_with_mlb():
tm.assert_frame_equal(result, expected)

rows_with_mlb = ibis.rows_with_max_lookback(5, 10)
with pytest.raises(com.IbisTypeError):
with pytest.raises(ValidationError):
t.mutate(
sum=lambda df: df.a.sum().over(
ibis.trailing_window(rows_with_mlb, order_by='time')
Expand Down
6 changes: 3 additions & 3 deletions ibis/backends/postgres/udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
import ibis
import ibis.expr.datatypes as dt
import ibis.expr.rules as rlz
import ibis.legacy.udf.validate as v
from ibis import IbisError
from ibis.backends.postgres.compiler import PostgreSQLExprTranslator, PostgresUDFNode
from ibis.backends.postgres.datatypes import PostgresType
from ibis.legacy.udf.validate import validate_output_type

_udf_name_cache: MutableMapping[str, Any] = collections.defaultdict(itertools.count)

Expand Down Expand Up @@ -70,10 +70,10 @@ def existing_udf(name, input_types, output_type, schema=None, parameters=None):
).format(len(input_types), len(parameters))
)

v.validate_output_type(output_type)
validate_output_type(output_type)

udf_node_fields = {
name: rlz.value(type_) for name, type_ in zip(parameters, input_types)
name: rlz.ValueOf(type_) for name, type_ in zip(parameters, input_types)
}
udf_node_fields['name'] = name
udf_node_fields['output_dtype'] = output_type
Expand Down
Loading

0 comments on commit c20ba7f

Please sign in to comment.