Skip to content

Commit

Permalink
refactor(udf): remove hierarchical usage of schema
Browse files Browse the repository at this point in the history
BREAKING CHANGE: The `schema` parameter for UDF definition has been
removed. A new `catalog` parameter has been added.
Ibis uses the word database to refer to a collection of
tables, and the word catalog to refer to a collection of databases. You
can use a combination of `catalog` and `database` to specify a
hierarchical location for the UDF.
  • Loading branch information
gforsyth committed Apr 29, 2024
1 parent 0d2e6a2 commit b42337d
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 41 deletions.
2 changes: 1 addition & 1 deletion ibis/backends/bigquery/tests/unit/udf/test_builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
def farm_fingerprint(value: bytes) -> int: ...


@ibis.udf.scalar.builtin(schema="fn", database="bqutil")
@ibis.udf.scalar.builtin(database="fn", catalog="bqutil")
def from_hex(value: str) -> int:
"""Community function to convert from hex string to integer.
Expand Down
4 changes: 2 additions & 2 deletions ibis/backends/impala/udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def _create_operation_class(self):
fn=self._make_fn(),
name=self.name,
signature=(self.inputs, self.output),
schema=self.database,
database=self.database,
)


Expand All @@ -78,7 +78,7 @@ def _create_operation_class(self):
fn=self._make_fn(),
name=self.name,
signature=(self.inputs, self.output),
schema=self.database,
database=self.database,
)


Expand Down
10 changes: 5 additions & 5 deletions ibis/backends/postgres/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,15 +414,15 @@ def current_database(self) -> str:
(schema,) = cur.fetchone()
return schema

def function(self, name: str, *, schema: str | None = None) -> Callable:
def function(self, name: str, *, database: str | None = None) -> Callable:
n = ColGen(table="n")
p = ColGen(table="p")
f = self.compiler.f

predicates = [p.proname.eq(name)]

if schema is not None:
predicates.append(n.nspname.rlike(sge.convert(f"^({schema})$")))
if database is not None:
predicates.append(n.nspname.rlike(sge.convert(f"^({database})$")))

query = (
sg.select(
Expand All @@ -448,7 +448,7 @@ def split_name_type(arg: str) -> tuple[str, dt.DataType]:
rows = cur.fetchall()

if not rows:
name = f"{schema}.{name}" if schema else name
name = f"{database}.{name}" if database else name

Check warning on line 451 in ibis/backends/postgres/__init__.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/postgres/__init__.py#L451

Added line #L451 was not covered by tests
raise exc.MissingUDFError(name)
elif len(rows) > 1:
raise exc.AmbiguousUDFError(name)
Expand All @@ -471,7 +471,7 @@ def fake_func(*args, **kwargs): ...
return_annotation=return_type,
)
fake_func.__annotations__ = {"return": return_type, **dict(signature)}
op = ops.udf.scalar.builtin(fake_func, schema=schema)
op = ops.udf.scalar.builtin(fake_func, database=database)
return op

def _get_udf_source(self, udf_node: ops.ScalarUDF):
Expand Down
12 changes: 6 additions & 6 deletions ibis/backends/postgres/tests/test_udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,15 +85,15 @@ def table(con_for_udf, table_name, test_database):
def test_existing_sql_udf(con_for_udf, test_database, table):
"""Test creating ibis UDF object based on existing UDF in the database."""
# Create ibis UDF objects referring to UDFs already created in the database
custom_length_udf = con_for_udf.function("custom_len", schema=test_database)
custom_length_udf = con_for_udf.function("custom_len", database=test_database)
result_obj = table[table, custom_length_udf(table["user_name"]).name("custom_len")]
result = result_obj.execute()
assert result["custom_len"].sum() == result["name_length"].sum()


def test_existing_plpython_udf(con_for_udf, test_database, table):
# Create ibis UDF objects referring to UDFs already created in the database
py_length_udf = con_for_udf.function("pylen", schema=test_database)
py_length_udf = con_for_udf.function("pylen", database=test_database)
result_obj = table[table, py_length_udf(table["user_name"]).name("custom_len")]
result = result_obj.execute()
assert result["custom_len"].sum() == result["name_length"].sum()
Expand All @@ -103,7 +103,7 @@ def test_udf(test_database, table):
"""Test creating a UDF in database based on Python function and then
creating an ibis UDF object based on that."""

@udf.scalar.python(schema=test_database)
@udf.scalar.python(database=test_database)
def mult_a_b(a: int, b: int) -> int:
return a * b

Expand All @@ -129,7 +129,7 @@ def test_array_type(test_database, table):
instantiated specifying the datatype of the elements of the array.
"""

@udf.scalar.python(schema=test_database)
@udf.scalar.python(database=test_database)
def pysplit(text: str, split: str) -> list[str]:
return text.split(split)

Expand All @@ -142,7 +142,7 @@ def test_client_udf_api(test_database, table):
"""Test creating a UDF in database based on Python function using an ibis
client method."""

@udf.scalar.python(schema=test_database)
@udf.scalar.python(database=test_database)
def multiply(a: int, b: int) -> int:
return a * b

Expand Down Expand Up @@ -171,7 +171,7 @@ def wrapped(*args, **kwds):
return wrapped

@decorator
@udf.scalar.python(schema=test_database)
@udf.scalar.python(database=test_database)
def multiply(a: int, b: int) -> int:
return a * b

Expand Down
94 changes: 67 additions & 27 deletions ibis/expr/operations/udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,12 +101,17 @@ def _make_node(
fn: Callable,
input_type: InputType,
name: str | None = None,
schema: str | None = None,
database: str | None = None,
catalog: str | None = None,
signature: tuple[tuple, Any] | None = None,
**kwargs,
) -> type[S]:
"""Construct a scalar user-defined function that is built-in to the backend."""
if "schema" in kwargs:
raise exc.UnsupportedArgumentError(

Check warning on line 111 in ibis/expr/operations/udf.py

View check run for this annotation

Codecov / codecov/patch

ibis/expr/operations/udf.py#L111

Added line #L111 was not covered by tests
"""schema` is not a valid argument.
You can use the `catalog` and `database` keywords to specify a UDF location."""
)

if signature is None:
annotations = typing.get_type_hints(fn)
Expand Down Expand Up @@ -139,7 +144,7 @@ def _make_node(
# method
"__func__": property(fget=lambda _, fn=fn: fn),
"__config__": FrozenDict(kwargs),
"__udf_namespace__": ops.Namespace(database=schema, catalog=database),
"__udf_namespace__": ops.Namespace(database=database, catalog=catalog),
"__module__": fn.__module__,
"__func_name__": func_name,
}
Expand Down Expand Up @@ -181,16 +186,23 @@ def builtin(
cls,
*,
name: str | None = None,
schema: str | None = None,
database: str | None = None,
catalog: str | None = None,
signature: tuple[tuple[Any, ...], Any] | None = None,
**kwargs: Any,
) -> Callable[[Callable], Callable[..., ir.Value]]: ...

@util.experimental
@classmethod
def builtin(
cls, fn=None, *, name=None, schema=None, database=None, signature=None, **kwargs
cls,
fn=None,
*,
name=None,
database=None,
catalog=None,
signature=None,
**kwargs,
):
"""Construct a scalar user-defined function that is built-in to the backend.
Expand All @@ -200,10 +212,10 @@ def builtin(
The function to wrap.
name
The name of the UDF in the backend if different from the function name.
schema
The schema in which the builtin function resides.
database
The database in which the builtin function resides.
catalog
The catalog in which the builtin function resides.
signature
If present, a tuple of the form `((arg0type, arg1type, ...), returntype)`.
For example, a function taking an int and a float and returning a
Expand Down Expand Up @@ -233,8 +245,8 @@ def builtin(
InputType.BUILTIN,
fn,
name=name,
schema=schema,
database=database,
catalog=catalog,
signature=signature,
**kwargs,
)
Expand All @@ -249,16 +261,23 @@ def python(
cls,
*,
name: str | None = None,
schema: str | None = None,
database: str | None = None,
catalog: str | None = None,
signature: tuple[tuple[Any, ...], Any] | None = None,
**kwargs: Any,
) -> Callable[[Callable], Callable[..., ir.Value]]: ...

@util.experimental
@classmethod
def python(
cls, fn=None, *, name=None, schema=None, database=None, signature=None, **kwargs
cls,
fn=None,
*,
name=None,
database=None,
catalog=None,
signature=None,
**kwargs,
):
"""Construct a **non-vectorized** scalar user-defined function that accepts Python scalar values as inputs.
Expand All @@ -281,10 +300,10 @@ def python(
The function to wrap.
name
The name of the UDF in the backend if different from the function name.
schema
The schema in which to create the UDF.
database
The database in which to create the UDF.
catalog
The catalog in which to create the UDF.
signature
If present, a tuple of the form `((arg0type, arg1type, ...), returntype)`.
For example, a function taking an int and a float and returning a
Expand Down Expand Up @@ -315,8 +334,8 @@ def python(
InputType.PYTHON,
fn,
name=name,
schema=schema,
database=database,
catalog=catalog,
signature=signature,
**kwargs,
)
Expand All @@ -331,16 +350,23 @@ def pandas(
cls,
*,
name: str | None = None,
schema: str | None = None,
database: str | None = None,
catalog: str | None = None,
signature: tuple[tuple[Any, ...], Any] | None = None,
**kwargs: Any,
) -> Callable[[Callable], Callable[..., ir.Value]]: ...

@util.experimental
@classmethod
def pandas(
cls, fn=None, *, name=None, schema=None, database=None, signature=None, **kwargs
cls,
fn=None,
*,
name=None,
database=None,
catalog=None,
signature=None,
**kwargs,
):
"""Construct a **vectorized** scalar user-defined function that accepts pandas Series' as inputs.
Expand All @@ -350,10 +376,10 @@ def pandas(
The function to wrap.
name
The name of the UDF in the backend if different from the function name.
schema
The schema in which to create the UDF.
database
The database in which to create the UDF.
catalog
The catalog in which to create the UDF.
signature
If present, a tuple of the form `((arg0type, arg1type, ...), returntype)`.
For example, a function taking an int and a float and returning a
Expand Down Expand Up @@ -386,8 +412,8 @@ def pandas(
InputType.PANDAS,
fn,
name=name,
schema=schema,
database=database,
catalog=catalog,
signature=signature,
**kwargs,
)
Expand All @@ -402,16 +428,23 @@ def pyarrow(
cls,
*,
name: str | None = None,
schema: str | None = None,
database: str | None = None,
catalog: str | None = None,
signature: tuple[tuple[Any, ...], Any] | None = None,
**kwargs: Any,
) -> Callable[[Callable], Callable[..., ir.Value]]: ...

@util.experimental
@classmethod
def pyarrow(
cls, fn=None, *, name=None, schema=None, database=None, signature=None, **kwargs
cls,
fn=None,
*,
name=None,
database=None,
catalog=None,
signature=None,
**kwargs,
):
"""Construct a **vectorized** scalar user-defined function that accepts PyArrow Arrays as input.
Expand All @@ -421,10 +454,10 @@ def pyarrow(
The function to wrap.
name
The name of the UDF in the backend if different from the function name.
schema
The schema in which to create the UDF.
database
The database in which to create the UDF.
catalog
The catalog in which to create the UDF.
signature
If present, a tuple of the form `((arg0type, arg1type, ...), returntype)`.
For example, a function taking an int and a float and returning a
Expand Down Expand Up @@ -456,8 +489,8 @@ def pyarrow(
InputType.PYARROW,
fn,
name=name,
schema=schema,
database=database,
catalog=catalog,
signature=signature,
**kwargs,
)
Expand All @@ -479,16 +512,23 @@ def builtin(
cls,
*,
name: str | None = None,
schema: str | None = None,
database: str | None = None,
catalog: str | None = None,
signature: tuple[tuple[Any, ...], Any] | None = None,
**kwargs: Any,
) -> Callable[[Callable], Callable[..., ir.Value]]: ...

@util.experimental
@classmethod
def builtin(
cls, fn=None, *, name=None, schema=None, database=None, signature=None, **kwargs
cls,
fn=None,
*,
name=None,
database=None,
catalog=None,
signature=None,
**kwargs,
):
"""Construct an aggregate user-defined function that is built-in to the backend.
Expand All @@ -498,10 +538,10 @@ def builtin(
The function to wrap.
name
The name of the UDF in the backend if different from the function name.
schema
The schema in which the builtin function resides.
database
The database in which the builtin function resides.
catalog
The catalog in which the builtin function resides.
signature
If present, a tuple of the form `((arg0type, arg1type, ...), returntype)`.
For example, a function taking an int and a float and returning a
Expand All @@ -528,8 +568,8 @@ def builtin(
InputType.BUILTIN,
fn,
name=name,
schema=schema,
database=database,
catalog=catalog,
signature=signature,
**kwargs,
)

0 comments on commit b42337d

Please sign in to comment.