Skip to content

Commit

Permalink
refactor(udf): remove hierarchical usage of schema (#9078)
Browse files Browse the repository at this point in the history
I did not deprecate `schema` as we have elsewhere, because I don't think we can determine the quoting behavior of the backend at UDF definition time, which makes accepting dotted string paths harder. 

BREAKING CHANGE: The `schema` parameter for UDF definition has been removed. A new `catalog` parameter has been added. Ibis uses the word database to refer to a collection of tables, and the word catalog to refer to a collection of databases. You can use a combination of `catalog` and `database` to specify a hierarchical location for the UDF.
  • Loading branch information
gforsyth authored Apr 30, 2024
1 parent 4b5341b commit f5d9084
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 43 deletions.
2 changes: 1 addition & 1 deletion ibis/backends/bigquery/tests/unit/udf/test_builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
def farm_fingerprint(value: bytes) -> int: ...


@ibis.udf.scalar.builtin(schema="fn", database="bqutil")
@ibis.udf.scalar.builtin(database="fn", catalog="bqutil")
def from_hex(value: str) -> int:
"""Community function to convert from hex string to integer.
Expand Down
4 changes: 2 additions & 2 deletions ibis/backends/impala/udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def _create_operation_class(self):
fn=self._make_fn(),
name=self.name,
signature=(self.inputs, self.output),
schema=self.database,
database=self.database,
)


Expand All @@ -78,7 +78,7 @@ def _create_operation_class(self):
fn=self._make_fn(),
name=self.name,
signature=(self.inputs, self.output),
schema=self.database,
database=self.database,
)


Expand Down
10 changes: 5 additions & 5 deletions ibis/backends/postgres/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,15 +414,15 @@ def current_database(self) -> str:
(schema,) = cur.fetchone()
return schema

def function(self, name: str, *, schema: str | None = None) -> Callable:
def function(self, name: str, *, database: str | None = None) -> Callable:
n = ColGen(table="n")
p = ColGen(table="p")
f = self.compiler.f

predicates = [p.proname.eq(name)]

if schema is not None:
predicates.append(n.nspname.rlike(sge.convert(f"^({schema})$")))
if database is not None:
predicates.append(n.nspname.rlike(sge.convert(f"^({database})$")))

query = (
sg.select(
Expand All @@ -448,7 +448,7 @@ def split_name_type(arg: str) -> tuple[str, dt.DataType]:
rows = cur.fetchall()

if not rows:
name = f"{schema}.{name}" if schema else name
name = f"{database}.{name}" if database else name
raise exc.MissingUDFError(name)
elif len(rows) > 1:
raise exc.AmbiguousUDFError(name)
Expand All @@ -471,7 +471,7 @@ def fake_func(*args, **kwargs): ...
return_annotation=return_type,
)
fake_func.__annotations__ = {"return": return_type, **dict(signature)}
op = ops.udf.scalar.builtin(fake_func, schema=schema)
op = ops.udf.scalar.builtin(fake_func, database=database)
return op

def _get_udf_source(self, udf_node: ops.ScalarUDF):
Expand Down
12 changes: 6 additions & 6 deletions ibis/backends/postgres/tests/test_udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,15 +85,15 @@ def table(con_for_udf, table_name, test_database):
def test_existing_sql_udf(con_for_udf, test_database, table):
"""Test creating ibis UDF object based on existing UDF in the database."""
# Create ibis UDF objects referring to UDFs already created in the database
custom_length_udf = con_for_udf.function("custom_len", schema=test_database)
custom_length_udf = con_for_udf.function("custom_len", database=test_database)
result_obj = table[table, custom_length_udf(table["user_name"]).name("custom_len")]
result = result_obj.execute()
assert result["custom_len"].sum() == result["name_length"].sum()


def test_existing_plpython_udf(con_for_udf, test_database, table):
# Create ibis UDF objects referring to UDFs already created in the database
py_length_udf = con_for_udf.function("pylen", schema=test_database)
py_length_udf = con_for_udf.function("pylen", database=test_database)
result_obj = table[table, py_length_udf(table["user_name"]).name("custom_len")]
result = result_obj.execute()
assert result["custom_len"].sum() == result["name_length"].sum()
Expand All @@ -103,7 +103,7 @@ def test_udf(test_database, table):
"""Test creating a UDF in database based on Python function and then
creating an ibis UDF object based on that."""

@udf.scalar.python(schema=test_database)
@udf.scalar.python(database=test_database)
def mult_a_b(a: int, b: int) -> int:
return a * b

Expand All @@ -129,7 +129,7 @@ def test_array_type(test_database, table):
instantiated specifying the datatype of the elements of the array.
"""

@udf.scalar.python(schema=test_database)
@udf.scalar.python(database=test_database)
def pysplit(text: str, split: str) -> list[str]:
return text.split(split)

Expand All @@ -142,7 +142,7 @@ def test_client_udf_api(test_database, table):
"""Test creating a UDF in database based on Python function using an ibis
client method."""

@udf.scalar.python(schema=test_database)
@udf.scalar.python(database=test_database)
def multiply(a: int, b: int) -> int:
return a * b

Expand Down Expand Up @@ -171,7 +171,7 @@ def wrapped(*args, **kwds):
return wrapped

@decorator
@udf.scalar.python(schema=test_database)
@udf.scalar.python(database=test_database)
def multiply(a: int, b: int) -> int:
return a * b

Expand Down
7 changes: 5 additions & 2 deletions ibis/backends/sql/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,7 @@ def _to_catalog_db_tuple(self, table_loc: sge.Table):
def _to_sqlglot_table(self, database):
if database is None:
return None
elif isinstance(database, tuple):
elif isinstance(database, (list, tuple)):
if len(database) > 2:
raise ValueError(
"Only database hierarchies of two or fewer levels are supported."
Expand Down Expand Up @@ -558,6 +558,9 @@ def _to_sqlglot_table(self, database):
db = table.args["this"]
database = sg.exp.Table(catalog=catalog, db=db)
else:
raise ValueError("oops")
raise ValueError(
"""Invalid database hierarchy format. Please use either dotted
strings ('catalog.database') or tuples ('catalog', 'database')."""
)

return database
Loading

0 comments on commit f5d9084

Please sign in to comment.