From 99e531d677419716d98843e4001a1d25d0578c8d Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Mon, 11 Sep 2023 14:33:19 -0400 Subject: [PATCH] feat(udf): support inputs without type annotations --- .../builtin/execute-results/html.json | 4 +- docs/how-to/extending/builtin.qmd | 41 +++++++++++++++++++ ibis/backends/clickhouse/tests/test_client.py | 14 ++++++- ibis/expr/operations/udf.py | 9 ++-- 4 files changed, 61 insertions(+), 7 deletions(-) diff --git a/docs/_freeze/how-to/extending/builtin/execute-results/html.json b/docs/_freeze/how-to/extending/builtin/execute-results/html.json index 026df8c6823b..36722b951080 100644 --- a/docs/_freeze/how-to/extending/builtin/execute-results/html.json +++ b/docs/_freeze/how-to/extending/builtin/execute-results/html.json @@ -1,7 +1,7 @@ { - "hash": "00ca1d2fd32eb50acb2ec2e7c7a57931", + "hash": "be8b66093ec8d56b7575bd43ecdab2e9", "result": { - "markdown": "---\nfreeze: auto\ntitle: Reference built-in functions\n---\n\n\n\n\n\nFunctions that aren't exposed in ibis directly can be accessed using the\n`@ibis.udf.scalar.builtin` decorator.\n\n::: {.callout-tip}\n## [Ibis APIs](../../reference/index.qmd) may already exist for your function.\n\nBuiltin scalar UDFs are designed to be an escape hatch when Ibis doesn't have\na defined API for a built-in database function.\n\nSee [the reference documentation](../../reference/index.qmd) for existing APIs.\n:::\n\n## DuckDB\n\nIbis doesn't directly expose many of the DuckDB [text similarity\nfunctions](https://duckdb.org/docs/sql/functions/char.html#text-similarity-functions).\nLet's expose the `mismatches` API.\n\n\n::: {#e0a2087b .cell execution_count=1}\n``` {.python .cell-code}\nfrom ibis import udf\n\n@udf.scalar.builtin\ndef mismatches(left: str, right: str) -> int:\n ...\n```\n:::\n\n\nThe [`...`](https://docs.python.org/3/library/constants.html#Ellipsis) is\na visual indicator that the function definition is unknown to Ibis.\n\n::: 
{.callout-note collapse=\"true\"}\n## Ibis does not do anything with the function body.\n\nIbis will not inspect the function body or otherwise inspect it. Any code you\nwrite in the function body **will be ignored**.\n:::\n\nWe can now call this function on any ibis expression:\n\n::: {#7c520722 .cell execution_count=2}\n``` {.python .cell-code}\nimport ibis\n\ncon = ibis.duckdb.connect() # <1>\n```\n:::\n\n\n1. Connect to an in-memory DuckDB database\n\n::: {#ac393010 .cell execution_count=3}\n``` {.python .cell-code}\nexpr = mismatches(\"duck\", \"luck\")\ncon.execute(expr)\n```\n\n::: {.cell-output .cell-output-display execution_count=3}\n```\n1\n```\n:::\n:::\n\n\nLike any other ibis expression you can inspect the SQL:\n\n::: {#bb4d8344 .cell execution_count=4}\n``` {.python .cell-code}\nimport ibis\n\nibis.to_sql(expr, dialect=\"duckdb\") # <1>\n```\n\n::: {.cell-output .cell-output-display execution_count=4}\n```sql\nSELECT\n MISMATCHES('duck', 'luck') AS \"mismatches('duck', 'luck')\"\n```\n:::\n:::\n\n\n1. The `dialect` keyword argument must be passed, because we constructed\n a literal expression which has no backend attached.\n\nBecause built-in UDFs are ultimately Ibis expressions, they compose with the\nrest of the library:\n\n::: {#1b20bc7e .cell execution_count=5}\n``` {.python .cell-code}\nibis.options.interactive = True\n\n@udf.scalar.builtin\ndef jaro_winkler_similarity(a: str, b: str) -> float:\n ...\n\npkgs = ibis.read_parquet(\n \"https://storage.googleapis.com/ibis-tutorial-data/pypi/packages.parquet\"\n)\npandas_ish = pkgs[jaro_winkler_similarity(pkgs.name, \"pandas\") >= 0.9]\npandas_ish\n```\n\n::: {.cell-output .cell-output-display execution_count=5}\n```{=html}\n
┏━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┓\n┃ name ┃ version ┃ requires_python ┃ yanked ┃ has_binary_wheel ┃ has_vulnerabilities ┃ first_uploaded_at ┃ last_uploaded_at ┃ recorded_at ┃ downloads ┃ scorecard_overall ┃ in_google_assured_oss ┃\n┡━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━┩\n│ string │ string │ string │ boolean │ boolean │ boolean │ timestamp │ timestamp │ timestamp │ int32 │ float64 │ boolean │\n├──────────┼───────────────────┼─────────────────┼─────────┼──────────────────┼─────────────────────┼─────────────────────┼─────────────────────┼─────────────────────┼───────────┼───────────────────┼───────────────────────┤\n│ bcpandas │ 2.4.1 │ >=3.8.1 │ True │ False │ False │ 2023-07-12 06:14:22 │ 2023-07-12 06:14:23 │ 2023-07-12 14:31:41 │ 0 │ nan │ False │\n│ espandas │ 1.0.4 │ ~ │ False │ False │ False │ 2018-12-22 20:52:30 │ 2018-12-22 20:52:30 │ 2023-07-12 14:58:47 │ 0 │ 3.6 │ False │\n│ fpandas │ 0.5 │ ~ │ False │ False │ False │ 2020-03-09 02:35:31 │ 2020-03-09 02:35:31 │ 2023-07-12 15:04:23 │ 0 │ nan │ False │\n│ h3pandas │ 0.2.4 │ >=3.6 │ False │ False │ False │ 2023-03-19 17:58:16 │ 2023-03-19 17:58:16 │ 2023-07-12 15:10:06 │ 0 │ nan │ False │\n│ ipandas │ 0.0.1 │ ~ │ False │ False │ False │ 2019-05-29 18:46:12 │ 2019-05-29 18:46:12 │ 2023-07-12 15:15:34 │ 0 │ 3.6 │ False │\n│ kpandas │ 0.0.1 │ >=3.6,<4.0 │ False │ False │ False │ 2019-05-02 18:00:29 │ 2019-05-02 18:00:31 │ 2023-07-12 15:20:21 │ 0 │ nan │ False │\n│ mpandas │ 0.0.2.1 │ ~ │ False │ False │ False │ 2022-07-03 16:21:21 │ 2022-07-03 16:21:23 │ 2023-07-12 15:30:35 │ 0 │ nan │ False │\n│ mtpandas │ 1.14.202306141807 │ >=3.6 │ False │ 
False │ False │ 2023-06-14 18:08:01 │ 2023-06-14 18:08:01 │ 2023-07-12 15:31:04 │ 0 │ 4.6 │ False │\n│ mypandas │ 0.1.6 │ >=3.10 │ False │ False │ False │ 2022-10-24 21:01:10 │ 2022-10-24 21:01:12 │ 2023-07-12 15:32:04 │ 0 │ nan │ False │\n│ paandas │ 0.0.3 │ ~ │ False │ False │ False │ 2022-11-24 06:11:15 │ 2022-11-24 06:11:17 │ 2023-07-12 15:43:31 │ 0 │ nan │ False │\n│ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │\n└──────────┴───────────────────┴─────────────────┴─────────┴──────────────────┴─────────────────────┴─────────────────────┴─────────────────────┴─────────────────────┴───────────┴───────────────────┴───────────────────────┘\n\n```\n:::\n:::\n\n\nLet's count the results:\n\n::: {#71158865 .cell execution_count=6}\n``` {.python .cell-code}\npandas_ish.count()\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=6}\n\n::: {.ansi-escaped-output}\n```{=html}\n
178
\n```\n:::\n\n:::\n:::\n\n\nThere are a good number of packages that look similar to `pandas`!\n\n## Snowflake\n\nSimilarly we can expose Snowflake's\n[`jarowinkler_similarity`](https://docs.snowflake.com/en/sql-reference/functions/jarowinkler_similarity)\nfunction.\n\nLet's alias it to `jw_sim` to illustrate some more of the Ibis `udf` API:\n\n::: {#cec67864 .cell execution_count=7}\n``` {.python .cell-code}\n@udf.scalar.builtin(name=\"jarowinkler_similarity\") # <1>\ndef jw_sim(left: str, right: str) -> float:\n ...\n```\n:::\n\n\n1. `target` is the name of the function in the backend. This argument is\n required in this because the function name is different than the name of the\n function in ibis.\n\n\nNow let's connect to Snowflake and call our `jw_sim` function:\n\n::: {#48696c70 .cell execution_count=8}\n``` {.python .cell-code}\nimport os\n\ncon = ibis.connect(os.environ[\"SNOWFLAKE_URL\"])\n```\n:::\n\n\n::: {#b8951169 .cell execution_count=9}\n``` {.python .cell-code}\nexpr = jw_sim(\"snow\", \"shoe\")\ncon.execute(expr)\n```\n\n::: {.cell-output .cell-output-display execution_count=9}\n```\n66.0\n```\n:::\n:::\n\n\nAnd let's take a look at the SQL\n\n::: {#1073bf2e .cell execution_count=10}\n``` {.python .cell-code}\nibis.to_sql(expr, dialect=\"snowflake\")\n```\n\n::: {.cell-output .cell-output-display execution_count=10}\n```sql\nSELECT\n JAROWINKLER_SIMILARITY('snow', 'shoe') AS \"jarowinkler_similarity('snow', 'shoe')\"\n```\n:::\n:::\n\n\n",
+ "markdown": "---\nfreeze: auto\ntitle: Reference built-in functions\n---\n\n\n\n\n\nFunctions that aren't exposed in ibis directly can be accessed using the\n`@ibis.udf.scalar.builtin` decorator.\n\n::: {.callout-tip}\n## [Ibis APIs](../../reference/index.qmd) may already exist for your function.\n\nBuiltin scalar UDFs are designed to be an escape hatch when Ibis doesn't have\na defined API for a built-in database function.\n\nSee [the reference documentation](../../reference/index.qmd) for existing APIs.\n:::\n\n## DuckDB\n\nIbis doesn't directly expose many of the DuckDB [text similarity\nfunctions](https://duckdb.org/docs/sql/functions/char.html#text-similarity-functions).\nLet's expose the `mismatches` API.\n\n\n::: {#a0ce6764 .cell execution_count=1}\n``` {.python .cell-code}\nfrom ibis import udf\n\n@udf.scalar.builtin\ndef mismatches(left: str, right: str) -> int:\n ...\n```\n:::\n\n\nThe [`...`](https://docs.python.org/3/library/constants.html#Ellipsis) is\na visual indicator that the function definition is unknown to Ibis.\n\n::: {.callout-note collapse=\"true\"}\n## Ibis does not do anything with the function body.\n\nIbis will not inspect the function body or otherwise inspect it. Any code you\nwrite in the function body **will be ignored**.\n:::\n\nWe can now call this function on any ibis expression:\n\n::: {#271b9916 .cell execution_count=2}\n``` {.python .cell-code}\nimport ibis\n\ncon = ibis.duckdb.connect() # <1>\n```\n:::\n\n\n1. 
Connect to an in-memory DuckDB database\n\n::: {#ef527d30 .cell execution_count=3}\n``` {.python .cell-code}\nexpr = mismatches(\"duck\", \"luck\")\ncon.execute(expr)\n```\n\n::: {.cell-output .cell-output-display execution_count=17}\n```\n1\n```\n:::\n:::\n\n\nLike any other ibis expression you can inspect the SQL:\n\n::: {#69b10261 .cell execution_count=4}\n``` {.python .cell-code}\nimport ibis\n\nibis.to_sql(expr, dialect=\"duckdb\") # <1>\n```\n\n::: {.cell-output .cell-output-display execution_count=18}\n```sql\nSELECT\n MISMATCHES('duck', 'luck') AS \"mismatches('duck', 'luck')\"\n```\n:::\n:::\n\n\n1. The `dialect` keyword argument must be passed, because we constructed\n a literal expression which has no backend attached.\n\nBecause built-in UDFs are ultimately Ibis expressions, they compose with the\nrest of the library:\n\n::: {#d44d4edc .cell execution_count=5}\n``` {.python .cell-code}\nibis.options.interactive = True\n\n@udf.scalar.builtin\ndef jaro_winkler_similarity(a: str, b: str) -> float:\n ...\n\npkgs = ibis.read_parquet(\n \"https://storage.googleapis.com/ibis-tutorial-data/pypi/packages.parquet\"\n)\npandas_ish = pkgs[jaro_winkler_similarity(pkgs.name, \"pandas\") >= 0.9]\npandas_ish\n```\n\n::: {.cell-output .cell-output-display execution_count=19}\n```{=html}\n┏━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┓\n┃ name ┃ version ┃ requires_python ┃ yanked ┃ has_binary_wheel ┃ has_vulnerabilities ┃ first_uploaded_at ┃ last_uploaded_at ┃ recorded_at ┃ downloads ┃ scorecard_overall ┃ in_google_assured_oss ┃\n┡━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━┩\n│ string │ string │ string │ boolean │ boolean │ 
boolean │ timestamp │ timestamp │ timestamp │ int32 │ float64 │ boolean │\n├──────────┼───────────────────┼─────────────────┼─────────┼──────────────────┼─────────────────────┼─────────────────────┼─────────────────────┼─────────────────────┼───────────┼───────────────────┼───────────────────────┤\n│ bcpandas │ 2.4.1 │ >=3.8.1 │ True │ False │ False │ 2023-07-12 06:14:22 │ 2023-07-12 06:14:23 │ 2023-07-12 14:31:41 │ 0 │ nan │ False │\n│ espandas │ 1.0.4 │ ~ │ False │ False │ False │ 2018-12-22 20:52:30 │ 2018-12-22 20:52:30 │ 2023-07-12 14:58:47 │ 0 │ 3.6 │ False │\n│ fpandas │ 0.5 │ ~ │ False │ False │ False │ 2020-03-09 02:35:31 │ 2020-03-09 02:35:31 │ 2023-07-12 15:04:23 │ 0 │ nan │ False │\n│ h3pandas │ 0.2.4 │ >=3.6 │ False │ False │ False │ 2023-03-19 17:58:16 │ 2023-03-19 17:58:16 │ 2023-07-12 15:10:06 │ 0 │ nan │ False │\n│ ipandas │ 0.0.1 │ ~ │ False │ False │ False │ 2019-05-29 18:46:12 │ 2019-05-29 18:46:12 │ 2023-07-12 15:15:34 │ 0 │ 3.6 │ False │\n│ kpandas │ 0.0.1 │ >=3.6,<4.0 │ False │ False │ False │ 2019-05-02 18:00:29 │ 2019-05-02 18:00:31 │ 2023-07-12 15:20:21 │ 0 │ nan │ False │\n│ mpandas │ 0.0.2.1 │ ~ │ False │ False │ False │ 2022-07-03 16:21:21 │ 2022-07-03 16:21:23 │ 2023-07-12 15:30:35 │ 0 │ nan │ False │\n│ mtpandas │ 1.14.202306141807 │ >=3.6 │ False │ False │ False │ 2023-06-14 18:08:01 │ 2023-06-14 18:08:01 │ 2023-07-12 15:31:04 │ 0 │ 4.6 │ False │\n│ mypandas │ 0.1.6 │ >=3.10 │ False │ False │ False │ 2022-10-24 21:01:10 │ 2022-10-24 21:01:12 │ 2023-07-12 15:32:04 │ 0 │ nan │ False │\n│ paandas │ 0.0.3 │ ~ │ False │ False │ False │ 2022-11-24 06:11:15 │ 2022-11-24 06:11:17 │ 2023-07-12 15:43:31 │ 0 │ nan │ False │\n│ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │\n└──────────┴───────────────────┴─────────────────┴─────────┴──────────────────┴─────────────────────┴─────────────────────┴─────────────────────┴─────────────────────┴───────────┴───────────────────┴───────────────────────┘\n\n```\n:::\n:::\n\n\nLet's count the 
results:\n\n::: {#a363610a .cell execution_count=6}\n``` {.python .cell-code}\npandas_ish.count()\n```\n\n::: {.cell-output .cell-output-display}\n```{=html}\n\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=20}\n\n::: {.ansi-escaped-output}\n```{=html}\n
178
\n```\n:::\n\n:::\n:::\n\n\nThere are a good number of packages that look similar to `pandas`!\n\n## Snowflake\n\nSimilarly we can expose Snowflake's\n[`jarowinkler_similarity`](https://docs.snowflake.com/en/sql-reference/functions/jarowinkler_similarity)\nfunction.\n\nLet's alias it to `jw_sim` to illustrate some more of the Ibis `udf` API:\n\n::: {#c6b88f1d .cell execution_count=7}\n``` {.python .cell-code}\n@udf.scalar.builtin(name=\"jarowinkler_similarity\") # <1>\ndef jw_sim(left: str, right: str) -> float:\n ...\n```\n:::\n\n\n1. `target` is the name of the function in the backend. This argument is\n required in this because the function name is different than the name of the\n function in ibis.\n\n\nNow let's connect to Snowflake and call our `jw_sim` function:\n\n::: {#4b0eeaa8 .cell execution_count=8}\n``` {.python .cell-code}\nimport os\n\ncon = ibis.connect(os.environ[\"SNOWFLAKE_URL\"])\n```\n:::\n\n\n::: {#2c651137 .cell execution_count=9}\n``` {.python .cell-code}\nexpr = jw_sim(\"snow\", \"shoe\")\ncon.execute(expr)\n```\n\n::: {.cell-output .cell-output-display execution_count=23}\n```\n66.0\n```\n:::\n:::\n\n\nAnd let's take a look at the SQL\n\n::: {#d2c051e5 .cell execution_count=10}\n``` {.python .cell-code}\nibis.to_sql(expr, dialect=\"snowflake\")\n```\n\n::: {.cell-output .cell-output-display execution_count=24}\n```sql\nSELECT\n JAROWINKLER_SIMILARITY('snow', 'shoe') AS \"jarowinkler_similarity('snow', 'shoe')\"\n```\n:::\n:::\n\n\n## Input types\n\nSometimes the input types of builtin functions are difficult to spell.\n\nConsider a function that computes the length of any array: the elements in the\narray can be floats, integers, strings and even other arrays. 
Spelling that\ntype is difficult.\n\nFortunately the `udf.scalar.builtin` decorator doesn't require you to specify\ninput types in these cases:\n\n::: {#7f171163 .cell execution_count=11}\n``` {.python .cell-code}\n@udf.scalar.builtin(name=\"array_size\")\ndef cardinality(arr) -> int:\n ...\n```\n:::\n\n\n::: {.callout-caution}\n## The return type annotation **is always required**.\n:::\n\nWe can pass arrays with different element types to our `cardinality` function:\n\n::: {#636298ba .cell execution_count=12}\n``` {.python .cell-code}\ncon.execute(cardinality([1, 2, 3]))\n```\n\n::: {.cell-output .cell-output-display execution_count=26}\n```\n3\n```\n:::\n:::\n\n\n::: {#a15e7c06 .cell execution_count=13}\n``` {.python .cell-code}\ncon.execute(cardinality([\"a\", \"b\"]))\n```\n\n::: {.cell-output .cell-output-display execution_count=27}\n```\n2\n```\n:::\n:::\n\n\nWhen you bypass input types the errors you get back are backend dependent:\n\n::: {#6c4d358c .cell execution_count=14}\n``` {.python .cell-code}\ncon.execute(cardinality(\"foo\"))\n```\n\n::: {.cell-output .cell-output-error}\n```\nProgrammingError: (snowflake.connector.errors.ProgrammingError) 001044 (42P13): SQL compilation error: error line 1 at position 7\nInvalid argument types for function 'ARRAY_SIZE': (VARCHAR(3))\n[SQL: SELECT array_size(%(param_1)s) AS \"array_size('foo')\"]\n[parameters: {'param_1': 'foo'}]\n(Background on this error at: https://sqlalche.me/e/14/f405)\n```\n:::\n:::\n\n\nHere, Snowflake is informing us that the `ARRAY_SIZE` function does not accept\nstrings as input.\n\n",
"supporting": [
"builtin_files"
],
diff --git a/docs/how-to/extending/builtin.qmd b/docs/how-to/extending/builtin.qmd
index a7e899773b71..141c2b9d16dd 100644
--- a/docs/how-to/extending/builtin.qmd
+++ b/docs/how-to/extending/builtin.qmd
@@ -128,3 +128,44 @@ And let's take a look at the SQL
```{python}
ibis.to_sql(expr, dialect="snowflake")
```
+
+## Input types
+
+Sometimes the input types of builtin functions are difficult to spell.
+
+Consider a function that computes the length of any array: the elements in the
+array can be floats, integers, strings and even other arrays. Spelling that
+type is difficult.
+
+Fortunately, the `udf.scalar.builtin` decorator doesn't require you to specify
+input types in these cases:
+
+```{python}
+@udf.scalar.builtin(name="array_size")
+def cardinality(arr) -> int:
+ ...
+```
+
+::: {.callout-caution}
+## The return type annotation **is always required**.
+:::
+
+We can pass arrays with different element types to our `cardinality` function:
+
+```{python}
+con.execute(cardinality([1, 2, 3]))
+```
+
+```{python}
+con.execute(cardinality(["a", "b"]))
+```
+
+When you bypass input types, the errors you get back are backend-dependent:
+
+```{python}
+#| error: true
+con.execute(cardinality("foo"))
+```
+
+Here, Snowflake is informing us that the `ARRAY_SIZE` function does not accept
+strings as input.
diff --git a/ibis/backends/clickhouse/tests/test_client.py b/ibis/backends/clickhouse/tests/test_client.py
index f3c7b1bd40cc..282fde8f0b47 100644
--- a/ibis/backends/clickhouse/tests/test_client.py
+++ b/ibis/backends/clickhouse/tests/test_client.py
@@ -251,12 +251,24 @@ def array_jaccard_index(a: dt.Array[dt.int64], b: dt.Array[dt.int64]) -> float:
...
+@udf.scalar.builtin(name="arrayJaccardIndex")
+def array_jaccard_index_no_input_types(a, b) -> float:
+ ...
+
+
@udf.scalar.builtin
def arrayJaccardIndex(a: dt.Array[dt.int64], b: dt.Array[dt.int64]) -> float:
...
-@pytest.mark.parametrize("func", [array_jaccard_index, arrayJaccardIndex])
+@pytest.mark.parametrize(
+ "func",
+ [
+ array_jaccard_index,
+ arrayJaccardIndex,
+ array_jaccard_index_no_input_types,
+ ],
+)
def test_builtin_udf(con, func):
expr = func([1, 2], [2, 3])
result = con.execute(expr)
diff --git a/ibis/expr/operations/udf.py b/ibis/expr/operations/udf.py
index 984750067dc8..ae7a1276f3fe 100644
--- a/ibis/expr/operations/udf.py
+++ b/ibis/expr/operations/udf.py
@@ -298,10 +298,11 @@ def _make_node(
func_name = name or fn.__name__
for arg_name, param in inspect.signature(fn).parameters.items():
- if (raw_dtype := annotations.get(arg_name)) is None:
- raise exc.MissingParameterAnnotationError(fn, arg_name)
-
- arg = rlz.ValueOf(dt.dtype(raw_dtype))
+ if (raw_dtype := annotations.get(arg_name)) is not None:
+ dtype = dt.dtype(raw_dtype)
+ else:
+ dtype = raw_dtype
+ arg = rlz.ValueOf(dtype)
fields[arg_name] = Argument(pattern=arg, default=param.default)
fields["dtype"] = dt.dtype(return_annotation)