From a3bed10ae2b2f423a80c06b4e4955282d1b80b74 Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Tue, 19 Dec 2023 06:36:35 -0500
Subject: [PATCH] feat(polars): implement `ops.RegexSplit` using pyarrow UDF

---
 ibis/backends/polars/compiler.py   | 33 +++++++++++++++++++++++++++++++++
 ibis/backends/tests/test_string.py |  1 -
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/ibis/backends/polars/compiler.py b/ibis/backends/polars/compiler.py
index 390a248dd81c..16b3d3cc5722 100644
--- a/ibis/backends/polars/compiler.py
+++ b/ibis/backends/polars/compiler.py
@@ -1202,6 +1202,39 @@ def execute_agg_udf(op, **kw):
     return getattr(first, op.__func_name__)(*rest)
 
 
+@translate.register(ops.RegexSplit)
+def execute_regex_split(op, **kw):
+    """Translate ``ops.RegexSplit`` into a Polars expression.
+
+    The string operand and the pattern operand are both translated and
+    passed to ``pl.map_batches``. The batch function splits the strings
+    with pyarrow's ``split_pattern_regex`` and converts the result back
+    with ``pl.from_arrow``.
+
+    The batch function raises ``com.IbisError`` unless the pattern series
+    holds exactly one element.
+    """
+    # Local import: runs when this translation rule is invoked.
+    import pyarrow.compute as pc
+
+    def _split_batch(series):
+        strings, regexes = series
+        if len(regexes) == 1:
+            # The single pattern value is applied to every input string.
+            pieces = pc.split_pattern_regex(strings.to_arrow(), regexes[0])
+            return pl.from_arrow(pieces)
+        raise com.IbisError(
+            "Only a single scalar pattern is supported for Polars re_split"
+        )
+
+    operands = (translate(op.arg, **kw), translate(op.pattern, **kw))
+    return pl.map_batches(
+        exprs=operands,
+        function=_split_batch,
+        return_dtype=dtype_to_polars(op.dtype),
+    )
+
+
 @translate.register(ops.IntegerRange)
 def execute_integer_range(op, **kw):
     if not isinstance(op.step, ops.Literal):
diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py
index 5e2cdc394917..a32bce65ce0f 100644
--- a/ibis/backends/tests/test_string.py
+++ b/ibis/backends/tests/test_string.py
@@ -1113,7 +1113,6 @@ def test_non_match_regex_search_is_false(con):
         "exasol",
         "pandas",
         "bigquery",
-        "polars",
     ],
     raises=com.OperationNotDefinedError,
 )