# Patch summary (free-text preamble placed before the first "diff --git" header).
#
# ibis/backends/datafusion/compiler/values.py
#   - Imports FALSE alongside NULL from ibis.backends.base.sqlglot.
#   - regex_search (ops.RegexSearch) no longer returns the bare
#     `array_length(regexp_match(arg, pattern)) > 0` comparison. It now yields
#     NULL when either `arg` or `pattern` is NULL, and otherwise wraps the
#     comparison in coalesce(..., FALSE); per the inline comment, the
#     comparison is null for non-matching patterns and false is the desired
#     result for ops.RegexSearch.
#
# ibis/backends/tests/test_string.py
#   - Imports numpy as np.
#   - test_non_match_regex_search_is_false no longer asserts `is False`; it
#     asserts the executed result is a `bool` or `np.bool_` and is falsy.
#
# NOTE(review): line breaks and blank context lines were restored from the
# hunk header line counts -- confirm against the original commit.
diff --git a/ibis/backends/datafusion/compiler/values.py b/ibis/backends/datafusion/compiler/values.py
index 402ea004a489..1241b04266f1 100644
--- a/ibis/backends/datafusion/compiler/values.py
+++ b/ibis/backends/datafusion/compiler/values.py
@@ -11,6 +11,7 @@
 import ibis.expr.datatypes as dt
 import ibis.expr.operations as ops
 from ibis.backends.base.sqlglot import (
+    FALSE,
     NULL,
     AggGen,
     F,
@@ -441,7 +442,16 @@ def string_find(op, *, arg, substr, start, end, **_):
 
 @translate_val.register(ops.RegexSearch)
 def regex_search(op, *, arg, pattern, **_):
-    return F.array_length(F.regexp_match(arg, pattern)) > 0
+    return if_(
+        sg.or_(arg.is_(NULL), pattern.is_(NULL)),
+        NULL,
+        F.coalesce(
+            # null is returned for non-matching patterns, so coalesce to false
+            # because that is the desired behavior for ops.RegexSearch
+            F.array_length(F.regexp_match(arg, pattern)) > 0,
+            FALSE,
+        ),
+    )
 
 
 @translate_val.register(ops.StringContains)
diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py
index b43c6ccec9b1..9850c277e0e3 100644
--- a/ibis/backends/tests/test_string.py
+++ b/ibis/backends/tests/test_string.py
@@ -2,6 +2,7 @@
 
 import contextlib
 
+import numpy as np
 import pandas as pd
 import pytest
 import sqlalchemy as sa
@@ -1090,4 +1091,6 @@ def test_no_conditional_percent_escape(con, expr):
 )
 def test_non_match_regex_search_is_false(con):
     expr = ibis.literal("foo").re_search("bar")
-    assert con.execute(expr) is False
+    result = con.execute(expr)
+    assert isinstance(result, (bool, np.bool_))
+    assert not result