Skip to content

Commit

Permalink
fix: handle NULLs consistently in String.concat()
Browse files Browse the repository at this point in the history
partially fixes #8302.
Still need to make pandas actually handle NULLs at all.
  • Loading branch information
NickCrews committed Feb 10, 2024
1 parent 0c3e73c commit cdd93db
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 6 deletions.
13 changes: 12 additions & 1 deletion ibis/backends/base/sql/alchemy/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,17 @@ def _substring(t, op):
)


def _string_concat(t, op):
# We can't use the builtin sa.func.concat because we want NULL + <anything>
# to be NULL. But with sa.func.concat, NULL + <anything> is <anything>.
# See https://github.com/ibis-project/ibis/issues/8302
sa_args = (t.translate(a) for a in op.arg)
first, *rest = sa_args
for arg in rest:
first += arg
return first


def _gen_string_find(func):
def string_find(t, op):
if op.end is not None:
Expand Down Expand Up @@ -661,7 +672,7 @@ class array_filter(FunctionElement):
ops.StringSQLILike: functools.partial(_string_like, "ilike"),
ops.StartsWith: _startswith,
ops.EndsWith: _endswith,
ops.StringConcat: varargs(sa.func.concat),
ops.StringConcat: _string_concat,
ops.Substring: _substring,
# math
ops.Ln: unary(sa.func.ln),
Expand Down
35 changes: 35 additions & 0 deletions ibis/backends/tests/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -1019,6 +1019,41 @@ def test_capitalize(con):
assert con.execute(expr) == expected


_PD_NOTIMPL = pytest.mark.notimpl(
["pandas"],
raises=TypeError,
)


@pytest.mark.parametrize(
("args, expected"),
[
pytest.param((ibis.literal(None, str), None), None, marks=_PD_NOTIMPL),
pytest.param(("abc", None), None, marks=_PD_NOTIMPL),
pytest.param(("abc", ibis.literal(None, str)), None, marks=_PD_NOTIMPL),
pytest.param(("abc", "def", None), None, marks=_PD_NOTIMPL),
(("abc", "def"), "abcdef"),
(("abc", "def"), "abcdef"),
],
)
@pytest.mark.parametrize("method", ["plus", "concat"])
def test_string_concat(con, args, method, expected):
args = [
ibis.literal(arg, type="string") if isinstance(arg, str) else arg
for arg in args
]
first, rest = args[0], args[1:]
if method == "plus":
expr = first
for arg in rest:
expr = expr + arg
elif method == "concat":
expr = first.concat(*rest)
else:
raise ValueError(f"Unknown method {method}")

Check warning on line 1053 in ibis/backends/tests/test_string.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/tests/test_string.py#L1053

Added line #L1053 was not covered by tests
assert con.execute(expr) == expected


@pytest.mark.notimpl(
["dask", "pandas", "polars", "druid", "oracle", "flink"],
raises=OperationNotDefinedError,
Expand Down
18 changes: 13 additions & 5 deletions ibis/expr/types/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -1488,7 +1488,7 @@ def split(self, delimiter: str | StringValue) -> ir.ArrayValue:
return ops.StringSplit(self, delimiter).to_expr()

def concat(self, other: str | StringValue, *args: str | StringValue) -> StringValue:
"""Concatenate strings.
"""Concatenate strings. NULLS are propagated. Equivalent to the `+` operator.
Parameters
----------
Expand All @@ -1506,16 +1506,24 @@ def concat(self, other: str | StringValue, *args: str | StringValue) -> StringVa
--------
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.memtable({"s": ["abc", "bac", "bca"]})
>>> t.s.concat("xyz")
>>> t = ibis.memtable({"s": ["abc", None]})
>>> t.s.concat("xyz", "123")
┏━━━━━━━━━━━━━━━━┓
┃ StringConcat() ┃
┡━━━━━━━━━━━━━━━━┩
│ string │
├────────────────┤
│ abcxyz123 │
│ NULL │
└────────────────┘
>>> t.s + "xyz"
┏━━━━━━━━━━━━━━━━┓
┃ StringConcat() ┃
┡━━━━━━━━━━━━━━━━┩
│ string │
├────────────────┤
│ abcxyz │
│ bacxyz │
│ bcaxyz │
│ NULL │
└────────────────┘
"""
return ops.StringConcat((self, other, *args)).to_expr()
Expand Down

0 comments on commit cdd93db

Please sign in to comment.