diff --git a/ibis/backends/base/sql/alchemy/registry.py b/ibis/backends/base/sql/alchemy/registry.py index f4a809c364e1..15bf2cf10503 100644 --- a/ibis/backends/base/sql/alchemy/registry.py +++ b/ibis/backends/base/sql/alchemy/registry.py @@ -492,6 +492,17 @@ def _substring(t, op): ) +def _string_concat(t, op): + # We can't use the builtin sa.func.concat because we want NULL + + # to be NULL. But with sa.func.concat, NULL + is . + # See https://github.com/ibis-project/ibis/issues/8302 + sa_args = (t.translate(a) for a in op.arg) + first, *rest = sa_args + for arg in rest: + first += arg + return first + + def _gen_string_find(func): def string_find(t, op): if op.end is not None: @@ -661,7 +672,7 @@ class array_filter(FunctionElement): ops.StringSQLILike: functools.partial(_string_like, "ilike"), ops.StartsWith: _startswith, ops.EndsWith: _endswith, - ops.StringConcat: varargs(sa.func.concat), + ops.StringConcat: _string_concat, ops.Substring: _substring, # math ops.Ln: unary(sa.func.ln), diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py index 8e006cdf5643..2b8bb9e9cdd4 100644 --- a/ibis/backends/tests/test_string.py +++ b/ibis/backends/tests/test_string.py @@ -1019,6 +1019,41 @@ def test_capitalize(con): assert con.execute(expr) == expected +_PD_NOTIMPL = pytest.mark.notimpl( + ["pandas"], + raises=TypeError, +) + + +@pytest.mark.parametrize( + ("args, expected"), + [ + pytest.param((ibis.literal(None, str), None), None, marks=_PD_NOTIMPL), + pytest.param(("abc", None), None, marks=_PD_NOTIMPL), + pytest.param(("abc", ibis.literal(None, str)), None, marks=_PD_NOTIMPL), + pytest.param(("abc", "def", None), None, marks=_PD_NOTIMPL), + (("abc", "def"), "abcdef"), + (("abc", "def"), "abcdef"), + ], +) +@pytest.mark.parametrize("method", ["plus", "concat"]) +def test_string_concat(con, args, method, expected): + args = [ + ibis.literal(arg, type="string") if isinstance(arg, str) else arg + for arg in args + ] + first, rest = args[0], args[1:] + if method == "plus": + expr = first + for arg in rest: + expr = expr + arg + elif method == "concat": + expr = first.concat(*rest) + else: + raise ValueError(f"Unknown method {method}") + assert con.execute(expr) == expected + + @pytest.mark.notimpl( ["dask", "pandas", "polars", "druid", "oracle", "flink"], raises=OperationNotDefinedError, diff --git a/ibis/expr/types/strings.py b/ibis/expr/types/strings.py index ce44800760a0..8f1fc1d26829 100644 --- a/ibis/expr/types/strings.py +++ b/ibis/expr/types/strings.py @@ -1488,7 +1488,7 @@ def split(self, delimiter: str | StringValue) -> ir.ArrayValue: return ops.StringSplit(self, delimiter).to_expr() def concat(self, other: str | StringValue, *args: str | StringValue) -> StringValue: - """Concatenate strings. + """Concatenate strings. NULLS are propagated. Equivalent to the `+` operator. Parameters ---------- @@ -1506,16 +1506,24 @@ def concat(self, other: str | StringValue, *args: str | StringValue) -> StringVa -------- >>> import ibis >>> ibis.options.interactive = True - >>> t = ibis.memtable({"s": ["abc", "bac", "bca"]}) - >>> t.s.concat("xyz") + >>> t = ibis.memtable({"s": ["abc", None]}) + >>> t.s.concat("xyz", "123") + ┏━━━━━━━━━━━━━━━━┓ + ┃ StringConcat() ┃ + ┡━━━━━━━━━━━━━━━━┩ + │ string │ + ├────────────────┤ + │ abcxyz123 │ + │ NULL │ + └────────────────┘ + >>> t.s + "xyz" ┏━━━━━━━━━━━━━━━━┓ ┃ StringConcat() ┃ ┡━━━━━━━━━━━━━━━━┩ │ string │ ├────────────────┤ │ abcxyz │ - │ bacxyz │ - │ bcaxyz │ + │ NULL │ └────────────────┘ """ return ops.StringConcat((self, other, *args)).to_expr()