Skip to content

Commit

Permalink
feat(api): add array to string join operation
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud authored and kszucs committed Feb 8, 2023
1 parent fd6ea5b commit 74de349
Show file tree
Hide file tree
Showing 10 changed files with 132 additions and 2 deletions.
1 change: 1 addition & 0 deletions ibis/backends/bigquery/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -723,6 +723,7 @@ def _interval_multiply(t, op):
ops.RandomScalar: fixed_arity("RAND", 0),
ops.NthValue: _nth_value,
ops.JSONGetItem: lambda t, op: f"{t.translate(op.arg)}[{t.translate(op.index)}]",
ops.ArrayStringJoin: lambda t, op: f"ARRAY_TO_STRING({t.translate(op.arg)}, {t.translate(op.sep)})",
}

_invalid_operations = {
Expand Down
7 changes: 7 additions & 0 deletions ibis/backends/clickhouse/compiler/values.py
Original file line number Diff line number Diff line change
Expand Up @@ -1323,3 +1323,10 @@ def _extract_query(op, **kw):
def _extract_fragment(op, **kw):
arg = translate_val(op.arg, **kw)
return f"nullIf(fragment({arg}), '')"


@translate_val.register(ops.ArrayStringJoin)
def _array_string_join(op, **kw):
    """Translate `ops.ArrayStringJoin` into an `arrayStringConcat(...)` call.

    Both operands are translated with `translate_val` (forwarding `**kw`),
    the array first and the separator second, and the resulting fragments
    are interpolated into the returned string.
    """
    operands = (op.arg, op.sep)
    array_sql, sep_sql = (translate_val(node, **kw) for node in operands)
    return f"arrayStringConcat({array_sql}, {sep_sql})"
3 changes: 3 additions & 0 deletions ibis/backends/duckdb/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,9 @@ def _struct_column(t, op):
ops.IntervalAdd: fixed_arity(operator.add, 2),
ops.IntervalSubtract: fixed_arity(operator.sub, 2),
ops.Capitalize: alchemy.sqlalchemy_operation_registry[ops.Capitalize],
ops.ArrayStringJoin: fixed_arity(
lambda sep, arr: sa.func.array_aggr(arr, sa.text("'string_agg'"), sep), 2
),
}
)

Expand Down
3 changes: 3 additions & 0 deletions ibis/backends/postgres/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -617,5 +617,8 @@ def translate(t, op: ops.ArgMin | ops.ArgMax) -> str:
else_=sa.null(),
)
),
ops.ArrayStringJoin: fixed_arity(
lambda sep, arr: sa.func.array_to_string(arr, sep), 2
),
}
)
7 changes: 7 additions & 0 deletions ibis/backends/pyspark/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1917,3 +1917,10 @@ def compile_argmin(t, op, **kwargs):
@compiles(ops.ArgMax)
def compile_argmax(t, op, **kwargs):
return compile_aggregator(t, op, fn=F.max_by, **kwargs)


@compiles(ops.ArrayStringJoin)
def compile_array_string_join(t, op, **kwargs):
    """Compile `ops.ArrayStringJoin` to `F.concat_ws(sep, array)`.

    The array operand is translated normally; the separator is translated
    with `raw=True`.
    """
    array_col = t.translate(op.arg, **kwargs)
    # NOTE(review): `raw=True` presumably yields a plain Python value rather
    # than a column expression, since it is passed as concat_ws's separator
    # -- confirm against the translator.
    separator = t.translate(op.sep, raw=True, **kwargs)
    return F.concat_ws(separator, array_col)
12 changes: 12 additions & 0 deletions ibis/backends/tests/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -542,3 +542,15 @@ def test_capitalize(con):
expected = "Abc"
expr = s.capitalize()
assert con.execute(expr) == expected


@pytest.mark.notimpl(["dask", "datafusion", "pandas", "polars"])
@pytest.mark.notyet(["impala", "mssql", "mysql", "sqlite"], reason="no arrays")
def test_array_string_join(con):
    """Joining an array of strings gives the same result from either API.

    Exercises both spellings: `separator.join(array)` on a string
    expression and `array.join(separator)` on an array expression.
    """
    letters = ibis.array(["a", "b", "c"])
    expected = "a,b,c"

    # String-side entry point: the separator is the receiver.
    joined_from_separator = con.execute(ibis.literal(",").join(letters))
    assert joined_from_separator == expected

    # Array-side entry point: the array is the receiver.
    joined_from_array = con.execute(letters.join(","))
    assert joined_from_array == expected
3 changes: 3 additions & 0 deletions ibis/backends/trino/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,9 @@ def _cot(t, op):
),
ops.TypeOf: unary(sa.func.typeof),
ops.Unnest: _unnest,
ops.ArrayStringJoin: fixed_arity(
lambda sep, arr: sa.func.array_join(arr, sep), 2
),
}
)

Expand Down
9 changes: 9 additions & 0 deletions ibis/expr/operations/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,15 @@ def output_shape(self):
return rlz.highest_precedence_shape(self.arg)


@public
class ArrayStringJoin(Value):
    """Operation joining the elements of a string array with a separator."""

    # Declaration order is kept as-is: the expression APIs build this node
    # positionally with the separator first and the array second.
    sep = rlz.string
    arg = rlz.value(dt.Array(dt.string))

    # The result is always a string; its shape is derived from the arguments.
    output_dtype = dt.string
    output_shape = rlz.shape_like("args")


@public
class StartsWith(Value):
arg = rlz.string
Expand Down
47 changes: 47 additions & 0 deletions ibis/expr/types/arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,53 @@ def unnest(self) -> ir.Value:
except com.ExpressionError:
return expr

def join(self, sep: str | ir.StringValue) -> ir.StringValue:
    """Join the elements of this array expression with `sep`.

    Parameters
    ----------
    sep
        Separator to use for joining array elements

    Returns
    -------
    StringValue
        Elements of `self` joined with `sep`

    Examples
    --------
    >>> import ibis
    >>> ibis.options.interactive = True
    >>> t = ibis.memtable({"arr": [["a", "b", "c"], None, [], ["b", None]]})
    >>> t
    ┏━━━━━━━━━━━━━━━━━━━━━━┓
    ┃ arr                  ┃
    ┡━━━━━━━━━━━━━━━━━━━━━━┩
    │ array<string>        │
    ├──────────────────────┤
    │ ['a', 'b', ... +1]   │
    │ ∅                    │
    │ []                   │
    │ ['b', None]          │
    └──────────────────────┘
    >>> t.arr.join("|")
    ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
    ┃ ArrayStringJoin('|', arr) ┃
    ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
    │ string                    │
    ├───────────────────────────┤
    │ a|b|c                     │
    │ ∅                         │
    │ ∅                         │
    │ b                         │
    └───────────────────────────┘

    See Also
    --------
    [`StringValue.join`][ibis.expr.types.strings.StringValue.join]
    """
    # The operation takes the separator first, then the array.
    join_op = ops.ArrayStringJoin(sep, self)
    return join_op.to_expr()


@public
class ArrayScalar(Scalar, ArrayValue):
Expand Down
42 changes: 40 additions & 2 deletions ibis/expr/types/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ def find_in_set(self, str_list: Sequence[str]) -> ir.IntegerValue:
"""
return ops.FindInSet(self, str_list).to_expr()

def join(self, strings: Sequence[str | StringValue]) -> StringValue:
def join(self, strings: Sequence[str | StringValue] | ir.ArrayValue) -> StringValue:
"""Join a list of strings using `self` as the separator.
Parameters
Expand All @@ -401,8 +401,46 @@ def join(self, strings: Sequence[str | StringValue]) -> StringValue:
-------
StringValue
Joined string
Examples
--------
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.memtable({"arr": [["a", "b", "c"], None, [], ["b", None]]})
>>> t
┏━━━━━━━━━━━━━━━━━━━━━━┓
┃ arr ┃
┡━━━━━━━━━━━━━━━━━━━━━━┩
│ array<string> │
├──────────────────────┤
│ ['a', 'b', ... +1] │
│ ∅ │
│ [] │
│ ['b', None] │
└──────────────────────┘
>>> ibis.literal("|").join(t.arr)
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ ArrayStringJoin('|', arr) ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ string │
├───────────────────────────┤
│ a|b|c │
│ ∅ │
│ ∅ │
│ b │
└───────────────────────────┘
See Also
--------
[`ArrayValue.join`][ibis.expr.types.arrays.ArrayValue.join]
"""
return ops.StringJoin(self, strings).to_expr()
import ibis.expr.types as ir

if isinstance(strings, ir.ArrayValue):
cls = ops.ArrayStringJoin
else:
cls = ops.StringJoin
return cls(self, strings).to_expr()

def startswith(self, start: str | StringValue) -> ir.BooleanValue:
"""Determine whether `self` starts with `start`.
Expand Down

0 comments on commit 74de349

Please sign in to comment.