From 2d14ccca308f03f8a246a6d7e61bb463519514ac Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Sun, 21 May 2023 07:50:37 -0400 Subject: [PATCH] feat(duckdb): support array zip operation --- ibis/backends/duckdb/registry.py | 19 ++++++++++++++++ ibis/backends/tests/test_array.py | 1 - ibis/expr/types/arrays.py | 37 +++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 1 deletion(-) diff --git a/ibis/backends/duckdb/registry.py b/ibis/backends/duckdb/registry.py index 4c3d5bfa413e..7b3cf8016100 100644 --- a/ibis/backends/duckdb/registry.py +++ b/ibis/backends/duckdb/registry.py @@ -302,6 +302,24 @@ def _map_merge(t, op): ) +def _array_zip(t, op): + args = tuple(map(t.translate, op.arg)) + + i = sa.literal_column("i", type_=sa.INTEGER) + dtype = op.output_dtype + return array_map( + sa.func.range(1, sa.func.greatest(*map(sa.func.array_length, args)) + 1), + i, + struct_pack( + { + name: sa.func.list_extract(arg, i) + for name, arg in zip(dtype.value_type.names, args) + }, + type=t.get_sqla_type(dtype), + ), + ) + + operation_registry.update( { ops.ArrayColumn: ( @@ -342,6 +360,7 @@ def _map_merge(t, op): ops.ArrayUnion: fixed_arity( lambda left, right: sa.func.list_distinct(sa.func.list_cat(left, right)), 2 ), + ops.ArrayZip: _array_zip, ops.DayOfWeekName: unary(sa.func.dayname), ops.Literal: _literal, ops.Log2: unary(sa.func.log2), diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index d8a2c92cc29a..780c8e20d688 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -715,7 +715,6 @@ def test_unnest_struct(con): "dask", "datafusion", "druid", - "duckdb", "oracle", "pandas", "polars", diff --git a/ibis/expr/types/arrays.py b/ibis/expr/types/arrays.py index 7b614b1420b8..3ab57476291e 100644 --- a/ibis/expr/types/arrays.py +++ b/ibis/expr/types/arrays.py @@ -774,6 +774,43 @@ def zip(self, other: ir.Array, *others: ir.Array) -> ir.Array: Array 
Array of structs where each struct field is an element of each input array. + + Examples + -------- + >>> import ibis + >>> ibis.options.interactive = True + >>> t = ibis.memtable({"numbers": [[3, 2], [], None], "strings": [["a", "c"], None, ["e"]]}) + >>> t + ┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ numbers ┃ strings ┃ + ┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━┩ + │ array<int64> │ array<string> │ + ├──────────────────────┼──────────────────────┤ + │ [3, 2] │ ['a', 'c'] │ + │ [] │ NULL │ + │ NULL │ ['e'] │ + └──────────────────────┴──────────────────────┘ + >>> expr = t.numbers.zip(t.strings) + >>> expr + ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ ArrayZip() ┃ + ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ + │ array<struct<f1: int64, f2: string>> │ + ├──────────────────────────────────────┤ + │ [{...}, {...}] │ + │ [] │ + │ [{...}] │ + └──────────────────────────────────────┘ + >>> expr.unnest() + ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ ArrayZip() ┃ + ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ + │ struct<f1: int64, f2: string> │ + ├───────────────────────────────┤ + │ {'f1': 3, 'f2': 'a'} │ + │ {'f1': 2, 'f2': 'c'} │ + │ {'f1': None, 'f2': 'e'} │ + └───────────────────────────────┘ """ return ops.ArrayZip((self, other, *others)).to_expr()