diff --git a/ibis/expr/operations/arrays.py b/ibis/expr/operations/arrays.py
index 8110903d4429..67d1e736e676 100644
--- a/ibis/expr/operations/arrays.py
+++ b/ibis/expr/operations/arrays.py
@@ -186,3 +186,19 @@ def dtype(self):
                 }
             )
         )
+
+
+@public
+class ArrayFlatten(Value):
+    """Flatten a nested array one level.
+
+    `arg` must be an array of arrays. The result dtype is `arg`'s element type
+    (`arg.dtype.value_type`), so exactly one level of nesting is removed.
+    """
+
+    arg: Value[dt.Array[dt.Array]]  # an array whose elements are themselves arrays
+    shape = rlz.shape_like("arg")  # shape is derived from `arg` by rlz.shape_like
+
+    @property
+    def dtype(self):
+        return self.arg.dtype.value_type  # element type of the outer array
diff --git a/ibis/expr/types/arrays.py b/ibis/expr/types/arrays.py
index 09b59303beb7..568b546d3958 100644
--- a/ibis/expr/types/arrays.py
+++ b/ibis/expr/types/arrays.py
@@ -901,6 +901,102 @@ def zip(self, other: ArrayValue, *others: ArrayValue) -> ArrayValue:
 
         return ops.ArrayZip((self, other, *others)).to_expr()
 
+    def flatten(self) -> ir.ArrayValue:
+        """Remove one level of nesting from an array expression.
+
+        Returns
+        -------
+        ArrayValue
+            Flattened array expression
+
+        Examples
+        --------
+        >>> import ibis
+        >>> import ibis.selectors as s
+        >>> from ibis import _
+        >>> ibis.options.interactive = True
+        >>> schema = {
+        ...     "empty": "array>",
+        ...     "happy": "array>",
+        ...     "nulls_only": "array>>>",
+        ...     "mixed_nulls": "array>",
+        ... }
+        >>> data = {
+        ...     "empty": [[], [], []],
+        ...     "happy": [[["abc"]], [["bcd"]], [["def"]]],
+        ...     "nulls_only": [None, None, None],
+        ...     "mixed_nulls": [[], None, [None]],
+        ... }
+        >>> import pyarrow as pa
+        >>> t = ibis.memtable(
+        ...     pa.Table.from_pydict(
+        ...         data,
+        ...         schema=ibis.schema(schema).to_pyarrow(),
+        ...     )
+        ...
) + >>> t + ┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━┓ + ┃ empty ┃ happy ┃ nulls_only ┃ … ┃ + ┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━┩ + │ array> │ array> │ array>> t.empty.flatten() + ┏━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ ArrayFlatten(empty) ┃ + ┡━━━━━━━━━━━━━━━━━━━━━━┩ + │ array │ + ├──────────────────────┤ + │ [] │ + │ [] │ + │ [] │ + └──────────────────────┘ + >>> t.happy.flatten() + ┏━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ ArrayFlatten(happy) ┃ + ┡━━━━━━━━━━━━━━━━━━━━━━┩ + │ array │ + ├──────────────────────┤ + │ ['abc'] │ + │ ['bcd'] │ + │ ['def'] │ + └──────────────────────┘ + >>> t.nulls_only.flatten() + ┏━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ ArrayFlatten(nulls_only) ┃ + ┡━━━━━━━━━━━━━━━━━━━━━━━━━━┩ + │ array>> t.mixed_nulls.flatten() + ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ ArrayFlatten(mixed_nulls) ┃ + ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ + │ array │ + ├───────────────────────────┤ + │ [] │ + │ NULL │ + │ [] │ + └───────────────────────────┘ + >>> t.select(s.across(s.all(), _.flatten())) + ┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━┓ + ┃ empty ┃ happy ┃ nulls_only ┃ … ┃ + ┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━┩ + │ array │ array │ array