From ced5f539ef00192dbcdcd495e5546c523aa62846 Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Wed, 13 Jul 2022 07:41:41 -0400
Subject: [PATCH] feat(api): add `Table.unpack()` and `StructValue.lift()` APIs for projecting struct fields

---
NOTE(review): the line breaks and indentation of the relations.py hunk below
were reconstructed from a whitespace-collapsed copy of this patch, and the
`struct<...>` type parameters in its docstring example had been stripped.
Field names `b` and `c` are taken from the example output; the field types
(`float` / `float64`, `string`) and the indentation of the context lines are
assumed -- confirm against the upstream commit before applying.

 ibis/expr/types/relations.py   | 48 ++++++++++++++++++++++++++++++++++
 ibis/expr/types/structs.py     | 37 ++++++++++++++++++++++++++
 ibis/tests/expr/test_struct.py | 44 +++++++++++++++++++++++++++++++
 3 files changed, 129 insertions(+)

diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py
index 3c532a446f2a..7a1244fe0287 100644
--- a/ibis/expr/types/relations.py
+++ b/ibis/expr/types/relations.py
@@ -867,6 +867,54 @@ def fillna(
                 )
         return ops.FillNa(self, replacements).to_expr()
 
+    def unpack(self, *columns: str) -> Table:
+        """Project the struct fields of each of `columns` into `self`.
+
+        Existing fields are retained in the projection.
+
+        Parameters
+        ----------
+        columns
+            String column names to project into `self`.
+
+        Returns
+        -------
+        Table
+            The child table with struct fields of each of `columns` projected.
+
+        Examples
+        --------
+        >>> schema = dict(a="struct<b: float, c: string>", d="string")
+        >>> t = ibis.table(schema, name="t")
+        >>> t
+        UnboundTable: t
+          a struct<b: float64, c: string>
+          d string
+        >>> t.unpack("a")
+        r0 := UnboundTable: t
+          a struct<b: float64, c: string>
+          d string
+
+        Selection[r0]
+          selections:
+            b: StructField(r0.a, field='b')
+            c: StructField(r0.a, field='c')
+            d: r0.d
+
+        See Also
+        --------
+        ibis.expr.types.structs.StructValue.lift
+        """
+        columns_to_unpack = frozenset(columns)
+        result_columns = []
+        for column in self.columns:
+            if column in columns_to_unpack:
+                expr = self[column]
+                result_columns.extend(expr[field] for field in expr.names)
+            else:
+                result_columns.append(column)
+        return self[result_columns]
+
     def info(self, buf: IO[str] | None = None) -> None:
         """Show column names, types and null counts.
 
diff --git a/ibis/expr/types/structs.py b/ibis/expr/types/structs.py
index ff1c8d72b108..9c2e35c75a76 100644
--- a/ibis/expr/types/structs.py
+++ b/ibis/expr/types/structs.py
@@ -112,6 +112,43 @@ def fields(self) -> Mapping[str, dt.DataType]:
         """Return a mapping from field name to field type of the struct."""
         return util.frozendict(self.type().pairs)
 
+    def lift(self) -> ir.Table:
+        """Project the fields of `self` into a table.
+
+        This method is useful when analyzing data that has deeply nested
+        structs or arrays of structs. `lift` can be chained to avoid repeating
+        column names and table references.
+
+        See also [`Table.unpack`][ibis.expr.types.relations.Table.unpack].
+
+        Returns
+        -------
+        Table
+            A projection with this struct expression's fields.
+
+        Examples
+        --------
+        >>> schema = dict(a="struct<b: float, c: string>", d="string")
+        >>> t = ibis.table(schema, name="t")
+        >>> t
+        UnboundTable: t
+          a struct<b: float64, c: string>
+          d string
+        >>> t.a.lift()
+        r0 := UnboundTable: t
+          a struct<b: float64, c: string>
+          d string
+
+        Selection[r0]
+          selections:
+            b: StructField(r0.a, field='b')
+            c: StructField(r0.a, field='c')
+        """
+        import ibis.expr.analysis as an
+
+        table = an.find_first_base_table(self)
+        return table[[self[name] for name in self.names]]
+
     def destructure(self) -> DestructValue:
         """Destructure `self` into a `DestructValue`.
 
diff --git a/ibis/tests/expr/test_struct.py b/ibis/tests/expr/test_struct.py
index 6c5f8e3020c2..2e96e655a34b 100644
--- a/ibis/tests/expr/test_struct.py
+++ b/ibis/tests/expr/test_struct.py
@@ -5,9 +5,22 @@
 import ibis
 import ibis.expr.operations as ops
 import ibis.expr.types as ir
+from ibis import _
 from ibis.tests.util import assert_pickle_roundtrip
 
 
+@pytest.fixture
+def t():
+    return ibis.table(
+        dict(a="struct<b: float, c: string>", d="string"), name="t"
+    )
+
+
+@pytest.fixture
+def s():
+    return ibis.table(dict(a="struct<f: float, g: string>"), name="s")
+
+
 def test_struct_operations():
     value = OrderedDict(
         [
@@ -42,3 +55,34 @@ def test_struct_pickle():
     )
 
     assert_pickle_roundtrip(struct_scalar_expr)
+
+
+def test_lift(t):
+    assert t.a.lift().equals(t[_.a.b, _.a.c])
+
+
+def test_unpack_from_table(t):
+    assert t.unpack("a").equals(t[_.a.b, _.a.c, _.d])
+
+
+def test_lift_join(t, s):
+    join = t.join(s, t.d == s.a.g)
+    result = join.a_y.lift()
+    expected = join[_.a_y.f, _.a_y.g]
+    assert result.equals(expected)
+
+
+def test_unpack_join_from_table(t, s):
+    join = t.join(s, t.d == s.a.g)
+    result = join.unpack("a_y")
+    expected = join[_.a_x, _.d, _.a_y.f, _.a_y.g]
+    assert result.equals(expected)
+
+
+def test_nested_lift():
+    t = ibis.table(
+        {"a": "struct<b: struct<x: int, y: int>, c: string>", "d": "string"},
+        name="t",
+    )
+    expr = t.a.b.lift()
+    assert expr.schema() == ibis.schema({"x": "int", "y": "int"})