Skip to content

Commit

Permalink
feat(api): add Table.unpack() and StructValue.lift() APIs for projecting struct fields
Browse files (browse the repository at this point in the history)
  • Loading branch information
cpcloud committed Aug 6, 2022
1 parent 2e67918 commit ced5f53
Show file tree
Hide file tree
Showing 3 changed files with 129 additions and 0 deletions.
48 changes: 48 additions & 0 deletions ibis/expr/types/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -867,6 +867,54 @@ def fillna(
)
return ops.FillNa(self, replacements).to_expr()

def unpack(self, *columns: str) -> Table:
    """Replace each named struct column with one column per struct field.

    Columns of `self` that are not named in `columns` are kept as they
    are, and the result follows the column order of `self`: the fields of
    an unpacked struct appear where the struct column used to be.

    Names in `columns` that do not match any column of `self` have no
    effect, because only the table's own columns are visited.

    Parameters
    ----------
    columns
        String names of the struct columns whose fields should be
        projected into `self`.

    Returns
    -------
    Table
        The child table with the struct fields of each of `columns`
        projected.

    Examples
    --------
    >>> schema = dict(a="struct<b: float, c: string>", d="string")
    >>> t = ibis.table(schema, name="t")
    >>> t
    UnboundTable: t
      a struct<b: float64, c: string>
      d string
    >>> t.unpack("a")
    r0 := UnboundTable: t
      a struct<b: float64, c: string>
      d string
    Selection[r0]
      selections:
        b: StructField(r0.a, field='b')
        c: StructField(r0.a, field='c')
        d: r0.d

    See Also
    --------
    ibis.expr.types.structs.StructValue.lift
    """
    requested = frozenset(columns)

    def expand(name):
        # Untouched columns pass through by name; requested ones are
        # replaced by one field-access expression per struct field.
        if name not in requested:
            return [name]
        struct = self[name]
        return [struct[field] for field in struct.names]

    projection = [item for name in self.columns for item in expand(name)]
    return self[projection]

def info(self, buf: IO[str] | None = None) -> None:
"""Show column names, types and null counts.
Expand Down
37 changes: 37 additions & 0 deletions ibis/expr/types/structs.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,43 @@ def fields(self) -> Mapping[str, dt.DataType]:
"""Return a mapping from field name to field type of the struct."""
return util.frozendict(self.type().pairs)

def lift(self) -> ir.Table:
    """Turn the fields of this struct expression into table columns.

    The result is a projection of the first base table found for `self`
    that contains one column per struct field, in the order given by
    `self.names`.

    This is handy when working with deeply nested structs or arrays of
    structs: calls to `lift` can be chained, which avoids repeating
    column names and table references.

    See also [`Table.unpack`][ibis.expr.types.relations.Table.unpack].

    Returns
    -------
    Table
        A projection with this struct expression's fields.

    Examples
    --------
    >>> schema = dict(a="struct<b: float, c: string>", d="string")
    >>> t = ibis.table(schema, name="t")
    >>> t
    UnboundTable: t
      a struct<b: float64, c: string>
      d string
    >>> t.a.lift()
    r0 := UnboundTable: t
      a struct<b: float64, c: string>
      d string
    Selection[r0]
      selections:
        b: StructField(r0.a, field='b')
        c: StructField(r0.a, field='c')
    """
    # Imported inside the method, as in the original implementation.
    import ibis.expr.analysis as an

    base_table = an.find_first_base_table(self)
    field_exprs = [self[field_name] for field_name in self.names]
    return base_table[field_exprs]

def destructure(self) -> DestructValue:
"""Destructure `self` into a `DestructValue`.
Expand Down
44 changes: 44 additions & 0 deletions ibis/tests/expr/test_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,22 @@
import ibis
import ibis.expr.operations as ops
import ibis.expr.types as ir
from ibis import _
from ibis.tests.util import assert_pickle_roundtrip


@pytest.fixture
def t():
    """Unbound table `t` with a struct column `a` and a string column `d`."""
    schema = {"a": "struct<b: float, c: string>", "d": "string"}
    return ibis.table(schema, name="t")


@pytest.fixture
def s():
    """Unbound table `s` with a single struct column `a`."""
    schema = {"a": "struct<f: float, g: string>"}
    return ibis.table(schema, name="s")


def test_struct_operations():
value = OrderedDict(
[
Expand Down Expand Up @@ -42,3 +55,34 @@ def test_struct_pickle():
)

assert_pickle_roundtrip(struct_scalar_expr)


def test_lift(t):
    """Lifting `t.a` equals selecting each field of `a` from `t`."""
    lifted = t.a.lift()
    expected = t[_.a.b, _.a.c]
    assert lifted.equals(expected)


def test_unpack_from_table(t):
    """Unpacking `a` yields its fields followed by the untouched `d`."""
    unpacked = t.unpack("a")
    expected = t[_.a.b, _.a.c, _.d]
    assert unpacked.equals(expected)


def test_lift_join(t, s):
    """Lifting a struct column of a join projects from the join itself."""
    predicate = t.d == s.a.g
    joined = t.join(s, predicate)
    lifted = joined.a_y.lift()
    assert lifted.equals(joined[_.a_y.f, _.a_y.g])


def test_unpack_join_from_table(t, s):
    """Unpacking `a_y` on a join keeps `a_x` and `d`, then adds `f`, `g`."""
    predicate = t.d == s.a.g
    joined = t.join(s, predicate)
    unpacked = joined.unpack("a_y")
    assert unpacked.equals(joined[_.a_x, _.d, _.a_y.f, _.a_y.g])


def test_nested_lift():
    """Lifting a struct nested inside a struct exposes the inner fields."""
    table_schema = dict(
        a="struct<b:struct<x: int, y: int>, c: string>",
        d="string",
    )
    nested = ibis.table(table_schema, name="t")
    lifted = nested.a.b.lift()
    assert lifted.schema() == ibis.schema({"x": "int", "y": "int"})

0 comments on commit ced5f53

Please sign in to comment.