Skip to content

Commit

Permalink
feat(api): add TableUnnest operation to support cross-join unnest s…
Browse files Browse the repository at this point in the history
…emantics as well as `offset`
  • Loading branch information
cpcloud committed Jun 21, 2024
1 parent 1839c13 commit ff89731
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 0 deletions.
17 changes: 17 additions & 0 deletions ibis/backends/sql/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1512,6 +1512,23 @@ def visit_Unsupported(self, op, **_):
f"{type(op).__name__!r} operation is not supported in the {self.dialect} backend"
)

def visit_TableUnnest(self, op, *, parent, column, offset: str | None):
    """Compile a `TableUnnest` op into a ``SELECT ... CROSS JOIN UNNEST(...)``.

    Parameters
    ----------
    op
        The `TableUnnest` operation being compiled.
    parent
        The already-compiled parent relation that is selected from.
    column
        The already-compiled array expression passed to ``UNNEST``.
    offset
        Name of the offset column to expose, or `None` for no offset column.

    Returns
    -------
    A sqlglot select expression projecting every parent column, then the
    unnested column, then (if requested) the offset column.
    """
    quoted = self.quoted
    parent_alias = parent.alias_or_name

    # Every column of the parent relation, qualified by the parent's alias.
    selections = [
        sg.column(name, parent_alias, quoted=quoted)
        for name in op.parent.schema.keys()
    ]

    # The unnested values are exposed under the name of the input column.
    column_alias = sg.to_identifier(op.column.name, quoted=quoted)
    selections.append(column_alias)

    # The offset goes *after* the unnested column so that the projection
    # order matches `TableUnnest.schema` (parent columns, unnested column,
    # offset); emitting it first would disagree with the declared schema.
    if offset is not None:
        offset = sg.to_identifier(offset, quoted=quoted)
        selections.append(offset)

    unnest = sge.Unnest(
        expressions=[column],
        alias=sge.TableAlias(columns=[column_alias]),
        offset=offset,
    )
    return sg.select(*selections).from_(parent).join(unnest, how="CROSS")


# `__init_subclass__` is only called for subclasses, not for the base class
# itself - we manually call it here to autogenerate the base class
# implementations as well.
Expand Down
7 changes: 7 additions & 0 deletions ibis/backends/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1388,3 +1388,10 @@ def test_zip_unnest_lift(con):
result = con.execute(lifted)
expected = pd.DataFrame({"f1": [1, 2, 3], "f2": [4, 5, 6]})
tm.assert_frame_equal(result, expected)


def test_table_unnest(backend):
    """Smoke test: unnesting an array column with an offset yields rows."""
    array_table = backend.array_types
    unnested = array_table.unnest(
        array_table.x.name("unnested_x"), offset="i"
    )
    df = unnested.execute()
    # Only checks that the query runs and produces at least one row.
    assert len(df) > 0
18 changes: 18 additions & 0 deletions ibis/expr/operations/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,4 +468,22 @@ class Distinct(Simple):
"""Compute the distinct rows of a table."""


@public
class TableUnnest(Simple):
    """Cross join unnest operation.

    The resulting schema is the parent's schema extended with one column
    holding the array's element type and, optionally, an ``int64`` offset
    column.
    """

    # Array-valued expression whose elements are unnested.
    column: Value[dt.Array]
    # Name of the offset column, or `None` to omit it.
    offset: typing.Optional[str]

    @attribute
    def schema(self):
        """Return the parent schema plus the unnested (and offset) column."""
        unnested = self.column
        offset_name = self.offset

        # The unnested column keeps the input column's name and takes the
        # array's element type.
        result = self.parent.schema | Schema(
            {unnested.name: unnested.dtype.value_type}
        )
        if offset_name is None:
            return result

        # The offset column, when requested, is appended last.
        result |= Schema({offset_name: dt.int64})
        return result


# TODO(kszucs): support t.select(*t) syntax by implementing Table.__iter__()
7 changes: 7 additions & 0 deletions ibis/expr/types/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -4677,6 +4677,13 @@ def value_counts(self) -> ir.Table:
lambda t: t.count().name("_".join(columns) + "_count")
)

def unnest(self, column, offset: str | None = None) -> Table:
    """Unnest an array `column` from a table.

    Parameters
    ----------
    column
        The array column to unnest.
    offset
        Optional name of a column enumerating array indices; omitted when
        `None`.

    Returns
    -------
    Table
        Table expression built from a `TableUnnest` operation on this table.
    """
    unnest_op = ops.TableUnnest(parent=self, column=column, offset=offset)
    return unnest_op.to_expr()


@public
class CachedTable(Table):
Expand Down

0 comments on commit ff89731

Please sign in to comment.