From 3352a84ce4e28a5186ef0d1a1ec8c21f49da7209 Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Mon, 1 Jul 2024 10:22:26 -0400
Subject: [PATCH] feat(api): add `TableUnnest` operation to support cross-join
 unnest semantics as well as `offset` (#9423)

---
 ibis/backends/bigquery/compiler.py           |  44 ++++++
 ibis/backends/clickhouse/compiler.py         |  57 ++++++++
 ibis/backends/duckdb/compiler.py             |  53 +++++++
 ibis/backends/postgres/compiler.py           |  57 ++++++++
 ibis/backends/pyspark/compiler.py            |  65 +++++++++
 ibis/backends/snowflake/compiler.py          |  67 +++++++++
 ibis/backends/snowflake/tests/test_client.py |   9 ++
 ibis/backends/tests/test_array.py            |  99 ++++++++++++-
 ibis/backends/trino/compiler.py              |  57 ++++++++
 ibis/expr/operations/relations.py            |  23 +++
 ibis/expr/types/arrays.py                    |  20 ++-
 ibis/expr/types/relations.py                 | 143 +++++++++++++++++++
 12 files changed, 686 insertions(+), 8 deletions(-)

diff --git a/ibis/backends/bigquery/compiler.py b/ibis/backends/bigquery/compiler.py
index dd5aadfc1dad..7169f258b34e 100644
--- a/ibis/backends/bigquery/compiler.py
+++ b/ibis/backends/bigquery/compiler.py
@@ -690,3 +690,47 @@ def visit_DropColumns(self, op, *, parent, columns_to_drop):
         table = sg.to_identifier(parent.alias_or_name, quoted=quoted)
         column = sge.Column(this=star, table=table)
         return sg.select(column).from_(parent)
+
+    def visit_TableUnnest(
+        self, op, *, parent, column, offset: str | None, keep_empty: bool
+    ):
+        """Compile `TableUnnest` as `parent` joined to `UNNEST(column)`.
+
+        The unnested values get a generated alias and are projected under
+        `op.column.name`. When `offset` is given it is passed to the unnest as
+        its offset column and projected last. `keep_empty` selects a LEFT join
+        instead of a CROSS join.
+        """
+        quoted = self.quoted
+
+        name = op.column.name
+        parent_ref = sg.to_identifier(parent.alias_or_name, quoted=quoted)
+        elem = sg.to_identifier(util.gen_name("table_unnest_column"), quoted=quoted)
+        renamed = elem.as_(name, quoted=quoted)
+
+        if name in op.parent.schema:
+            # the unnested column reuses the name of a parent column
+            # (e.g., table.unnest("x")): replace that column in place
+            star = sge.Star(replace=[renamed])
+            projections = [sge.Column(this=star, table=parent_ref)]
+        else:
+            # a new name: keep every parent column and append the new one
+            projections = [sge.Column(this=STAR, table=parent_ref), renamed]
+
+        offset_ident = None
+        if offset is not None:
+            offset_ident = sg.to_identifier(offset, quoted=quoted)
+            projections.append(offset_ident)
+
+        unnest = sge.Unnest(
+            expressions=[column],
+            alias=sge.TableAlias(columns=[elem]),
+            offset=offset_ident,
+        )
+
+        join_type = "LEFT" if keep_empty else "CROSS"
+        return (
+            sg.select(*projections)
+            .from_(parent)
+            .join(unnest, join_type=join_type)
+        )
diff --git a/ibis/backends/clickhouse/compiler.py b/ibis/backends/clickhouse/compiler.py
index c4283a6c3500..7ad80aea2eaa 100644
--- a/ibis/backends/clickhouse/compiler.py
+++ b/ibis/backends/clickhouse/compiler.py
@@ -648,3 +648,60 @@ def visit_DropColumns(self, op, *, parent, columns_to_drop):
         table = sg.to_identifier(parent.alias_or_name, quoted=quoted)
         column = sge.Column(this=star, table=table)
         return sg.select(column).from_(parent)
+
+    def visit_TableUnnest(
+        self, op, *, parent, column, offset: str | None, keep_empty: bool
+    ):
+        """Compile `TableUnnest` as an `ARRAY JOIN` against `parent`.
+
+        The array is joined under a generated alias and projected as
+        `op.column.name`. `keep_empty` sets the join side to LEFT. When
+        `offset` is given, a second join adds the element positions, computed
+        as `arrayEnumerate(...) - 1`, under that name.
+        """
+        quoted = self.quoted
+
+        column_alias = sg.to_identifier(
+            util.gen_name("table_unnest_column"), quoted=quoted
+        )
+
+        table = sg.to_identifier(parent.alias_or_name, quoted=quoted)
+
+        opname = op.column.name
+        overlaps_with_parent = opname in op.parent.schema
+        computed_column = column_alias.as_(opname, quoted=quoted)
+
+        # the projection of the unnested column does not depend on `offset`,
+        # so it is built once here
+        if overlaps_with_parent:
+            selcols = [
+                sge.Column(this=sge.Star(replace=[computed_column]), table=table)
+            ]
+        else:
+            selcols = [sge.Column(this=STAR, table=table), computed_column]
+
+        if offset is not None:
+            offset = sg.to_identifier(offset, quoted=quoted)
+            selcols.append(offset)
+
+        select = (
+            sg.select(*selcols)
+            .from_(parent)
+            .join(
+                sge.Join(
+                    this=column.as_(column_alias, quoted=quoted),
+                    kind="ARRAY",
+                    side=None if not keep_empty else "LEFT",
+                )
+            )
+        )
+
+        if offset is not None:
+            # shift each enumerated position down by one
+            param = sg.to_identifier(util.gen_name("arr_enum"))
+            func = sge.Lambda(this=param - 1, expressions=[param])
+            return select.join(
+                self.f.arrayMap(func, self.f.arrayEnumerate(column_alias)).as_(offset)
+            )
+
+        return select
diff --git a/ibis/backends/duckdb/compiler.py b/ibis/backends/duckdb/compiler.py
index 300a9e4fc215..f2c38a17dd8c 100644
--- a/ibis/backends/duckdb/compiler.py
+++ b/ibis/backends/duckdb/compiler.py
@@ -14,6 +14,7 @@
 from ibis.backends.sql.compiler import NULL, STAR, AggGen, SQLGlotCompiler
 from ibis.backends.sql.datatypes import DuckDBType
 from ibis.backends.sql.rewrites import exclude_nulls_from_array_collect
+from ibis.util import gen_name
 
 _INTERVAL_SUFFIXES = {
     "ms": "milliseconds",
@@ -547,3 +548,55 @@ def visit_DropColumns(self, op, *, parent, columns_to_drop):
         table = sg.to_identifier(parent.alias_or_name, quoted=quoted)
         column = sge.Column(this=star, table=table)
         return sg.select(column).from_(parent)
+
+    def visit_TableUnnest(
+        self, op, *, parent, column, offset: str | None, keep_empty: bool
+    ):
+        """Compile `TableUnnest` as a CROSS (or LEFT) join to `UNNEST(column)`."""
+        quoted = self.quoted
+        column_alias = sg.to_identifier(gen_name("table_unnest_column"), quoted=quoted)
+
+        opname = op.column.name
+        overlaps_with_parent = opname in op.parent.schema
+        computed_column = column_alias.as_(opname, quoted=quoted)
+
+        selcols = []
+        table = sg.to_identifier(parent.alias_or_name, quoted=quoted)
+
+        if offset is not None:
+            # TODO: clean this up once WITH ORDINALITY is supported in DuckDB
+            # no need for struct_extract once that's upstream
+            # zip elements with positions: field 1 is the element, field 2 the position
+            column = self.f.list_zip(column, self.f.range(self.f.len(column)))
+            extract = self.f.struct_extract(column_alias, 1).as_(opname, quoted=quoted)
+
+            if overlaps_with_parent:
+                replace = sge.Column(this=sge.Star(replace=[extract]), table=table)
+                selcols.append(replace)
+            else:
+                selcols.append(sge.Column(this=STAR, table=table))
+                selcols.append(extract)
+
+            selcols.append(
+                self.f.struct_extract(column_alias, 2).as_(offset, quoted=quoted)
+            )
+        elif overlaps_with_parent:
+            selcols.append(
+                sge.Column(this=sge.Star(replace=[computed_column]), table=table)
+            )
+        else:
+            selcols.append(sge.Column(this=STAR, table=table))
+            selcols.append(computed_column)
+
+        unnest = sge.Unnest(
+            expressions=[column],
+            alias=sge.TableAlias(
+                this=sg.to_identifier(gen_name("table_unnest"), quoted=quoted),
+                columns=[column_alias],
+            ),
+        )
+        return (
+            sg.select(*selcols)
+            .from_(parent)
+            .join(unnest, join_type="CROSS" if not keep_empty else "LEFT")
+        )
diff --git a/ibis/backends/postgres/compiler.py b/ibis/backends/postgres/compiler.py
index dfa2b05f019e..cb959d2bd199 100644
--- a/ibis/backends/postgres/compiler.py
+++ b/ibis/backends/postgres/compiler.py
@@ -15,6 +15,7 @@
 from ibis.backends.sql.datatypes import PostgresType
 from ibis.backends.sql.dialects import Postgres
 from ibis.backends.sql.rewrites import exclude_nulls_from_array_collect
+from ibis.util import gen_name
 
 
 class PostgresUDFNode(ops.Value):
@@ -611,3 +612,59 @@ def visit_Hash(self, op, *, arg):
             f"Hash({arg_dtype!r}) operation is not supported in the "
             f"{self.dialect} backend"
         )
+
+    def visit_TableUnnest(
+        self, op, *, parent, column, offset: str | None, keep_empty: bool
+    ):
+        """Compile `TableUnnest` as a CROSS (or LEFT) join to `UNNEST(column)`."""
+        quoted = self.quoted
+        column_alias = sg.to_identifier(gen_name("table_unnest_column"), quoted=quoted)
+        parent_alias = parent.alias_or_name
+
+        opname = op.column.name
+        parent_schema = op.parent.schema
+        overlaps_with_parent = opname in parent_schema
+        computed_column = column_alias.as_(opname, quoted=quoted)
+
+        selcols = []
+        if overlaps_with_parent:
+            # swap the unnested column in by the op's name: the compiled `column`
+            # may not carry that name (e.g., an expression renamed to `opname`)
+            selcols.extend(
+                sg.column(col, table=parent_alias, quoted=quoted)
+                if col != opname
+                else computed_column
+                for col in parent_schema.names
+            )
+        else:
+            selcols.append(
+                sge.Column(
+                    this=STAR, table=sg.to_identifier(parent_alias, quoted=quoted)
+                )
+            )
+            selcols.append(computed_column)
+
+        if offset is not None:
+            offset_name = offset
+            offset = sg.to_identifier(offset_name, quoted=quoted)
+            # project the raw unnest offset shifted down by one
+            selcols.append((offset - 1).as_(offset_name, quoted=quoted))
+
+        unnest = sge.Unnest(
+            expressions=[column],
+            alias=sge.TableAlias(
+                this=sg.to_identifier(gen_name("table_unnest"), quoted=quoted),
+                columns=[column_alias],
+            ),
+            offset=offset,
+        )
+
+        return (
+            sg.select(*selcols)
+            .from_(parent)
+            .join(
+                unnest,
+                on=None if not keep_empty else sge.convert(True),
+                join_type="CROSS" if not keep_empty else "LEFT",
+            )
+        )
diff --git a/ibis/backends/pyspark/compiler.py b/ibis/backends/pyspark/compiler.py
index a28faedde83c..6cbd7f0796a9 100644
--- a/ibis/backends/pyspark/compiler.py
+++ b/ibis/backends/pyspark/compiler.py
@@ -452,3 +452,68 @@ def visit_HexDigest(self, op, *, arg, how):
             return self.f.sha2(arg, int(how[-3:]))
         else:
             raise NotImplementedError(f"No available hashing function for {how}")
+
+    def visit_TableUnnest(
+        self, op, *, parent, column, offset: str | None, keep_empty: bool
+    ):
+        """Compile `TableUnnest` as a `LATERAL VIEW` over an `explode` variant."""
+        quoted = self.quoted
+        column_alias = sg.to_identifier(gen_name("table_unnest_column"), quoted=quoted)
+
+        opname = op.column.name
+        parent_schema = op.parent.schema
+        overlaps_with_parent = opname in parent_schema
+        computed_column = column_alias.as_(opname, quoted=quoted)
+
+        parent_alias = parent.alias_or_name
+
+        selcols = []
+        if overlaps_with_parent:
+            # swap the unnested column in by the op's name: the compiled `column`
+            # may not carry that name (e.g., an expression renamed to `opname`)
+            selcols.extend(
+                sg.column(col, table=parent_alias, quoted=quoted)
+                if col != opname
+                else computed_column
+                for col in parent_schema.names
+            )
+        else:
+            selcols.append(
+                sge.Column(
+                    this=STAR, table=sg.to_identifier(parent_alias, quoted=quoted)
+                )
+            )
+            selcols.append(computed_column)
+
+        alias_columns = []
+
+        if offset is not None:
+            offset = sg.column(offset, quoted=quoted)
+            selcols.append(offset)
+            alias_columns.append(offset)
+
+        alias_columns.append(column_alias)
+
+        # four possible functions
+        #
+        # explode: unnest
+        # explode_outer: unnest preserving empties and nulls
+        # posexplode: unnest with index
+        # posexplode_outer: unnest with index preserving empties and nulls
+        funcname = (
+            ("pos" if offset is not None else "")
+            + "explode"
+            + ("_outer" if keep_empty else "")
+        )
+
+        return (
+            sg.select(*selcols)
+            .from_(parent)
+            .lateral(
+                sge.Lateral(
+                    this=self.f[funcname](column),
+                    view=True,
+                    alias=sge.TableAlias(columns=alias_columns),
+                )
+            )
+        )
diff --git a/ibis/backends/snowflake/compiler.py b/ibis/backends/snowflake/compiler.py
index aa2faca6a7e9..927a5922f032 100644
--- a/ibis/backends/snowflake/compiler.py
+++ b/ibis/backends/snowflake/compiler.py
@@ -662,3 +662,70 @@ def visit_DropColumns(self, op, *, parent, columns_to_drop):
         table = sg.to_identifier(parent.alias_or_name, quoted=quoted)
         column = sge.Column(this=star, table=table)
         return sg.select(column).from_(parent)
+
+    def visit_TableUnnest(
+        self, op, *, parent, column, offset: str | None, keep_empty: bool
+    ):
+        """Compile `TableUnnest` as a CROSS (or LEFT) join to an `UNNEST`."""
+        quoted = self.quoted
+        column_alias = sg.to_identifier(
+            util.gen_name("table_unnest_column"), quoted=quoted
+        )
+        # `sep` joins/re-splits the elements; `null_sentinel` marks NULL elements
+        sep = sge.convert(util.guid())
+        null_sentinel = sge.convert(util.guid())
+
+        table = sg.to_identifier(parent.alias_or_name, quoted=quoted)
+        selcols = []
+
+        opcol = op.column
+        opname = opcol.name
+        overlaps_with_parent = opname in op.parent.schema
+        # turn the sentinel back into NULL and cast to the array's element type
+        computed_column = self.cast(
+            self.f.nullif(column_alias, null_sentinel), opcol.dtype.value_type
+        ).as_(opname, quoted=quoted)
+
+        if overlaps_with_parent:
+            selcols.append(
+                sge.Column(this=sge.Star(replace=[computed_column]), table=table)
+            )
+        else:
+            selcols.append(sge.Column(this=STAR, table=table))
+            selcols.append(computed_column)
+
+        if offset is not None:
+            offset = sg.to_identifier(offset, quoted=quoted)
+            selcols.append(offset)
+
+        alias = sge.TableAlias(
+            this=sg.to_identifier(util.gen_name("table_unnest"), quoted=quoted),
+            columns=[column_alias],
+        )
+        # there has to be a better way
+        param = sg.to_identifier(util.gen_name("table_unnest_param"))
+        column = self.f.transform(
+            column,
+            sge.Lambda(
+                this=self.f.coalesce(self.cast(param, dt.string), null_sentinel),
+                expressions=[param],
+            ),
+        )
+        empty_array = self.f.array()
+        # NOTE(review): an empty or NULL array looks like it ends up as the
+        # one-sentinel array, i.e. a NULL row even without `keep_empty` -- confirm
+        split = self.f.coalesce(
+            self.f.nullif(
+                self.f.split(
+                    self.f.array_to_string(self.f.nullif(column, empty_array), sep), sep
+                ),
+                empty_array,
+            ),
+            self.f.array(null_sentinel),
+        )
+        unnest = sge.Unnest(expressions=[split], alias=alias, offset=offset)
+        return (
+            sg.select(*selcols)
+            .from_(parent)
+            .join(unnest, join_type="CROSS" if not keep_empty else "LEFT")
+        )
diff --git a/ibis/backends/snowflake/tests/test_client.py b/ibis/backends/snowflake/tests/test_client.py
index a904b1054197..e50b4b716f31 100644
--- a/ibis/backends/snowflake/tests/test_client.py
+++ b/ibis/backends/snowflake/tests/test_client.py
@@ -2,6 +2,7 @@
 
 import json
 import os
+from collections import Counter
 
 import pandas as pd
 import pandas.testing as tm
@@ -429,3 +430,11 @@ def test_connect_without_snowflake_url():
     )
 
     assert nonurlcon.list_tables()
+
+
+def test_table_unnest_with_empty_strings(con):
+    """Empty strings survive `Table.unnest`; `[]` and `None` come back as NULLs."""
+    t = ibis.memtable({"x": [["", ""], [""], [], None]})
+    result = con.execute(t.unnest(t.x)["x"])
+    expected = Counter(["", "", "", None, None])
+    assert Counter(result.values) == expected
diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py
index 8b55c189e9d6..1d3633a0e4ea 100644
--- a/ibis/backends/tests/test_array.py
+++ b/ibis/backends/tests/test_array.py
@@ -1384,7 +1384,104 @@ def test_zip_unnest_lift(con):
     t = ibis.memtable(data)
     zipped = t.mutate(zipped=t.array1.zip(t.array2))
     unnested = zipped.mutate(unnest=zipped.zipped.unnest())
-    lifted = unnested.unnest.lift()
+    lifted = unnested["unnest"].lift()
     result = con.execute(lifted)
     expected = pd.DataFrame({"f1": [1, 2, 3], "f2": [4, 5, 6]})
     tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.notimpl(
+    ["datafusion", "pandas", "polars", "dask", "flink"],
+    raises=com.OperationNotDefinedError,
+)
+@pytest.mark.parametrize(
+    "colspec",
+    ["y", lambda t: t.y, ibis._.y],
+    ids=["string", "lambda", "deferred"],
+)
+def test_table_unnest(backend, colspec):
+    """`Table.unnest` accepts a column name, a callable, or a deferred."""
+    t = backend.array_types
+    result = t.unnest(colspec).execute()
+    assert set(result["y"].values) == set(t[["y"]].execute().explode("y")["y"].values)
+
+
+@pytest.mark.notimpl(
+    ["datafusion", "pandas", "polars", "dask", "flink"],
+    raises=com.OperationNotDefinedError,
+)
+def test_table_unnest_with_offset(backend):
+    """`Table.unnest(..., offset=...)` matches an expansion computed with pandas."""
+    t = backend.array_types
+    col = "y"
+    df = (
+        t[[col]]
+        .execute()
+        .assign(idx=lambda df: df[col].map(lambda v: list(range(len(v)))))[[col, "idx"]]
+        .explode("idx")
+        .assign(idx=lambda df: df["idx"].astype("int64"))
+    )
+    idx = iter(df.idx.values)  # consumed row by row by the `map` below
+    expected = (
+        df.assign(**{col: df[col].map(lambda v: v[next(idx)])})
+        .sort_values(["idx", col])
+        .reset_index(drop=True)[["idx", col]]
+    )
+    expr = t.unnest(col, offset="idx")[["idx", col]].order_by("idx", col)
+    result = expr.execute()
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.notimpl(
+    ["datafusion", "pandas", "polars", "dask", "flink"],
+    raises=com.OperationNotDefinedError,
+)
+def test_table_unnest_with_keep_empty(con):
+    """With `keep_empty=True`, the `[]` and `None` rows each yield a NULL."""
+    t = ibis.memtable(pd.DataFrame({"y": [[], None, ["a"]]}))
+    result = con.execute(t.unnest("y", keep_empty=True)["y"])
+    assert Counter(result.values) == Counter(["a", None, None])
+
+
+@pytest.mark.notimpl(
+    ["datafusion", "pandas", "polars", "dask", "flink"],
+    raises=com.OperationNotDefinedError,
+)
+@pytest.mark.notyet(
+    ["risingwave"], raises=PsycoPg2InternalError, reason="not supported in risingwave"
+)
+def test_table_unnest_column_expr(backend):
+    t = backend.array_types
+    plural = t.y.map(lambda v: v.cast("str") + "'s").name("plural")  # named expr
+    result = t.unnest(plural).execute()["plural"]
+    expected = (t["y"].execute().explode("y") + "'s").replace({np.nan: None})
+    assert set(result.values) == set(expected.values)
+
+
+@pytest.mark.notimpl(
+    ["datafusion", "pandas", "polars", "dask", "flink"],
+    raises=com.OperationNotDefinedError,
+)
+@pytest.mark.notimpl(["trino"], raises=TrinoUserError)
+@pytest.mark.notimpl(["postgres"], raises=PsycoPg2SyntaxError)
+@pytest.mark.notimpl(["risingwave"], raises=PsycoPg2ProgrammingError)
+@pytest.mark.notyet(
+    ["risingwave"], raises=PsycoPg2InternalError, reason="not supported in risingwave"
+)
+def test_table_unnest_array_of_struct_of_array(con):
+    t = ibis.memtable(
+        {
+            "a": [
+                [{"x": [1, 2, 3]}, {"x": [1, 2]}],
+                [],
+                None,
+                [{"x": [3, 1, 2, 3]}],
+            ]
+        },
+        schema={"a": "array<struct<x: array<int64>>>"},
+    )
+    # two different unnests: table-level on `a`, then column-level on `x`
+    expr = t.unnest("a").a.x.unnest().name("x").as_table().order_by("x")
+    result = con.execute(expr)
+    expected = pd.DataFrame({"x": [1, 1, 1, 2, 2, 2, 3, 3, 3]})
+    tm.assert_frame_equal(result, expected)
diff --git a/ibis/backends/trino/compiler.py b/ibis/backends/trino/compiler.py
index 18bbfc913c98..f98aa48b4ab3 100644
--- a/ibis/backends/trino/compiler.py
+++ b/ibis/backends/trino/compiler.py
@@ -17,6 +17,7 @@
     exclude_nulls_from_array_collect,
     exclude_unsupported_window_frame_from_ops,
 )
+from ibis.util import gen_name
 
 
 class TrinoCompiler(SQLGlotCompiler):
@@ -511,3 +512,59 @@ def visit_ToJSONArray(self, op, *, arg):
             ),
             dt.Array(dt.json),
         )
+
+    def visit_TableUnnest(
+        self, op, *, parent, column, offset: str | None, keep_empty: bool
+    ):
+        """Compile `TableUnnest` as a CROSS (or LEFT) join to `UNNEST(column)`."""
+        quoted = self.quoted
+        column_alias = sg.to_identifier(gen_name("table_unnest_column"), quoted=quoted)
+
+        opname = op.column.name
+        parent_schema = op.parent.schema
+        overlaps_with_parent = opname in parent_schema
+        computed_column = column_alias.as_(opname, quoted=quoted)
+
+        parent_alias_or_name = parent.alias_or_name
+        selcols = []
+        if overlaps_with_parent:
+            # swap the unnested column in by the op's name: the compiled `column`
+            # may not carry that name (e.g., an expression renamed to `opname`)
+            selcols.extend(
+                sg.column(col, table=parent_alias_or_name, quoted=quoted)
+                if col != opname
+                else computed_column
+                for col in parent_schema.names
+            )
+        else:
+            selcols.append(
+                sge.Column(
+                    this=STAR,
+                    table=sg.to_identifier(parent_alias_or_name, quoted=quoted),
+                )
+            )
+            selcols.append(computed_column)
+
+        if offset is not None:
+            offset_name = offset
+            offset = sg.to_identifier(offset_name, quoted=quoted)
+            # project the raw unnest offset shifted down by one
+            selcols.append((offset - 1).as_(offset_name, quoted=quoted))
+
+        unnest = sge.Unnest(
+            expressions=[column],
+            alias=sge.TableAlias(
+                this=sg.to_identifier(gen_name("table_unnest"), quoted=quoted),
+                columns=[column_alias],
+            ),
+            offset=offset,
+        )
+        return (
+            sg.select(*selcols)
+            .from_(parent)
+            .join(
+                unnest,
+                on=None if not keep_empty else sge.convert(True),
+                join_type="CROSS" if not keep_empty else "LEFT",
+            )
+        )
diff --git a/ibis/expr/operations/relations.py b/ibis/expr/operations/relations.py
index 0e26700b3967..5402ad698df1 100644
--- a/ibis/expr/operations/relations.py
+++ b/ibis/expr/operations/relations.py
@@ -488,4 +488,27 @@ class Distinct(Simple):
     """Compute the distinct rows of a table."""
 
 
+@public
+class TableUnnest(Simple):
+    """Cross join unnest of an array `column` against its `parent` table.
+
+    `offset`, when set, names an extra int64 column in the output schema.
+    """
+
+    column: Value[dt.Array]
+    offset: typing.Union[str, None]
+    keep_empty: bool
+
+    @attribute
+    def schema(self):
+        """Parent schema with `column` stored as its array element type."""
+        fields = self.parent.schema.fields.copy()
+        # keyed by name, so a parent column of the same name is overwritten
+        fields[self.column.name] = self.column.dtype.value_type
+        if self.offset is not None:
+            fields[self.offset] = dt.int64
+
+        return Schema(fields)
+
+
 # TODO(kszucs): support t.select(*t) syntax by implementing Table.__iter__()
diff --git a/ibis/expr/types/arrays.py b/ibis/expr/types/arrays.py
index 2d9e5a8f5b3a..2053bc47d87d 100644
--- a/ibis/expr/types/arrays.py
+++ b/ibis/expr/types/arrays.py
@@ -286,12 +286,23 @@ def repeat(self, n: int | ir.IntegerValue) -> ArrayValue:
     __mul__ = __rmul__ = repeat
 
     def unnest(self) -> ir.Value:
-        """Flatten an array into a column.
+        """Unnest an array into a column.
 
         ::: {.callout-note}
-        ## Rows with empty arrays are dropped in the output.
+        ## Empty arrays and `NULL`s are dropped in the output.
+        To preserve empty arrays as `NULL`s as well as existing `NULL` values,
+        use [`Table.unnest`](./expression-tables.qmd#ibis.expr.types.relations.Table.unnest).
         :::
 
+        Returns
+        -------
+        ir.Value
+            Unnested array
+
+        See Also
+        --------
+        [`Table.unnest`](./expression-tables.qmd#ibis.expr.types.relations.Table.unnest)
+
         Examples
         --------
         >>> import ibis
@@ -318,11 +329,6 @@ def unnest(self) -> ir.Value:
         │     3 │
         │     3 │
         └───────┘
-
-        Returns
-        -------
-        ir.Value
-            Unnested array
         """
         expr = ops.Unnest(self).to_expr()
         try:
diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py
index 4d80735888e8..b25dd193f365 100644
--- a/ibis/expr/types/relations.py
+++ b/ibis/expr/types/relations.py
@@ -4681,6 +4681,149 @@ def value_counts(self) -> ir.Table:
             lambda t: t.count().name("_".join(columns) + "_count")
         )
 
+    def unnest(
+        self, column, offset: str | None = None, keep_empty: bool = False
+    ) -> Table:
+        """Unnest an array `column` from a table.
+
+        When unnesting an existing column the newly unnested column replaces
+        the existing column.
+
+        Parameters
+        ----------
+        column
+            Array column to unnest.
+        offset
+            Name of the resulting index column.
+        keep_empty
+            Keep empty array values as `NULL` in the output table, as well as
+            existing `NULL` values.
+
+        Returns
+        -------
+        Table
+            Table with the array column `column` unnested.
+
+        See Also
+        --------
+        [`ArrayValue.unnest`](./expression-collections.qmd#ibis.expr.types.arrays.ArrayValue.unnest)
+
+        Examples
+        --------
+        >>> import ibis
+        >>> from ibis import _
+        >>> ibis.options.interactive = True
+
+        Construct a table expression with an array column.
+
+        >>> t = ibis.memtable({"x": [[1, 2], [], None, [3, 4, 5]], "y": [1, 2, 3, 4]})
+        >>> t
+        ┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━┓
+        ┃ x                    ┃ y     ┃
+        ┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━┩
+        │ array<int64>         │ int64 │
+        ├──────────────────────┼───────┤
+        │ [1, 2]               │     1 │
+        │ []                   │     2 │
+        │ NULL                 │     3 │
+        │ [3, 4, ... +1]       │     4 │
+        └──────────────────────┴───────┘
+
+        Unnest the array column `x`, replacing the **existing** `x` column.
+
+        >>> t.unnest("x")
+        ┏━━━━━━━┳━━━━━━━┓
+        ┃ x     ┃ y     ┃
+        ┡━━━━━━━╇━━━━━━━┩
+        │ int64 │ int64 │
+        ├───────┼───────┤
+        │     1 │     1 │
+        │     2 │     1 │
+        │     3 │     4 │
+        │     4 │     4 │
+        │     5 │     4 │
+        └───────┴───────┘
+
+        Unnest the array column `x` with an offset. The `offset` parameter is
+        the name of the resulting index column.
+
+        >>> t.unnest(t.x, offset="idx")
+        ┏━━━━━━━┳━━━━━━━┳━━━━━━━┓
+        ┃ x     ┃ y     ┃ idx   ┃
+        ┡━━━━━━━╇━━━━━━━╇━━━━━━━┩
+        │ int64 │ int64 │ int64 │
+        ├───────┼───────┼───────┤
+        │     1 │     1 │     0 │
+        │     2 │     1 │     1 │
+        │     3 │     4 │     0 │
+        │     4 │     4 │     1 │
+        │     5 │     4 │     2 │
+        └───────┴───────┴───────┘
+
+        Unnest the array column `x`, keeping empty arrays and `NULL` values as
+        `NULL` in the output table.
+
+        >>> t.unnest(_.x, offset="idx", keep_empty=True)
+        ┏━━━━━━━┳━━━━━━━┳━━━━━━━┓
+        ┃ x     ┃ y     ┃ idx   ┃
+        ┡━━━━━━━╇━━━━━━━╇━━━━━━━┩
+        │ int64 │ int64 │ int64 │
+        ├───────┼───────┼───────┤
+        │     1 │     1 │     0 │
+        │     2 │     1 │     1 │
+        │     3 │     4 │     0 │
+        │     4 │     4 │     1 │
+        │     5 │     4 │     2 │
+        │  NULL │     2 │  NULL │
+        │  NULL │     3 │  NULL │
+        └───────┴───────┴───────┘
+
+        If you need to preserve the row order of the kept empty arrays or null
+        values, use
+        [`row_number`](./expression-tables.qmd#ibis.row_number) to
+        create an index column before calling `unnest`.
+
+        >>> (
+        ...     t.mutate(original_row=ibis.row_number())
+        ...     .unnest("x", offset="idx", keep_empty=True)
+        ...     .relocate("original_row")
+        ...     .order_by("original_row")
+        ... )
+        ┏━━━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━┓
+        ┃ original_row ┃ x     ┃ y     ┃ idx   ┃
+        ┡━━━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━┩
+        │ int64        │ int64 │ int64 │ int64 │
+        ├──────────────┼───────┼───────┼───────┤
+        │            0 │     1 │     1 │     0 │
+        │            0 │     2 │     1 │     1 │
+        │            1 │  NULL │     2 │  NULL │
+        │            2 │  NULL │     3 │  NULL │
+        │            3 │     3 │     4 │     0 │
+        │            3 │     4 │     4 │     1 │
+        │            3 │     5 │     4 │     2 │
+        └──────────────┴───────┴───────┴───────┘
+
+        You can also unnest more complex expressions, and the resulting column
+        will be projected as the last expression in the result.
+
+        >>> t.unnest(_.x.map(lambda v: v + 1).name("plus_one"))
+        ┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━┓
+        ┃ x                    ┃ y     ┃ plus_one ┃
+        ┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━┩
+        │ array<int64>         │ int64 │ int64    │
+        ├──────────────────────┼───────┼──────────┤
+        │ [1, 2]               │     1 │        2 │
+        │ [1, 2]               │     1 │        3 │
+        │ [3, 4, ... +1]       │     4 │        4 │
+        │ [3, 4, ... +1]       │     4 │        5 │
+        │ [3, 4, ... +1]       │     4 │        6 │
+        └──────────────────────┴───────┴──────────┘
+        """
+        (column,) = self.bind(column)
+        return ops.TableUnnest(
+            parent=self, column=column, offset=offset, keep_empty=keep_empty
+        ).to_expr()
+
 
 @public
 class CachedTable(Table):