From 22de674afb987b031cfb9574b2833170e341f752 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com>
Date: Mon, 5 Feb 2024 23:21:39 +0100
Subject: [PATCH] fix(api): restore and deprecate `ir.Table.to_array()` (#8227)

Co-authored-by: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
---
 .github/workflows/ibis-docs-main.yml        |  1 -
 .github/workflows/ibis-docs-pr.yml          |  1 +
 docs/posts/ibis-duckdb-geospatial/index.qmd | 13 ++++---
 docs/tutorials/ibis-for-sql-users.qmd       |  2 +-
 gen_matrix.py                               |  8 +----
 ibis/backends/flink/__init__.py             |  4 +--
 ibis/expr/operations/relations.py           | 10 +-----
 ibis/expr/tests/test_newrels.py             | 16 ++++++---
 ibis/expr/types/__init__.py                 |  2 ++
 ibis/expr/types/core.py                     |  2 +-
 ibis/expr/types/joins.py                    | 40 ++++++++++++---------
 ibis/expr/types/logical.py                  |  2 +-
 ibis/expr/types/relations.py                | 24 +++++++++++--
 ibis/expr/types/temporal_windows.py         |  3 --
 14 files changed, 74 insertions(+), 54 deletions(-)

diff --git a/.github/workflows/ibis-docs-main.yml b/.github/workflows/ibis-docs-main.yml
index 57277bb0de87..6478f0b857d2 100644
--- a/.github/workflows/ibis-docs-main.yml
+++ b/.github/workflows/ibis-docs-main.yml
@@ -4,7 +4,6 @@ on:
   push:
     branches:
       - main
-      - "*.x.x"
   merge_group:
 
 # only a single docs job that pushes to `main` can run at any given time
diff --git a/.github/workflows/ibis-docs-pr.yml b/.github/workflows/ibis-docs-pr.yml
index 9a6e5cca50c8..c45bdb709ba2 100644
--- a/.github/workflows/ibis-docs-pr.yml
+++ b/.github/workflows/ibis-docs-pr.yml
@@ -5,6 +5,7 @@ on:
     branches:
       - main
       - "*.x.x"
+      - "the-epic-split"
   merge_group:
 
 concurrency:
diff --git a/docs/posts/ibis-duckdb-geospatial/index.qmd b/docs/posts/ibis-duckdb-geospatial/index.qmd
index 575b25fdbd57..b78f7764056a 100644
--- a/docs/posts/ibis-duckdb-geospatial/index.qmd
+++ b/docs/posts/ibis-duckdb-geospatial/index.qmd
@@ -117,7 +117,7 @@ boroughs
 ```
 
 ```{python}
-boroughs.filter(_.geom.intersects(broad_station.geom))
+boroughs.filter(boroughs.geom.intersects(broad_station.select(broad_station.geom).to_array()))
 ```
 
 ### `d_within` (ST_DWithin)
@@ -133,10 +133,15 @@ streets
 Using the deferred API, we can check which streets are within `d=10` meters of distance.
 
 ```{python}
-sts_near_broad = streets.filter(_.geom.d_within(broad_station.geom, 10))
+sts_near_broad = streets.filter(_.geom.d_within(broad_station.select(_.geom).to_array(), 10))
 sts_near_broad
 ```
 
+::: {.callout-note}
+In the previous query, `streets` and `broad_station` are different tables. We use [`to_array()`](../../reference/expression-tables.qmd#ibis.expr.types.relations.Table.to_array) to turn the
+single-column table `broad_station.select(_.geom)` into a scalar subquery, that is, an expression whose shape is scalar.
+:::
+
 To visualize the findings, we will convert the tables to GeoPandas DataFrames.
 
 ```{python}
@@ -196,7 +201,7 @@ To find if there were any homicides in that area, we can find where the polygon
 200 meters buffer to our "Broad St" station point intersects with the geometry column in our homicides table.
 
 ```{python}
-h_near_broad = homicides.filter(_.geom.intersects(broad_station.geom.buffer(200)))
+h_near_broad = homicides.filter(_.geom.intersects(broad_station.select(_.geom.buffer(200)).to_array()))
 h_near_broad
 ```
 
@@ -205,7 +210,7 @@ data we can't tell the street near which it happened. However, we can check if t
 distance of a street.
 
 ```{python}
-h_street = streets.filter(_.geom.d_within(h_near_broad.geom, 2))
+h_street = streets.filter(_.geom.d_within(h_near_broad.select(_.geom).to_array(), 2))
 h_street
 ```
 
diff --git a/docs/tutorials/ibis-for-sql-users.qmd b/docs/tutorials/ibis-for-sql-users.qmd
index 59a01d889b8b..9fd863a3edd0 100644
--- a/docs/tutorials/ibis-for-sql-users.qmd
+++ b/docs/tutorials/ibis-for-sql-users.qmd
@@ -807,7 +807,7 @@ ibis.to_sql(expr)
 You can mix the overlapping key names with other expressions:
 
 ```{python}
-joined = t4.join(t5, ["key1", "key2", t4.key3.left(4) == t4.key3.left(4)])
+joined = t4.join(t5, ["key1", "key2", t4.key3.left(4) == t5.key3.left(4)])
 expr = joined[t4, t5.value2]
 ibis.to_sql(expr)
 ```
diff --git a/gen_matrix.py b/gen_matrix.py
index e6c21381dd7e..9f9745cb7239 100644
--- a/gen_matrix.py
+++ b/gen_matrix.py
@@ -26,13 +26,7 @@ def get_leaf_classes(op):
 
 
 def main():
-    internal_ops = {
-        # Never translates into anything
-        ops.UnresolvedExistsSubquery,
-        ops.ScalarParameter,
-    }
-
-    public_ops = frozenset(get_leaf_classes(ops.Value)) - internal_ops
+    public_ops = frozenset(get_leaf_classes(ops.Value))
     support = {"operation": [f"{op.__module__}.{op.__name__}" for op in public_ops]}
     support.update(
         (name, list(map(backend.has_operation, public_ops)))
diff --git a/ibis/backends/flink/__init__.py b/ibis/backends/flink/__init__.py
index 52febaf80e3a..2ae904f505ba 100644
--- a/ibis/backends/flink/__init__.py
+++ b/ibis/backends/flink/__init__.py
@@ -11,7 +11,7 @@
 import ibis.expr.operations as ops
 import ibis.expr.schema as sch
 import ibis.expr.types as ir
-from ibis.backends.base import BaseBackend, CanCreateDatabase
+from ibis.backends.base import BaseBackend, CanCreateDatabase, NoUrl
 from ibis.backends.base.sql.ddl import fully_qualified_re, is_fully_qualified
 from ibis.backends.flink.compiler.core import FlinkCompiler
 from ibis.backends.flink.ddl import (
@@ -38,7 +38,7 @@
     from ibis.api import Watermark
 
 
-class Backend(BaseBackend, CanCreateDatabase):
+class Backend(BaseBackend, CanCreateDatabase, NoUrl):
     name = "flink"
     compiler = FlinkCompiler
     supports_temporary_tables = True
diff --git a/ibis/expr/operations/relations.py b/ibis/expr/operations/relations.py
index b4eaff5e428e..cf316217170f 100644
--- a/ibis/expr/operations/relations.py
+++ b/ibis/expr/operations/relations.py
@@ -127,15 +127,7 @@ def dtype(self):
 
 @public
 class ScalarSubquery(Subquery):
-    def __init__(self, rel):
-        from ibis.expr.operations import Reduction
-
-        super().__init__(rel=rel)
-        if not isinstance(self.value, Reduction):
-            raise IntegrityError(
-                f"Subquery {self.value!r} is not a reduction, only "
-                "reductions can be used as scalar subqueries"
-            )
+    shape = ds.scalar
 
 
 @public
diff --git a/ibis/expr/tests/test_newrels.py b/ibis/expr/tests/test_newrels.py
index f50b72926dc9..673432550446 100644
--- a/ibis/expr/tests/test_newrels.py
+++ b/ibis/expr/tests/test_newrels.py
@@ -147,11 +147,6 @@ def test_subquery_integrity_check():
     with pytest.raises(IntegrityError, match=msg):
         ops.ScalarSubquery(t)
 
-    agg = t.agg(t.a.sum() + 1)
-    msg = "is not a reduction"
-    with pytest.raises(IntegrityError, match=msg):
-        ops.ScalarSubquery(agg)
-
 
 def test_select_turns_scalar_reduction_into_subquery():
     arr = ibis.literal([1, 2, 3])
@@ -180,6 +175,17 @@ def test_select_turns_value_with_multiple_parents_into_subquery():
     assert t1.op() == expected
 
 
+def test_value_to_array_creates_subquery():
+    rel = t.int_col.sum().as_table()  # table built from a single sum expression
+    with pytest.warns(FutureWarning, match="implicit"):  # to_array() must warn
+        expr = rel.to_array()
+
+    op = expr.op()
+    assert op.shape.is_scalar()  # the resulting node is scalar-shaped
+    assert op.dtype.is_int64()  # dtype expected for the summed int_col
+    assert isinstance(op, ops.ScalarSubquery)  # table wrapped as a subquery node
+
+
 def test_mutate():
     proj = t.select(t, other=t.int_col + 1)
     expected = Project(
diff --git a/ibis/expr/types/__init__.py b/ibis/expr/types/__init__.py
index 99bd54d2f6e4..9105bc911615 100644
--- a/ibis/expr/types/__init__.py
+++ b/ibis/expr/types/__init__.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+# ruff: noqa: I001
+
 from ibis.expr.types.arrays import *  # noqa: F403
 from ibis.expr.types.binary import *  # noqa: F403
 from ibis.expr.types.collections import *  # noqa: F403
diff --git a/ibis/expr/types/core.py b/ibis/expr/types/core.py
index f42e60e47347..5d0b447521b1 100644
--- a/ibis/expr/types/core.py
+++ b/ibis/expr/types/core.py
@@ -85,7 +85,7 @@ def __repr__(self) -> str:
             except TranslationError as e:
                 lines = [
                     "Translation to backend failed",
-                    f"Error message: {repr(e)}",
+                    f"Error message: {e!r}",
                     "Expression repr follows:",
                     self._repr(),
                 ]
diff --git a/ibis/expr/types/joins.py b/ibis/expr/types/joins.py
index ab6c587b8a33..6e3021780507 100644
--- a/ibis/expr/types/joins.py
+++ b/ibis/expr/types/joins.py
@@ -1,31 +1,36 @@
 from __future__ import annotations
 
 import functools
+from typing import TYPE_CHECKING, Any
+
 from public import public
-from typing import Any, Optional, TYPE_CHECKING
-from collections.abc import Iterator, Mapping
 
 import ibis
 import ibis.expr.operations as ops
-
 from ibis import util
-from ibis.expr.types import Table, Value
 from ibis.common.deferred import Deferred
+from ibis.common.egraph import DisjointSet
+from ibis.common.exceptions import (
+    ExpressionError,
+    IbisInputError,
+    InputTypeError,
+    IntegrityError,
+)
 from ibis.expr.analysis import flatten_predicates
-from ibis.common.exceptions import ExpressionError, IntegrityError
+from ibis.expr.rewrites import peel_join_field
+from ibis.expr.types.generic import Value
 from ibis.expr.types.relations import (
+    Table,
     bind,
-    dereference_values,
     dereference_mapping,
     unwrap_aliases,
 )
-from ibis.expr.operations.relations import JoinKind
-from ibis.expr.rewrites import peel_join_field
-from ibis.common.egraph import DisjointSet
 
 if TYPE_CHECKING:
     from collections.abc import Sequence
 
+    from ibis.expr.operations.relations import JoinKind
+
 
 def disambiguate_fields(
     how,
@@ -36,9 +41,7 @@ def disambiguate_fields(
     left_template,
     right_template,
 ):
-    """
-    Resolve name collisions between the left and right tables.
-    """
+    """Resolve name collisions between the left and right tables."""
     collisions = set()
     left_template = left_template or "{name}"
     right_template = right_template or "{name}"
@@ -190,6 +193,9 @@ def prepare_predicates(
         The right table
     predicates
         Predicates to bind and dereference, see the possible values above
+    comparison
+        The comparison operation to construct if the input is a pair of
+        expression-like objects
     """
     deref_left = dereference_mapping_left(left)
     deref_right = dereference_mapping_right(right)
@@ -266,7 +272,7 @@ def _finish(self) -> Table:
         return Table(self.op())
 
     @functools.wraps(Table.join)
-    def join(  # noqa: D102
+    def join(
         self,
         right,
         predicates: Any,
@@ -275,8 +281,8 @@ def join(  # noqa: D102
         lname: str = "",
         rname: str = "{name}_right",
     ):
-        import pyarrow as pa
         import pandas as pd
+        import pyarrow as pa
 
         # TODO(kszucs): factor out to a helper function
         if isinstance(right, (pd.DataFrame, pa.Table)):
@@ -324,7 +330,7 @@ def join(  # noqa: D102
         return self.__class__(left, collisions=collisions, equalities=equalities)
 
     @functools.wraps(Table.asof_join)
-    def asof_join(  # noqa: D102
+    def asof_join(
         self: Table,
         right: Table,
         on,
@@ -403,7 +409,7 @@ def asof_join(  # noqa: D102
         return self.__class__(left, collisions=collisions, equalities=equalities)
 
     @functools.wraps(Table.cross_join)
-    def cross_join(  # noqa: D102
+    def cross_join(
         self: Table,
         right: Table,
         *rest: Table,
@@ -418,7 +424,7 @@ def cross_join(  # noqa: D102
         return left
 
     @functools.wraps(Table.select)
-    def select(self, *args, **kwargs):  # noqa: D102
+    def select(self, *args, **kwargs):
         chain = self.op()
         values = bind(self, (args, kwargs))
         values = unwrap_aliases(values)
diff --git a/ibis/expr/types/logical.py b/ibis/expr/types/logical.py
index 09927223f2ac..01483bd241cc 100644
--- a/ibis/expr/types/logical.py
+++ b/ibis/expr/types/logical.py
@@ -306,7 +306,7 @@ def any(self, where: BooleanValue | None = None) -> BooleanValue:
         >>> (t.arr == None).any(where=t.arr != None)
         False
         """
-        from ibis.common.deferred import Call, _, Deferred
+        from ibis.common.deferred import Call, Deferred, _
 
         parents = self.op().relations
 
diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py
index 951f72e5b64a..4521de905d8f 100644
--- a/ibis/expr/types/relations.py
+++ b/ibis/expr/types/relations.py
@@ -25,6 +25,7 @@
 from ibis.expr.types.core import Expr, _FixedTextJupyterMixin
 from ibis.expr.types.generic import ValueExpr, literal
 from ibis.selectors import Selector
+from ibis.util import deprecated
 
 if TYPE_CHECKING:
     import pandas as pd
@@ -1127,9 +1128,9 @@ def aggregate(
         metrics = unwrap_aliases(metrics)
         having = unwrap_aliases(having)
 
-        groups = dereference_values(self.op(), groups)
-        metrics = dereference_values(self.op(), metrics)
-        having = dereference_values(self.op(), having)
+        groups = dereference_values(node, groups)
+        metrics = dereference_values(node, metrics)
+        having = dereference_values(node, having)
 
         # the user doesn't need to specify the metrics used in the having clause
         # explicitly, we implicitly add them to the metrics list by looking for
@@ -1804,6 +1805,23 @@ def intersect(self, table: Table, *rest: Table, distinct: bool = True) -> Table:
             node = ops.Intersection(node, table, distinct=distinct)
         return node.to_expr().select(self.columns)
 
+    @deprecated(as_of="9.0", instead="conversion to scalar subquery is implicit")
+    def to_array(self) -> ir.Column:
+        """Wrap a single-column table in a scalar subquery expression.
+
+        Returns
+        -------
+        Value
+            Expression built from `ops.ScalarSubquery` over this table
+        """
+        schema = self.schema()
+        if len(schema) != 1:  # reject tables with zero or several columns
+            raise com.ExpressionError(
+                "Table must have exactly one column when viewed as array"
+            )
+
+        return ops.ScalarSubquery(self).to_expr()
+
     def mutate(self, *exprs: Sequence[ir.Expr] | None, **mutations: ir.Value) -> Table:
         """Add columns to a table expression.
 
diff --git a/ibis/expr/types/temporal_windows.py b/ibis/expr/types/temporal_windows.py
index 865a9922e6a2..74560d0f36a2 100644
--- a/ibis/expr/types/temporal_windows.py
+++ b/ibis/expr/types/temporal_windows.py
@@ -5,11 +5,8 @@
 from public import public
 
 import ibis.common.exceptions as com
-import ibis.expr.analysis as an
 import ibis.expr.operations as ops
 import ibis.expr.types as ir
-from ibis.common.deferred import Deferred
-from ibis.selectors import Selector
 from ibis.expr.types.relations import bind
 
 if TYPE_CHECKING: