Skip to content

Commit

Permalink
refactor(aliasing): remove the need for renaming after execution
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud committed Sep 3, 2024
1 parent 281f9d3 commit 30faa25
Show file tree
Hide file tree
Showing 10 changed files with 48 additions and 38 deletions.
9 changes: 3 additions & 6 deletions ibis/backends/polars/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -519,15 +519,12 @@ def _to_pyarrow_table(
streaming: bool = False,
**kwargs: Any,
):
from ibis.formats.pyarrow import PyArrowData

df = self._to_dataframe(
expr, params=params, limit=limit, streaming=streaming, **kwargs
)
table = df.to_arrow()
if isinstance(expr, (ir.Table, ir.Value)):
schema = expr.as_table().schema().to_pyarrow()
return table.rename_columns(schema.names).cast(schema)
else:
raise com.IbisError(f"Cannot execute expression of type: {type(expr)}")
return PyArrowData.convert_table(df.to_arrow(), expr.as_table().schema())

def to_pyarrow(
self,
Expand Down
8 changes: 1 addition & 7 deletions ibis/backends/polars/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def table(op, **_):

@translate.register(ops.DummyTable)
def dummy_table(op, **kw):
    """Translate a DummyTable into a polars select over its literal values.

    Each translated value is aliased to its schema name here, since the
    standalone ``ops.Alias`` handler was removed in this commit.
    """
    # NOTE(review): the diff rendering left the pre-change line (no
    # `.alias(name)`) duplicated above the post-change one; only the
    # aliased form is correct after this commit.
    selections = [
        translate(arg, **kw).alias(name) for name, arg in op.values.items()
    ]
    return pl.DataFrame().lazy().select(selections)


Expand All @@ -68,12 +68,6 @@ def in_memory_table(op, **_):
return op.data.to_polars(op.schema).lazy()


@translate.register(ops.Alias)
def alias(op, **kw):
    """Translate an Alias node by aliasing its translated argument.

    (This handler is deleted by this commit: aliasing now happens at the
    point where values are selected, e.g. in ``dummy_table``.)
    """
    return translate(op.arg, **kw).alias(op.name)


def _make_duration(value, dtype):
    """Build a polars duration of *value* at the resolution of *dtype*."""
    # e.g. resolution "second" -> pl.duration(seconds=value)
    return pl.duration(**{f"{dtype.resolution}s": value})
Expand Down
8 changes: 8 additions & 0 deletions ibis/backends/tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2502,3 +2502,11 @@ def test_simple_pivot_wider(con, backend, monkeypatch):
result = expr.to_pandas()
expected = pd.DataFrame({"no": [4], "yes": [3]})
backend.assert_frame_equal(result, expected)


def test_named_literal(con, backend):
    """A named literal lifted via as_table yields a one-column, one-row table."""
    one = ibis.literal(1, type="int64").name("one")
    result = con.to_pandas(one.as_table())
    backend.assert_frame_equal(result, pd.DataFrame({"one": [1]}))
2 changes: 1 addition & 1 deletion ibis/expr/operations/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,7 @@ class SQLStringView(Relation):
class DummyTable(Relation):
"""A table constructed from literal values."""

values: FrozenOrderedDict[str, Value]
values: FrozenOrderedDict[str, Annotated[Value, ~InstanceOf(Alias)]]

@attribute
def schema(self):
Expand Down
19 changes: 12 additions & 7 deletions ibis/expr/types/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1342,10 +1342,13 @@ def as_table(self) -> ir.Table:
>>> isinstance(lit, ir.Table)
True
"""
parents = self.op().relations
from ibis.expr.types.relations import unwrap_alias

op = self.op()
parents = op.relations

if len(parents) == 0:
return ops.DummyTable({self.get_name(): self}).to_expr()
if not parents:
return ops.DummyTable({op.name: unwrap_alias(op)}).to_expr()
elif len(parents) == 1:
(parent,) = parents
return parent.to_expr().aggregate(self)
Expand Down Expand Up @@ -1521,11 +1524,13 @@ def as_table(self) -> ir.Table:
>>> expr.equals(expected)
True
"""
parents = self.op().relations
values = {self.get_name(): self}
from ibis.expr.types.relations import unwrap_alias

op = self.op()
parents = op.relations

if len(parents) == 0:
return ops.DummyTable(values).to_expr()
if not parents:
return ops.DummyTable({op.name: unwrap_alias(op)}).to_expr()
elif len(parents) == 1:
(parent,) = parents
return parent.to_expr().select(self)
Expand Down
13 changes: 9 additions & 4 deletions ibis/expr/types/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,14 @@ def bind(table: Table, value) -> Iterator[ir.Value]:
yield literal(value)


def unwrap_alias(node: ops.Value) -> ops.Value:
    """Return the wrapped argument of an Alias node, or *node* unchanged."""
    return node.arg if isinstance(node, ops.Alias) else node


def unwrap_aliases(values: Iterator[ir.Value]) -> Mapping[str, ir.Value]:
"""Unwrap aliases into a mapping of {name: expression}."""
result = {}
Expand All @@ -127,10 +135,7 @@ def unwrap_aliases(values: Iterator[ir.Value]) -> Mapping[str, ir.Value]:
raise com.IbisInputError(
f"Duplicate column name {node.name!r} in result set"
)
if isinstance(node, ops.Alias):
result[node.name] = node.arg
else:
result[node.name] = node
result[node.name] = unwrap_alias(node)
return result


Expand Down
15 changes: 6 additions & 9 deletions ibis/formats/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,14 +114,11 @@ def convert_table(cls, df, schema):
"schema column count does not match input data column count"
)

columns = []
for (_, series), dtype in zip(df.items(), schema.types):
columns.append(cls.convert_column(series, dtype))
df = cls.concat(columns, axis=1)

# return data with the schema's columns which may be different than the
# input columns
df.columns = schema.names
columns = {
name: cls.convert_column(series, dtype)
for (name, dtype), (_, series) in zip(schema.items(), df.items())
}
df = pd.DataFrame(columns)

if geospatial_supported:
from geopandas import GeoDataFrame
Expand Down Expand Up @@ -154,7 +151,7 @@ def convert_column(cls, obj, dtype):

@classmethod
def convert_scalar(cls, obj, dtype):
    """Convert a single-column, single-row DataFrame *obj* to a scalar of *dtype*.

    The column label is coerced to ``str`` because Schema keys must be
    strings, while pandas column labels may be e.g. integers.
    """
    # NOTE(review): the diff rendering left the pre-change line (no
    # `str(...)`) duplicated above the post-change one; only the latter
    # is correct after this commit.
    # NOTE(review): calls PandasData.convert_table rather than
    # cls.convert_table, bypassing subclass overrides — confirm intended.
    df = PandasData.convert_table(obj, sch.Schema({str(obj.columns[0]): dtype}))
    return df.iat[0, 0]

@classmethod
Expand Down
3 changes: 0 additions & 3 deletions ibis/formats/polars.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,9 +166,6 @@ def convert_column(cls, df: pl.DataFrame, dtype: dt.DataType) -> pl.Series:
def convert_table(cls, df: pl.DataFrame, schema: Schema) -> pl.DataFrame:
    """Cast *df* so its column types match the ibis *schema*.

    Column names are assumed to already match the schema — the rename
    step was removed by this commit (the deleted lines shown in the diff
    are not part of the resulting code).
    """
    pl_schema = PolarsSchema.from_ibis(schema)

    if df.schema == pl_schema:
        # Already the right dtypes; skip the no-op cast.
        return df
    return df.cast(pl_schema)
Expand Down
2 changes: 1 addition & 1 deletion ibis/formats/tests/test_polars.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def test_convert_column():


def test_convert_table():
df = pl.DataFrame({"x": ["1", "2"], "y": ["a", "b"]})
df = pl.DataFrame({"x": ["1", "2"], "z": ["a", "b"]})

(Codecov / codecov/patch annotation on ibis/formats/tests/test_polars.py#L165: the added line 165 was not covered by tests.)
schema = ibis.schema({"x": "int64", "z": "string"})
df = PolarsData.convert_table(df, schema)
sol = pl.DataFrame(
Expand Down
7 changes: 7 additions & 0 deletions ibis/tests/expr/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2192,3 +2192,10 @@ def test_table_fillna_depr_warn():
t = ibis.table(schema={"a": "int", "b": "str"})
with pytest.warns(FutureWarning, match="v9.1"):
t.fillna({"b": "missing"})


def test_dummy_table_disallows_aliases():
    """DummyTable rejects Alias-wrapped values at construction time."""
    aliased = ops.Alias(ops.Literal(1, dtype=dt.int64), name="two")
    with pytest.raises(ValidationError):
        ops.DummyTable({"one": aliased})

0 comments on commit 30faa25

Please sign in to comment.