Skip to content

Commit

Permalink
feat(api): add .as_scalar() method for turning expressions into scalar subqueries
Browse files Browse the repository at this point in the history
  • Loading branch information
kszucs committed Feb 14, 2024
1 parent a20f44a commit d899b66
Show file tree
Hide file tree
Showing 4 changed files with 177 additions and 36 deletions.
30 changes: 29 additions & 1 deletion ibis/expr/tests/test_newrels.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ def test_select_turns_value_with_multiple_parents_into_subquery():

def test_value_to_array_creates_subquery():
rel = t.int_col.sum().as_table()
with pytest.warns(FutureWarning, match="implicit"):
with pytest.warns(FutureWarning, match="as_scalar"):
expr = rel.to_array()

op = expr.op()
Expand All @@ -186,6 +186,34 @@ def test_value_to_array_creates_subquery():
assert isinstance(op, ops.ScalarSubquery)


def test_as_scalar_creates_subquery():
    """Exercise `.as_scalar()` on a literal, a reduction, a column and tables."""
    # Literal: `.as_scalar()` must give back an expression equal to the input.
    one = ibis.literal(1)
    assert one.as_scalar().equals(one)

    # Reduction: wrapped in a scalar subquery over its table form.
    total = t.int_col.sum()
    assert total.as_scalar().op() == ops.ScalarSubquery(total.as_table())

    # Column: likewise wrapped in a scalar subquery over its table form.
    assert t.int_col.as_scalar().op() == ops.ScalarSubquery(t.int_col.as_table())

    # Single-column table: the table itself becomes the subquery's relation.
    single_column = t.select(t.int_col)
    assert single_column.as_scalar().op() == ops.ScalarSubquery(single_column)

    # Multi-column table: rejected when the expression is constructed.
    with pytest.raises(IntegrityError, match="must have exactly one column"):
        t.as_scalar()


def test_mutate():
proj = t.select(t, other=t.int_col + 1)
expected = Project(
Expand Down
10 changes: 9 additions & 1 deletion ibis/expr/types/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,7 +602,15 @@ def unbind(self) -> ir.Table:

def as_table(self) -> ir.Table:
    """Convert an expression to a table.

    This generic implementation does not know how to perform the
    conversion and therefore always raises.

    Raises
    ------
    NotImplementedError
        Always. The message names the concrete expression type.
    """
    # Only the descriptive error is kept: a bare `raise` placed before it
    # would make this message unreachable.
    raise NotImplementedError(
        f"{type(self)} expressions cannot be converted into tables"
    )

def as_scalar(self) -> ir.Scalar:
    """Convert an expression to a scalar.

    This generic implementation does not know how to perform the
    conversion and therefore always raises.

    Raises
    ------
    NotImplementedError
        Always. The message names the concrete expression type.
    """
    # Wording mirrors the `as_table` error ("... expressions cannot be
    # converted into tables") so both messages read the same way.
    raise NotImplementedError(
        f"{type(self)} expressions cannot be converted into scalars"
    )


def _binop(op_class: type[ops.Binary], left: ir.Value, right: ir.Value) -> ir.Value:
Expand Down
134 changes: 103 additions & 31 deletions ibis/expr/types/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1149,37 +1149,6 @@ def desc(self) -> ir.Value:
"""Sort an expression descending."""
return ops.SortKey(self, ascending=False).to_expr()

def as_table(self) -> ir.Table:
    """Promote the expression to a [Table](./expression-tables.qmd#ibis.expr.types.Table).

    An expression with no parent relation is wrapped in a dummy table keyed
    by the expression's name. An expression with exactly one parent relation
    becomes a selection of itself from that relation.

    Returns
    -------
    Table
        A table expression

    Raises
    ------
    RelationError
        If the expression depends on more than one relation.

    Examples
    --------
    >>> import ibis
    >>> t = ibis.table(dict(a="str"), name="t")
    >>> expr = t.a.length().name("len").as_table()
    >>> expected = t.select(len=t.a.length())
    >>> expr.equals(expected)
    True
    """
    relations = self.op().relations
    # The name lookup happens up front, before any branching.
    named = {self.get_name(): self}

    n_relations = len(relations)
    if n_relations == 0:
        return ops.DummyTable(named).to_expr()
    if n_relations != 1:
        raise com.RelationError(
            f"Cannot convert {type(self)} expression involving multiple "
            "base table references to a projection"
        )

    (source,) = relations
    return source.to_expr().select(self)

def to_pandas(self, **kwargs) -> pd.Series:
"""Convert a column expression to a pandas Series or scalar object.
Expand Down Expand Up @@ -1234,6 +1203,42 @@ def __pandas_result__(self, df: pd.DataFrame) -> Any:

return PandasData.convert_scalar(df, self.type())

def as_scalar(self):
    """Inform ibis that the expression should be treated as a scalar.

    An expression that depends on no relation (for example a literal) is
    returned unchanged. An expression that depends on a table is first
    converted to a table and then wrapped in a scalar subquery.

    Returns
    -------
    Scalar
        A scalar subquery or a literal

    Examples
    --------
    >>> import ibis
    >>>
    >>> ibis.options.interactive = True
    >>>
    >>> t = ibis.examples.penguins.fetch()
    >>> max_gentoo_weight = t.filter(t.species == "Gentoo").body_mass_g.max()
    >>> light_penguins = t.filter(t.body_mass_g < max_gentoo_weight / 2)
    >>> light_penguins.group_by("species").count()
    ┏━━━━━━━━━━━┳━━━━━━━━━━━━━┓
    ┃ species   ┃ CountStar() ┃
    ┡━━━━━━━━━━━╇━━━━━━━━━━━━━┩
    │ string    │ int64       │
    ├───────────┼─────────────┤
    │ Adelie    │          15 │
    │ Chinstrap │           2 │
    └───────────┴─────────────┘
    """
    # Nothing to wrap when the expression hangs off no relation.
    if not self.op().relations:
        return self

    as_relation = self.as_table()
    return ops.ScalarSubquery(as_relation).to_expr()

def as_table(self) -> ir.Table:
"""Promote the scalar expression to a table.
Expand Down Expand Up @@ -1327,6 +1332,73 @@ def __pandas_result__(self, df: pd.DataFrame) -> pd.Series:
(column,) = df.columns
return PandasData.convert_column(df.loc[:, column], self.type())

def as_scalar(self) -> Scalar:
    """Inform ibis that the expression should be treated as a scalar.

    The column is converted to a table, and that table is then converted to
    a scalar. ibis cannot be sure that the column holds only one value, so
    the column expression is wrapped in a scalar subquery and treated as a
    scalar.

    Note that the execution of the scalar subquery will fail if the column
    expression contains more than one value.

    Returns
    -------
    Scalar
        A scalar subquery

    Examples
    --------
    >>> import ibis
    >>>
    >>> ibis.options.interactive = True
    >>>
    >>> t = ibis.examples.penguins.fetch()
    >>> heavy_gentoo = t.filter(t.species == "Gentoo", t.body_mass_g > 6200)
    >>> from_that_island = t.filter(t.island == heavy_gentoo.island.as_scalar())
    >>> from_that_island.group_by("species").count()
    ┏━━━━━━━━━┳━━━━━━━━━━━━━┓
    ┃ species ┃ CountStar() ┃
    ┡━━━━━━━━━╇━━━━━━━━━━━━━┩
    │ string  │ int64       │
    ├─────────┼─────────────┤
    │ Adelie  │          44 │
    │ Gentoo  │         124 │
    └─────────┴─────────────┘
    """
    # Go through the table form so the table's own `as_scalar` does the work.
    single_column_table = self.as_table()
    return single_column_table.as_scalar()

def as_table(self) -> ir.Table:
    """Promote the expression to a [Table](./expression-tables.qmd#ibis.expr.types.Table).

    The result depends on how many relations the expression refers to:
    none gives a dummy table holding the named expression, one gives a
    selection of the expression from that relation, and more than one is
    an error.

    Returns
    -------
    Table
        A table expression

    Raises
    ------
    RelationError
        If the expression refers to multiple relations.

    Examples
    --------
    >>> import ibis
    >>> t = ibis.table(dict(a="str"), name="t")
    >>> expr = t.a.length().name("len").as_table()
    >>> expected = t.select(len=t.a.length())
    >>> expr.equals(expected)
    True
    """
    sources = self.op().relations
    # Built before branching; only the no-relation case consumes it.
    columns = {self.get_name(): self}
    how_many = len(sources)

    if how_many == 1:
        (only,) = sources
        return only.to_expr().select(self)
    if how_many == 0:
        return ops.DummyTable(columns).to_expr()

    raise com.RelationError(
        f"Cannot convert {type(self)} expression involving multiple "
        "base table references to a projection"
    )

def _bind_reduction_filter(self, where):
rels = self.op().relations
if isinstance(where, Deferred):
Expand Down
39 changes: 36 additions & 3 deletions ibis/expr/types/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,40 @@ def _bind_reduction_filter(self, where):

return where.resolve(self)

def as_scalar(self) -> ir.Scalar:
    """Inform ibis that the table expression should be treated as a scalar.

    Note that the table must have exactly one column and one row for this to
    work. If the table has more than one column an error will be raised at
    expression construction time. If the table has more than one row an
    error will be raised by the backend when the expression is executed.

    Returns
    -------
    Scalar
        A scalar subquery

    Examples
    --------
    >>> import ibis
    >>>
    >>> ibis.options.interactive = True
    >>>
    >>> t = ibis.examples.penguins.fetch()
    >>> heavy_gentoo = t.filter(t.species == "Gentoo", t.body_mass_g > 6200)
    >>> from_that_island = t.filter(t.island == heavy_gentoo.select("island").as_scalar())
    >>> from_that_island.group_by("species").count()
    ┏━━━━━━━━━┳━━━━━━━━━━━━━┓
    ┃ species ┃ CountStar() ┃
    ┡━━━━━━━━━╇━━━━━━━━━━━━━┩
    │ string  │ int64       │
    ├─────────┼─────────────┤
    │ Adelie  │          44 │
    │ Gentoo  │         124 │
    └─────────┴─────────────┘
    """
    # The table itself is the subquery's relation; no projection is added.
    return ops.ScalarSubquery(self).to_expr()

def as_table(self) -> Table:
"""Promote the expression to a table.
Expand Down Expand Up @@ -1805,7 +1839,7 @@ def intersect(self, table: Table, *rest: Table, distinct: bool = True) -> Table:
node = ops.Intersection(node, table, distinct=distinct)
return node.to_expr().select(self.columns)

@deprecated(as_of="9.0", instead="conversion to scalar subquery is implicit")
@deprecated(as_of="9.0", instead="use table.as_scalar() instead")
def to_array(self) -> ir.Column:
"""View a single column table as an array.
Expand All @@ -1819,8 +1853,7 @@ def to_array(self) -> ir.Column:
raise com.ExpressionError(
"Table must have exactly one column when viewed as array"
)

return ops.ScalarSubquery(self).to_expr()
return self.as_scalar()

def mutate(self, *exprs: Sequence[ir.Expr] | None, **mutations: ir.Value) -> Table:
"""Add columns to a table expression.
Expand Down

0 comments on commit d899b66

Please sign in to comment.