Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(api): add .as_scalar() method for turning expressions into scalar subqueries #8350

Merged
merged 1 commit into from
Feb 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 29 additions & 1 deletion ibis/expr/tests/test_newrels.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ def test_select_turns_value_with_multiple_parents_into_subquery():

def test_value_to_array_creates_subquery():
rel = t.int_col.sum().as_table()
with pytest.warns(FutureWarning, match="implicit"):
with pytest.warns(FutureWarning, match="as_scalar"):
expr = rel.to_array()

op = expr.op()
Expand All @@ -186,6 +186,34 @@ def test_value_to_array_creates_subquery():
assert isinstance(op, ops.ScalarSubquery)


def test_as_scalar_creates_subquery():
    """Check what `as_scalar()` produces for literals, values and tables."""
    # A literal comes back as an equal expression.
    one = ibis.literal(1)
    assert one.as_scalar().equals(one)

    # A reduction is wrapped in a scalar subquery over its table form.
    total = t.int_col.sum()
    wrapped_total = total.as_scalar()
    assert wrapped_total.op() == ops.ScalarSubquery(total.as_table())

    # A plain column is wrapped the same way.
    wrapped_column = t.int_col.as_scalar()
    assert wrapped_column.op() == ops.ScalarSubquery(t.int_col.as_table())

    # A single-column table is used directly as the subquery relation.
    single_column = t.select(t.int_col)
    wrapped_table = single_column.as_scalar()
    assert wrapped_table.op() == ops.ScalarSubquery(single_column)

    # A table with several columns is rejected when the expression is built.
    with pytest.raises(IntegrityError, match="must have exactly one column"):
        t.as_scalar()


def test_mutate():
proj = t.select(t, other=t.int_col + 1)
expected = Project(
Expand Down
10 changes: 9 additions & 1 deletion ibis/expr/types/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,7 +602,15 @@ def unbind(self) -> ir.Table:

def as_table(self) -> ir.Table:
    """Convert an expression to a table.

    This implementation performs no conversion; it raises unconditionally.

    Raises
    ------
    NotImplementedError
        Always. The message names the concrete expression type.
    """
    # Only one raise: a second `raise` after the first can never execute, so
    # keep the variant whose message tells the user what went wrong.
    raise NotImplementedError(
        f"{type(self)} expressions cannot be converted into tables"
    )

def as_scalar(self) -> ir.Scalar:
    """Convert an expression to a scalar.

    This implementation performs no conversion; it raises unconditionally.

    Raises
    ------
    NotImplementedError
        Always. The message names the concrete expression type.
    """
    # Wording mirrors the `as_table` error ("<type> expressions cannot be
    # converted into ...") so both messages read the same way.
    raise NotImplementedError(
        f"{type(self)} expressions cannot be converted into scalars"
    )


def _binop(op_class: type[ops.Binary], left: ir.Value, right: ir.Value) -> ir.Value:
Expand Down
134 changes: 103 additions & 31 deletions ibis/expr/types/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1149,37 +1149,6 @@ def desc(self) -> ir.Value:
"""Sort an expression descending."""
return ops.SortKey(self, ascending=False).to_expr()

def as_table(self) -> ir.Table:
    """Promote the expression to a [Table](./expression-tables.qmd#ibis.expr.types.Table).

    An expression with no parent relation is wrapped in a dummy table. An
    expression with exactly one parent relation becomes a selection of
    itself from that relation.

    Returns
    -------
    Table
        A table expression

    Raises
    ------
    RelationError
        If the expression references more than one parent relation.

    Examples
    --------
    >>> import ibis
    >>> t = ibis.table(dict(a="str"), name="t")
    >>> expr = t.a.length().name("len").as_table()
    >>> expected = t.select(len=t.a.length())
    >>> expr.equals(expected)
    True
    """
    relations = self.op().relations
    # The name is resolved before any branching, whichever branch is taken.
    named = {self.get_name(): self}
    n_relations = len(relations)

    if n_relations > 1:
        raise com.RelationError(
            f"Cannot convert {type(self)} expression involving multiple "
            "base table references to a projection"
        )
    if n_relations == 1:
        (relation,) = relations
        return relation.to_expr().select(self)
    return ops.DummyTable(named).to_expr()

def to_pandas(self, **kwargs) -> pd.Series:
"""Convert a column expression to a pandas Series or scalar object.
Expand Down Expand Up @@ -1234,6 +1203,42 @@ def __pandas_result__(self, df: pd.DataFrame) -> Any:

return PandasData.convert_scalar(df, self.type())

def as_scalar(self):
    """Inform ibis that the expression should be treated as a scalar.

    An expression that depends on no relation (for example a literal) is
    returned as is. An expression that depends on a table is converted to
    its table form and wrapped in a scalar subquery.

    Returns
    -------
    Scalar
        A scalar subquery or a literal

    Examples
    --------
    >>> import ibis
    >>>
    >>> ibis.options.interactive = True
    >>>
    >>> t = ibis.examples.penguins.fetch()
    >>> max_gentoo_weight = t.filter(t.species == "Gentoo").body_mass_g.max()
    >>> light_penguins = t.filter(t.body_mass_g < max_gentoo_weight / 2)
    >>> light_penguins.group_by("species").count()
    ┏━━━━━━━━━━━┳━━━━━━━━━━━━━┓
    ┃ species   ┃ CountStar() ┃
    ┡━━━━━━━━━━━╇━━━━━━━━━━━━━┩
    │ string    │ int64       │
    ├───────────┼─────────────┤
    │ Adelie    │          15 │
    │ Chinstrap │           2 │
    └───────────┴─────────────┘
    """
    # Nothing to wrap when the expression has no parent relation.
    if not self.op().relations:
        return self
    return ops.ScalarSubquery(self.as_table()).to_expr()

def as_table(self) -> ir.Table:
"""Promote the scalar expression to a table.
Expand Down Expand Up @@ -1327,6 +1332,73 @@ def __pandas_result__(self, df: pd.DataFrame) -> pd.Series:
(column,) = df.columns
return PandasData.convert_column(df.loc[:, column], self.type())

def as_scalar(self) -> Scalar:
    """Inform ibis that the expression should be treated as a scalar.

    Creates a scalar subquery from the column expression. Since ibis cannot
    be sure that the column expression contains only one value, the column
    expression is wrapped in a scalar subquery and treated as a scalar.

    Note that the execution of the scalar subquery will fail if the column
    expression contains more than one value.

    Returns
    -------
    Scalar
        A scalar subquery

    Examples
    --------
    >>> import ibis
    >>>
    >>> ibis.options.interactive = True
    >>>
    >>> t = ibis.examples.penguins.fetch()
    >>> heavy_gentoo = t.filter(t.species == "Gentoo", t.body_mass_g > 6200)
    >>> from_that_island = t.filter(t.island == heavy_gentoo.island.as_scalar())
    >>> from_that_island.group_by("species").count()
    ┏━━━━━━━━━┳━━━━━━━━━━━━━┓
    ┃ species ┃ CountStar() ┃
    ┡━━━━━━━━━╇━━━━━━━━━━━━━┩
    │ string  │ int64       │
    ├─────────┼─────────────┤
    │ Adelie  │          44 │
    │ Gentoo  │         124 │
    └─────────┴─────────────┘
    """
    # Delegate to the table form, which performs the subquery wrapping.
    as_single_column_table = self.as_table()
    return as_single_column_table.as_scalar()

def as_table(self) -> ir.Table:
    """Promote the expression to a [Table](./expression-tables.qmd#ibis.expr.types.Table).

    An expression with no parent relation is wrapped in a dummy table. An
    expression with exactly one parent relation becomes a selection of
    itself from that relation.

    Returns
    -------
    Table
        A table expression

    Raises
    ------
    RelationError
        If the expression references more than one parent relation.

    Examples
    --------
    >>> import ibis
    >>> t = ibis.table(dict(a="str"), name="t")
    >>> expr = t.a.length().name("len").as_table()
    >>> expected = t.select(len=t.a.length())
    >>> expr.equals(expected)
    True
    """
    relations = self.op().relations
    # The name is resolved before any branching, whichever branch is taken.
    named = {self.get_name(): self}
    n_relations = len(relations)

    if n_relations > 1:
        raise com.RelationError(
            f"Cannot convert {type(self)} expression involving multiple "
            "base table references to a projection"
        )
    if n_relations == 1:
        (relation,) = relations
        return relation.to_expr().select(self)
    return ops.DummyTable(named).to_expr()

def _bind_reduction_filter(self, where):
rels = self.op().relations
if isinstance(where, Deferred):
Expand Down
39 changes: 36 additions & 3 deletions ibis/expr/types/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,40 @@ def _bind_reduction_filter(self, where):

return where.resolve(self)

def as_scalar(self) -> ir.Scalar:
    """Inform ibis that the table expression should be treated as a scalar.

    Note that the table must have exactly one column and one row for this to
    work. If the table has more than one column an error will be raised at
    expression construction time. If the table has more than one row an
    error will be raised by the backend when the expression is executed.

    Returns
    -------
    Scalar
        A scalar subquery

    Examples
    --------
    >>> import ibis
    >>>
    >>> ibis.options.interactive = True
    >>>
    >>> t = ibis.examples.penguins.fetch()
    >>> heavy_gentoo = t.filter(t.species == "Gentoo", t.body_mass_g > 6200)
    >>> from_that_island = t.filter(t.island == heavy_gentoo.select("island").as_scalar())
    >>> from_that_island.group_by("species").count()
    ┏━━━━━━━━━┳━━━━━━━━━━━━━┓
    ┃ species ┃ CountStar() ┃
    ┡━━━━━━━━━╇━━━━━━━━━━━━━┩
    │ string  │ int64       │
    ├─────────┼─────────────┤
    │ Adelie  │          44 │
    │ Gentoo  │         124 │
    └─────────┴─────────────┘
    """
    # The table itself is the relation of the scalar subquery node.
    subquery = ops.ScalarSubquery(self)
    return subquery.to_expr()

def as_table(self) -> Table:
"""Promote the expression to a table.
Expand Down Expand Up @@ -1805,7 +1839,7 @@ def intersect(self, table: Table, *rest: Table, distinct: bool = True) -> Table:
node = ops.Intersection(node, table, distinct=distinct)
return node.to_expr().select(self.columns)

@deprecated(as_of="9.0", instead="conversion to scalar subquery is implicit")
@deprecated(as_of="9.0", instead="use table.as_scalar() instead")
def to_array(self) -> ir.Column:
"""View a single column table as an array.
Expand All @@ -1819,8 +1853,7 @@ def to_array(self) -> ir.Column:
raise com.ExpressionError(
"Table must have exactly one column when viewed as array"
)

return ops.ScalarSubquery(self).to_expr()
return self.as_scalar()

def mutate(self, *exprs: Sequence[ir.Expr] | None, **mutations: ir.Value) -> Table:
"""Add columns to a table expression.
Expand Down
Loading