Skip to content

Commit

Permalink
refactor(api): remove interpolation argument
Browse files (browse the repository at this point in the history)
BREAKING CHANGE: the `interpolation` argument of `quantile` has been removed. It was only supported in the dask and pandas backends; for interpolated quantiles, use dask or pandas directly.
  • Loading branch information
cpcloud authored and kszucs committed Sep 29, 2023
1 parent 4537e1f commit 7c242af
Show file tree
Hide file tree
Showing 9 changed files with 36 additions and 112 deletions.
32 changes: 11 additions & 21 deletions ibis/backends/dask/execution/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,47 +112,37 @@ def execute_series_natural_log(op, data, **kwargs):
return np.log(data)


@execute_node.register(
ops.Quantile, dd.Series, numeric_types, type(None), (dd.Series, type(None))
)
def execute_series_quantile(op, data, quantile, _, mask, **kwargs):
@execute_node.register(ops.Quantile, dd.Series, numeric_types, (dd.Series, type(None)))
def execute_series_quantile(op, data, quantile, mask, **_):
if mask is not None:
data = data.loc[mask]
return data.quantile(q=quantile)


@execute_node.register(
ops.Quantile, ddgb.SeriesGroupBy, numeric_types, type(None), type(None)
)
def execute_series_quantile_group_by(op, data, quantile, *_, **kwargs):
@execute_node.register(ops.Quantile, ddgb.SeriesGroupBy, numeric_types, type(None))
def execute_series_quantile_group_by(op, data, quantile, mask, **_):
raise NotImplementedError(
"Quantile not implemented for Dask SeriesGroupBy, Dask #9824"
)


@execute_node.register(
ops.MultiQuantile, dd.Series, collections.abc.Sequence, type(None), type(None)
ops.MultiQuantile, dd.Series, collections.abc.Sequence, type(None)
)
def execute_series_quantile_sequence(_, data, quantile, **kwargs):
def execute_series_quantile_sequence(op, data, quantile, mask, **_):
return list(data.quantile(q=quantile))


# TODO - aggregations - #2553
@execute_node.register(
ops.MultiQuantile,
ddgb.SeriesGroupBy,
collections.abc.Sequence,
(str, type(None)),
type(None),
ops.MultiQuantile, ddgb.SeriesGroupBy, collections.abc.Sequence, type(None)
)
def execute_series_quantile_groupby(
op, data, quantile, interpolation, _, aggcontext=None, **kwargs
):
def q(x, quantile, interpolation):
result = x.quantile(quantile, interpolation=interpolation).tolist()
def execute_series_quantile_groupby(op, data, quantile, mask, aggcontext=None, **_):
def q(x, quantile):
result = x.quantile(quantile).tolist()
return [result for _ in range(len(x))]

return aggcontext.agg(data, q, quantile, interpolation or "linear")
return aggcontext.agg(data, q, quantile)


@execute_node.register(ops.Round, dd.Series, (dd.Series, np.integer, type(None), int))
Expand Down
4 changes: 0 additions & 4 deletions ibis/backends/dask/tests/execution/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

import ibis
import ibis.expr.datatypes as dt
from ibis.common.annotations import ValidationError
from ibis.common.exceptions import OperationNotDefinedError

dd = pytest.importorskip("dask.dataframe")
Expand Down Expand Up @@ -179,7 +178,6 @@ def test_quantile_list(t, df, ibis_func, dask_func, column):
],
)
def test_quantile_scalar(t, df, ibis_func, dask_func):
# TODO - interpolation
result = ibis_func(t.float64_with_zeros).compile()
expected = dask_func(df.float64_with_zeros)
assert result.compute() == expected.compute()
Expand All @@ -196,8 +194,6 @@ def test_quantile_scalar(t, df, ibis_func, dask_func):
(lambda x: x.clip(), ValueError),
# out of range on quantile
(lambda x: x.quantile(5.0), ValueError),
# invalid interpolation arg
(lambda x: x.quantile(0.5, interpolation="foo"), ValidationError),
],
)
def test_arraylike_functions_transform_errors(t, df, ibis_func, exc):
Expand Down
9 changes: 4 additions & 5 deletions ibis/backends/dask/tests/execution/test_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -825,20 +825,19 @@ def test_round(t, df):
reason="MultiQuantile is not implemented for the dask backend",
)
def test_quantile_group_by(batting, batting_df):
def q_fun(x, quantile, interpolation):
res = x.quantile(quantile, interpolation=interpolation).tolist()
def q_fun(x, quantile):
res = x.quantile(quantile).tolist()
return [res for _ in range(len(x))]

frac = 0.2
intp = "linear"
result = (
batting.group_by("teamID")
.mutate(res=lambda x: x.RBI.quantile([frac, 1 - frac], intp))
.mutate(res=lambda x: x.RBI.quantile([frac, 1 - frac]))
.res.compile()
)
expected = (
batting_df.groupby("teamID")
.RBI.transform(q_fun, quantile=[frac, 1 - frac], interpolation=intp)
.RBI.transform(q_fun, quantile=[frac, 1 - frac])
.rename("res")
)
tm.assert_series_equal(result.compute(), expected.compute(), check_index=False)
Expand Down
50 changes: 15 additions & 35 deletions ibis/backends/pandas/execution/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,37 +354,28 @@ def execute_series_clip(op, data, lower, upper, **kwargs):
ops.Quantile,
pd.Series,
(np.ndarray, *numeric_types),
type(None),
(pd.Series, type(None)),
)
def execute_series_quantile(op, data, quantile, _, mask, aggcontext=None, **kwargs):
def execute_series_quantile(op, data, quantile, mask, aggcontext=None, **_):
return aggcontext.agg(
data if mask is None else data.loc[mask],
"quantile",
q=quantile,
interpolation=op.interpolation or "linear",
)


@execute_node.register(
ops.Quantile, pd.Series, (np.ndarray, *numeric_types), type(None)
)
def execute_series_quantile_default(op, data, quantile, _, aggcontext=None, **kwargs):
return aggcontext.agg(
data, "quantile", q=quantile, interpolation=op.interpolation or "linear"
)
@execute_node.register(ops.Quantile, pd.Series, (np.ndarray, *numeric_types))
def execute_series_quantile_default(op, data, quantile, aggcontext=None, **_):
return aggcontext.agg(data, "quantile", q=quantile)


@execute_node.register(
ops.Quantile,
SeriesGroupBy,
(np.ndarray, *numeric_types),
type(None),
(SeriesGroupBy, type(None)),
)
def execute_series_group_by_quantile(
op, data, quantile, _, mask, aggcontext=None, **kwargs
):
def execute_series_group_by_quantile(op, data, quantile, mask, aggcontext=None, **_):
return aggcontext.agg(
data,
(
Expand All @@ -393,61 +384,50 @@ def execute_series_group_by_quantile(
else functools.partial(_filtered_reduction, mask.obj, pd.Series.quantile)
),
q=quantile,
interpolation=op.interpolation or "linear",
)


@execute_node.register(
ops.MultiQuantile,
pd.Series,
(np.ndarray, *numeric_types),
type(None),
(pd.Series, type(None)),
)
def execute_series_quantile_multi(
op, data, quantile, _, mask, aggcontext=None, **kwargs
):
def execute_series_quantile_multi(op, data, quantile, mask, aggcontext=None, **_):
return np.array(
aggcontext.agg(
data if mask is None else data.loc[mask],
"quantile",
q=quantile,
interpolation=op.interpolation or "linear",
)
aggcontext.agg(data if mask is None else data.loc[mask], "quantile", q=quantile)
)


@execute_node.register(
ops.MultiQuantile,
SeriesGroupBy,
np.ndarray,
type(None),
(SeriesGroupBy, type(None)),
)
def execute_series_quantile_multi_groupby(
op, data, quantile, _, mask, aggcontext=None, **kwargs
op, data, quantile, mask, aggcontext=None, **kwargs
):
def q(x, quantile, interpolation):
result = x.quantile(quantile, interpolation=interpolation).tolist()
def q(x, quantile):
result = x.quantile(quantile).tolist()
return [result for _ in range(len(x))]

return aggcontext.agg(
data,
q if mask is None else functools.partial(_filtered_reduction, mask.obj, q),
quantile,
op.interpolation or "linear",
)


@execute_node.register(ops.MultiQuantile, SeriesGroupBy, np.ndarray, type(None))
@execute_node.register(ops.MultiQuantile, SeriesGroupBy, np.ndarray)
def execute_series_quantile_multi_groupby_default(
op, data, quantile, _, aggcontext=None, **kwargs
op, data, quantile, aggcontext=None, **_
):
def q(x, quantile, interpolation):
result = x.quantile(quantile, interpolation=interpolation).tolist()
def q(x, quantile):
result = x.quantile(quantile).tolist()
return [result for _ in range(len(x))]

return aggcontext.agg(data, q, quantile, op.interpolation or "linear")
return aggcontext.agg(data, q, quantile)


@execute_node.register(ops.Cast, type(None), dt.DataType)
Expand Down
8 changes: 1 addition & 7 deletions ibis/backends/pandas/tests/execution/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
from ibis.backends.pandas.execution import execute
from ibis.backends.pandas.tests.conftest import TestConf as tm
from ibis.backends.pandas.udf import udf
from ibis.common.annotations import ValidationError


@pytest.mark.parametrize(
Expand Down Expand Up @@ -130,10 +129,7 @@ def test_round_decimal_with_negative_places(t):
[
(lambda x: x.quantile(0), lambda x: x.quantile(0)),
(lambda x: x.quantile(1), lambda x: x.quantile(1)),
(
lambda x: x.quantile(0.5, interpolation="linear"),
lambda x: x.quantile(0.5, interpolation="linear"),
),
(lambda x: x.quantile(0.5), lambda x: x.quantile(0.5)),
],
)
def test_quantile(t, df, ibis_func, pandas_func):
Expand Down Expand Up @@ -172,8 +168,6 @@ def test_quantile_multi(t, df, ibis_func, pandas_func, column):
(lambda x: x.clip(), ValueError),
# out of range on quantile
(lambda x: x.quantile(5.0), ValueError),
# invalid interpolation arg
(lambda x: x.quantile(0.5, interpolation="foo"), ValidationError),
],
)
def test_arraylike_functions_transform_errors(t, ibis_func, exc):
Expand Down
9 changes: 4 additions & 5 deletions ibis/backends/pandas/tests/execution/test_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -624,20 +624,19 @@ def test_round(t, df):


def test_quantile_groupby(batting, batting_df):
def q_fun(x, quantile, interpolation):
res = x.quantile(quantile, interpolation=interpolation).tolist()
def q_fun(x, quantile):
res = x.quantile(quantile).tolist()
return [res for _ in range(len(x))]

frac = 0.2
intp = "linear"
result = (
batting.group_by("teamID")
.mutate(res=lambda x: x.RBI.quantile([frac, 1 - frac], intp))
.mutate(res=lambda x: x.RBI.quantile([frac, 1 - frac]))
.res.execute()
)
expected = (
batting_df.groupby("teamID")
.RBI.transform(q_fun, quantile=[frac, 1 - frac], interpolation=intp)
.RBI.transform(q_fun, quantile=[frac, 1 - frac])
.rename("res")
)
tm.assert_series_equal(result, expected)
Expand Down
6 changes: 0 additions & 6 deletions ibis/backends/postgres/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import platform
import re
import string
import warnings

import sqlalchemy as sa
from sqlalchemy.dialects import postgresql as pg
Expand Down Expand Up @@ -430,11 +429,6 @@ def _mode(t, op):


def _quantile(t, op):
if op.interpolation is not None:
warnings.warn(
f"`{t.__module__.rsplit(',', 1)[0]}` backend does not support the "
"`interpolation` argument"
)
arg = op.arg
if (where := op.where) is not None:
arg = ops.IfElse(where, arg, None)
Expand Down
6 changes: 0 additions & 6 deletions ibis/expr/operations/reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,9 +186,6 @@ def dtype(self):
class Quantile(Filterable, Reduction):
arg: Value
quantile: Value[dt.Numeric]
interpolation: Optional[
Literal["linear", "lower", "higher", "midpoint", "nearest"]
] = None

dtype = dt.float64

Expand All @@ -197,9 +194,6 @@ class Quantile(Filterable, Reduction):
class MultiQuantile(Filterable, Reduction):
arg: Value
quantile: Value[dt.Array[dt.Float64]]
interpolation: Optional[
Literal["linear", "lower", "higher", "midpoint", "nearest"]
] = None

dtype = dt.Array(dt.float64)

Expand Down
24 changes: 1 addition & 23 deletions ibis/expr/types/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -781,14 +781,6 @@ def median(self, where: ir.BooleanValue | None = None) -> NumericScalar:
def quantile(
self,
quantile: Sequence[NumericValue | float],
interpolation: Literal[
"linear",
"lower",
"higher",
"midpoint",
"nearest",
]
| None = None,
where: ir.BooleanValue | None = None,
) -> NumericScalar:
"""Return value at the given quantile.
Expand All @@ -797,20 +789,6 @@ def quantile(
----------
quantile
`0 <= quantile <= 1`, the quantile(s) to compute
interpolation
::: {.callout-warning}
## This parameter is backend dependent and may have no effect
:::
This parameter specifies the interpolation method to use, when the
desired quantile lies between two data points `i` and `j`:
* linear: `i + (j - i) * fraction`, where `fraction` is the
fractional part of the index surrounded by `i` and `j`.
* lower: `i`.
* higher: `j`.
* nearest: `i` or `j` whichever is nearest.
* midpoint: (`i` + `j`) / 2.
where
Boolean filter for input values
Expand All @@ -823,7 +801,7 @@ def quantile(
op = ops.MultiQuantile
else:
op = ops.Quantile
return op(self, quantile, interpolation, where=where).to_expr()
return op(self, quantile, where=where).to_expr()

def std(
self,
Expand Down

0 comments on commit 7c242af

Please sign in to comment.