Skip to content

Commit

Permalink
DEPR: numeric_only default in resampler ops (pandas-dev#47177)
Browse files Browse the repository at this point in the history
  • Loading branch information
rhshadrach authored and yehoshuadimarsky committed Jul 13, 2022
1 parent 6a90010 commit ffc4ec7
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 51 deletions.
7 changes: 6 additions & 1 deletion doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,7 @@ In the case where ``df.columns`` is not unique, use :meth:`DataFrame.isetitem`:
``numeric_only`` default value
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Across the DataFrame and DataFrameGroupBy operations such as
Across the :class:`DataFrame`, :class:`.DataFrameGroupBy`, and :class:`.Resampler` operations such as
``min``, ``sum``, and ``idxmax``, the default
value of the ``numeric_only`` argument, if it exists at all, was inconsistent.
Furthermore, operations with the default value ``None`` can lead to surprising
Expand Down Expand Up @@ -644,6 +644,11 @@ gained the ``numeric_only`` argument.
- :meth:`.GroupBy.std`
- :meth:`.GroupBy.sem`
- :meth:`.DataFrameGroupBy.quantile`
- :meth:`.Resampler.mean`
- :meth:`.Resampler.median`
- :meth:`.Resampler.sem`
- :meth:`.Resampler.std`
- :meth:`.Resampler.var`

.. _whatsnew_150.deprecations.other:

Expand Down
132 changes: 90 additions & 42 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ def transform(self, arg, *args, **kwargs):
"""
return self._selected_obj.groupby(self.groupby).transform(arg, *args, **kwargs)

def _downsample(self, f):
def _downsample(self, f, **kwargs):
raise AbstractMethodError(self)

def _upsample(self, f, limit=None, fill_value=None):
Expand Down Expand Up @@ -937,25 +937,28 @@ def asfreq(self, fill_value=None):
"""
return self._upsample("asfreq", fill_value=fill_value)

def std(self, ddof=1, *args, **kwargs):
def std(self, ddof=1, numeric_only: bool = False, *args, **kwargs):
"""
Compute standard deviation of groups, excluding missing values.
Parameters
----------
ddof : int, default 1
Degrees of freedom.
numeric_only : bool, default False
Include only `float`, `int` or `boolean` data.
.. versionadded:: 1.5.0
Returns
-------
DataFrame or Series
Standard deviation of values within each group.
"""
nv.validate_resampler_func("std", args, kwargs)
# error: Unexpected keyword argument "ddof" for "_downsample"
return self._downsample("std", ddof=ddof) # type: ignore[call-arg]
return self._downsample("std", ddof=ddof, numeric_only=numeric_only)

def var(self, ddof=1, *args, **kwargs):
def var(self, ddof=1, numeric_only: bool = False, *args, **kwargs):
"""
Compute variance of groups, excluding missing values.
Expand All @@ -964,14 +967,18 @@ def var(self, ddof=1, *args, **kwargs):
ddof : int, default 1
Degrees of freedom.
numeric_only : bool, default False
Include only `float`, `int` or `boolean` data.
.. versionadded:: 1.5.0
Returns
-------
DataFrame or Series
Variance of values within each group.
"""
nv.validate_resampler_func("var", args, kwargs)
# error: Unexpected keyword argument "ddof" for "_downsample"
return self._downsample("var", ddof=ddof) # type: ignore[call-arg]
return self._downsample("var", ddof=ddof, numeric_only=numeric_only)

@doc(GroupBy.size)
def size(self):
Expand Down Expand Up @@ -1027,53 +1034,94 @@ def quantile(self, q=0.5, **kwargs):
Return a DataFrame, where the columns are groupby columns,
and the values are its quantiles.
"""
# error: Unexpected keyword argument "q" for "_downsample"
# error: Too many arguments for "_downsample"
return self._downsample("quantile", q=q, **kwargs) # type: ignore[call-arg]
return self._downsample("quantile", q=q, **kwargs)


# downsample methods
for method in ["sum", "prod", "min", "max", "first", "last"]:
def _add_downsample_kernel(
name: str, args: tuple[str, ...], docs_class: type = GroupBy
) -> None:
"""
Add a kernel to Resampler.
Parameters
----------
name : str
Name of the kernel.
args : tuple
Arguments of the method.
docs_class : type
Class to get kernel docstring from.
"""
assert args in (
("numeric_only", "min_count"),
("numeric_only",),
("ddof", "numeric_only"),
(),
)

def f(
self,
_method: str = method,
numeric_only: bool | lib.NoDefault = lib.no_default,
min_count: int = 0,
*args,
**kwargs,
):
if numeric_only is lib.no_default:
if _method != "sum":
# Explicitly provide args rather than args/kwargs for API docs
if args == ("numeric_only", "min_count"):

def f(
self,
numeric_only: bool | lib.NoDefault = lib.no_default,
min_count: int = 0,
*args,
**kwargs,
):
nv.validate_resampler_func(name, args, kwargs)
if numeric_only is lib.no_default and name != "sum":
# For DataFrameGroupBy, set it to be False for methods other than `sum`.
numeric_only = False

nv.validate_resampler_func(_method, args, kwargs)
return self._downsample(_method, numeric_only=numeric_only, min_count=min_count)

f.__doc__ = getattr(GroupBy, method).__doc__
setattr(Resampler, method, f)

return self._downsample(
name, numeric_only=numeric_only, min_count=min_count
)

# downsample methods
for method in ["mean", "sem", "median", "ohlc"]:
elif args == ("numeric_only",):
# error: All conditional function variants must have identical signatures
def f( # type: ignore[misc]
self, numeric_only: bool | lib.NoDefault = lib.no_default, *args, **kwargs
):
nv.validate_resampler_func(name, args, kwargs)
return self._downsample(name, numeric_only=numeric_only)

elif args == ("ddof", "numeric_only"):
# error: All conditional function variants must have identical signatures
def f( # type: ignore[misc]
self,
ddof: int = 1,
numeric_only: bool | lib.NoDefault = lib.no_default,
*args,
**kwargs,
):
nv.validate_resampler_func(name, args, kwargs)
return self._downsample(name, ddof=ddof, numeric_only=numeric_only)

def g(self, _method=method, *args, **kwargs):
nv.validate_resampler_func(_method, args, kwargs)
return self._downsample(_method)
else:
# error: All conditional function variants must have identical signatures
def f( # type: ignore[misc]
self,
*args,
**kwargs,
):
nv.validate_resampler_func(name, args, kwargs)
return self._downsample(name)

g.__doc__ = getattr(GroupBy, method).__doc__
setattr(Resampler, method, g)
f.__doc__ = getattr(docs_class, name).__doc__
setattr(Resampler, name, f)


# series only methods
for method in ["sum", "prod", "min", "max", "first", "last"]:
_add_downsample_kernel(method, ("numeric_only", "min_count"))
for method in ["mean", "median"]:
_add_downsample_kernel(method, ("numeric_only",))
for method in ["sem"]:
_add_downsample_kernel(method, ("ddof", "numeric_only"))
for method in ["ohlc"]:
_add_downsample_kernel(method, ())
for method in ["nunique"]:

def h(self, _method=method):
return self._downsample(_method)

h.__doc__ = getattr(SeriesGroupBy, method).__doc__
setattr(Resampler, method, h)
_add_downsample_kernel(method, (), SeriesGroupBy)


class _GroupByMixin(PandasObject):
Expand Down
44 changes: 36 additions & 8 deletions pandas/tests/resample/test_resample_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -814,6 +814,7 @@ def test_end_and_end_day_origin(


@pytest.mark.parametrize(
# expected_data is a string when op raises a ValueError (TypeError for "var")
"method, numeric_only, expected_data",
[
("sum", True, {"num": [25]}),
Expand All @@ -834,6 +835,21 @@ def test_end_and_end_day_origin(
("last", True, {"num": [20]}),
("last", False, {"cat": ["cat_2"], "num": [20]}),
("last", lib.no_default, {"cat": ["cat_2"], "num": [20]}),
("mean", True, {"num": [12.5]}),
("mean", False, {"num": [12.5]}),
("mean", lib.no_default, {"num": [12.5]}),
("median", True, {"num": [12.5]}),
("median", False, {"num": [12.5]}),
("median", lib.no_default, {"num": [12.5]}),
("std", True, {"num": [10.606601717798213]}),
("std", False, "could not convert string to float"),
("std", lib.no_default, {"num": [10.606601717798213]}),
("var", True, {"num": [112.5]}),
("var", False, "could not convert string to float"),
("var", lib.no_default, {"num": [112.5]}),
("sem", True, {"num": [7.5]}),
("sem", False, "could not convert string to float"),
("sem", lib.no_default, {"num": [7.5]}),
],
)
def test_frame_downsample_method(method, numeric_only, expected_data):
Expand All @@ -845,20 +861,32 @@ def test_frame_downsample_method(method, numeric_only, expected_data):
resampled = df.resample("Y")

func = getattr(resampled, method)
if method == "prod" and numeric_only is not True:
if numeric_only is lib.no_default and method not in (
"min",
"max",
"first",
"last",
"prod",
):
warn = FutureWarning
msg = "Dropping invalid columns in DataFrameGroupBy.prod is deprecated"
elif method == "sum" and numeric_only is lib.no_default:
msg = (
f"default value of numeric_only in DataFrameGroupBy.{method} is deprecated"
)
elif method in ("prod", "mean", "median") and numeric_only is not True:
warn = FutureWarning
msg = "The default value of numeric_only in DataFrameGroupBy.sum is deprecated"
msg = f"Dropping invalid columns in DataFrameGroupBy.{method} is deprecated"
else:
warn = None
msg = ""
with tm.assert_produces_warning(warn, match=msg):
result = func(numeric_only=numeric_only)

expected = DataFrame(expected_data, index=expected_index)
tm.assert_frame_equal(result, expected)
if isinstance(expected_data, str):
klass = TypeError if method == "var" else ValueError
with pytest.raises(klass, match=expected_data):
_ = func(numeric_only=numeric_only)
else:
result = func(numeric_only=numeric_only)
expected = DataFrame(expected_data, index=expected_index)
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
Expand Down

0 comments on commit ffc4ec7

Please sign in to comment.