DEPR: numeric_only default in resampler ops (pandas-dev#47177)

yehoshuadimarsky · Jul 13, 2022 · ffc4ec7 · ffc4ec7
1 parent 6a90010
commit ffc4ec7
Show file tree

Hide file tree

Showing 3 changed files with 132 additions and 51 deletions.
diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
@@ -605,7 +605,7 @@ In the case where ``df.columns`` is not unique, use :meth:`DataFrame.isetitem`:
 ``numeric_only`` default value
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-Across the DataFrame and DataFrameGroupBy operations such as
+Across the :class:`DataFrame`, :class:`.DataFrameGroupBy`, and :class:`.Resampler` operations such as
 ``min``, ``sum``, and ``idxmax``, the default
 value of the ``numeric_only`` argument, if it exists at all, was inconsistent.
 Furthermore, operations with the default value ``None`` can lead to surprising
@@ -644,6 +644,11 @@ gained the ``numeric_only`` argument.
 - :meth:`.GroupBy.std`
 - :meth:`.GroupBy.sem`
 - :meth:`.DataFrameGroupBy.quantile`
+- :meth:`.Resampler.mean`
+- :meth:`.Resampler.median`
+- :meth:`.Resampler.sem`
+- :meth:`.Resampler.std`
+- :meth:`.Resampler.var`
 
 .. _whatsnew_150.deprecations.other:
 

diff --git a/pandas/core/resample.py b/pandas/core/resample.py
@@ -393,7 +393,7 @@ def transform(self, arg, *args, **kwargs):
         """
         return self._selected_obj.groupby(self.groupby).transform(arg, *args, **kwargs)
 
-    def _downsample(self, f):
+    def _downsample(self, f, **kwargs):
         raise AbstractMethodError(self)
 
     def _upsample(self, f, limit=None, fill_value=None):
@@ -937,25 +937,28 @@ def asfreq(self, fill_value=None):
         """
         return self._upsample("asfreq", fill_value=fill_value)
 
-    def std(self, ddof=1, *args, **kwargs):
+    def std(self, ddof=1, numeric_only: bool = False, *args, **kwargs):
         """
         Compute standard deviation of groups, excluding missing values.
 
         Parameters
         ----------
         ddof : int, default 1
             Degrees of freedom.
+        numeric_only : bool, default False
+            Include only `float`, `int` or `boolean` data.
+
+            .. versionadded:: 1.5.0
 
         Returns
         -------
         DataFrame or Series
             Standard deviation of values within each group.
         """
         nv.validate_resampler_func("std", args, kwargs)
-        # error: Unexpected keyword argument "ddof" for "_downsample"
-        return self._downsample("std", ddof=ddof)  # type: ignore[call-arg]
+        return self._downsample("std", ddof=ddof, numeric_only=numeric_only)
 
-    def var(self, ddof=1, *args, **kwargs):
+    def var(self, ddof=1, numeric_only: bool = False, *args, **kwargs):
         """
         Compute variance of groups, excluding missing values.
 
@@ -964,14 +967,18 @@ def var(self, ddof=1, *args, **kwargs):
         ddof : int, default 1
             Degrees of freedom.
 
+        numeric_only : bool, default False
+            Include only `float`, `int` or `boolean` data.
+
+            .. versionadded:: 1.5.0
+
         Returns
         -------
         DataFrame or Series
             Variance of values within each group.
         """
         nv.validate_resampler_func("var", args, kwargs)
-        # error: Unexpected keyword argument "ddof" for "_downsample"
-        return self._downsample("var", ddof=ddof)  # type: ignore[call-arg]
+        return self._downsample("var", ddof=ddof, numeric_only=numeric_only)
 
     @doc(GroupBy.size)
     def size(self):
@@ -1027,53 +1034,94 @@ def quantile(self, q=0.5, **kwargs):
             Return a DataFrame, where the columns are groupby columns,
             and the values are its quantiles.
         """
-        # error: Unexpected keyword argument "q" for "_downsample"
-        # error: Too many arguments for "_downsample"
-        return self._downsample("quantile", q=q, **kwargs)  # type: ignore[call-arg]
+        return self._downsample("quantile", q=q, **kwargs)
 
 
-# downsample methods
-for method in ["sum", "prod", "min", "max", "first", "last"]:
+def _add_downsample_kernel(
+    name: str, args: tuple[str, ...], docs_class: type = GroupBy
+) -> None:
+    """
+    Add a kernel to Resampler.
+
+    Parameters
+    ----------
+    name : str
+        Name of the kernel.
+    args : tuple
+        Arguments of the method.
+    docs_class : type
+        Class to get kernel docstring from.
+    """
+    assert args in (
+        ("numeric_only", "min_count"),
+        ("numeric_only",),
+        ("ddof", "numeric_only"),
+        (),
+    )
 
-    def f(
-        self,
-        _method: str = method,
-        numeric_only: bool | lib.NoDefault = lib.no_default,
-        min_count: int = 0,
-        *args,
-        **kwargs,
-    ):
-        if numeric_only is lib.no_default:
-            if _method != "sum":
+    # Explicitly provide args rather than args/kwargs for API docs
+    if args == ("numeric_only", "min_count"):
+
+        def f(
+            self,
+            numeric_only: bool | lib.NoDefault = lib.no_default,
+            min_count: int = 0,
+            *args,
+            **kwargs,
+        ):
+            nv.validate_resampler_func(name, args, kwargs)
+            if numeric_only is lib.no_default and name != "sum":
                 # For DataFrameGroupBy, set it to be False for methods other than `sum`.
                 numeric_only = False
 
-        nv.validate_resampler_func(_method, args, kwargs)
-        return self._downsample(_method, numeric_only=numeric_only, min_count=min_count)
-
-    f.__doc__ = getattr(GroupBy, method).__doc__
-    setattr(Resampler, method, f)
-
+            return self._downsample(
+                name, numeric_only=numeric_only, min_count=min_count
+            )
 
-# downsample methods
-for method in ["mean", "sem", "median", "ohlc"]:
+    elif args == ("numeric_only",):
+        # error: All conditional function variants must have identical signatures
+        def f(  # type: ignore[misc]
+            self, numeric_only: bool | lib.NoDefault = lib.no_default, *args, **kwargs
+        ):
+            nv.validate_resampler_func(name, args, kwargs)
+            return self._downsample(name, numeric_only=numeric_only)
+
+    elif args == ("ddof", "numeric_only"):
+        # error: All conditional function variants must have identical signatures
+        def f(  # type: ignore[misc]
+            self,
+            ddof: int = 1,
+            numeric_only: bool | lib.NoDefault = lib.no_default,
+            *args,
+            **kwargs,
+        ):
+            nv.validate_resampler_func(name, args, kwargs)
+            return self._downsample(name, ddof=ddof, numeric_only=numeric_only)
 
-    def g(self, _method=method, *args, **kwargs):
-        nv.validate_resampler_func(_method, args, kwargs)
-        return self._downsample(_method)
+    else:
+        # error: All conditional function variants must have identical signatures
+        def f(  # type: ignore[misc]
+            self,
+            *args,
+            **kwargs,
+        ):
+            nv.validate_resampler_func(name, args, kwargs)
+            return self._downsample(name)
 
-    g.__doc__ = getattr(GroupBy, method).__doc__
-    setattr(Resampler, method, g)
+    f.__doc__ = getattr(docs_class, name).__doc__
+    setattr(Resampler, name, f)
 
 
-# series only methods
+for method in ["sum", "prod", "min", "max", "first", "last"]:
+    _add_downsample_kernel(method, ("numeric_only", "min_count"))
+for method in ["mean", "median"]:
+    _add_downsample_kernel(method, ("numeric_only",))
+for method in ["sem"]:
+    _add_downsample_kernel(method, ("ddof", "numeric_only"))
+for method in ["ohlc"]:
+    _add_downsample_kernel(method, ())
 for method in ["nunique"]:
-
-    def h(self, _method=method):
-        return self._downsample(_method)
-
-    h.__doc__ = getattr(SeriesGroupBy, method).__doc__
-    setattr(Resampler, method, h)
+    _add_downsample_kernel(method, (), SeriesGroupBy)
 
 
 class _GroupByMixin(PandasObject):

diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py
@@ -814,6 +814,7 @@ def test_end_and_end_day_origin(
 
 
 @pytest.mark.parametrize(
+    # expected_data is a string when op raises a ValueError or TypeError
     "method, numeric_only, expected_data",
     [
         ("sum", True, {"num": [25]}),
@@ -834,6 +835,21 @@ def test_end_and_end_day_origin(
         ("last", True, {"num": [20]}),
         ("last", False, {"cat": ["cat_2"], "num": [20]}),
         ("last", lib.no_default, {"cat": ["cat_2"], "num": [20]}),
+        ("mean", True, {"num": [12.5]}),
+        ("mean", False, {"num": [12.5]}),
+        ("mean", lib.no_default, {"num": [12.5]}),
+        ("median", True, {"num": [12.5]}),
+        ("median", False, {"num": [12.5]}),
+        ("median", lib.no_default, {"num": [12.5]}),
+        ("std", True, {"num": [10.606601717798213]}),
+        ("std", False, "could not convert string to float"),
+        ("std", lib.no_default, {"num": [10.606601717798213]}),
+        ("var", True, {"num": [112.5]}),
+        ("var", False, "could not convert string to float"),
+        ("var", lib.no_default, {"num": [112.5]}),
+        ("sem", True, {"num": [7.5]}),
+        ("sem", False, "could not convert string to float"),
+        ("sem", lib.no_default, {"num": [7.5]}),
     ],
 )
 def test_frame_downsample_method(method, numeric_only, expected_data):
@@ -845,20 +861,32 @@ def test_frame_downsample_method(method, numeric_only, expected_data):
     resampled = df.resample("Y")
 
     func = getattr(resampled, method)
-    if method == "prod" and numeric_only is not True:
+    if numeric_only is lib.no_default and method not in (
+        "min",
+        "max",
+        "first",
+        "last",
+        "prod",
+    ):
         warn = FutureWarning
-        msg = "Dropping invalid columns in DataFrameGroupBy.prod is deprecated"
-    elif method == "sum" and numeric_only is lib.no_default:
+        msg = (
+            f"default value of numeric_only in DataFrameGroupBy.{method} is deprecated"
+        )
+    elif method in ("prod", "mean", "median") and numeric_only is not True:
         warn = FutureWarning
-        msg = "The default value of numeric_only in DataFrameGroupBy.sum is deprecated"
+        msg = f"Dropping invalid columns in DataFrameGroupBy.{method} is deprecated"
     else:
         warn = None
         msg = ""
     with tm.assert_produces_warning(warn, match=msg):
-        result = func(numeric_only=numeric_only)
-
-    expected = DataFrame(expected_data, index=expected_index)
-    tm.assert_frame_equal(result, expected)
+        if isinstance(expected_data, str):
+            klass = TypeError if method == "var" else ValueError
+            with pytest.raises(klass, match=expected_data):
+                _ = func(numeric_only=numeric_only)
+        else:
+            result = func(numeric_only=numeric_only)
+            expected = DataFrame(expected_data, index=expected_index)
+            tm.assert_frame_equal(result, expected)
 
 
 @pytest.mark.parametrize(