Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: groupby nuisance warnings #46010

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import numpy as np

from pandas._libs import reduction as libreduction
import pandas._libs.lib as lib
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This import can be combined into the previous `from pandas._libs import ...` line.

from pandas._typing import (
ArrayLike,
Manager,
Expand Down Expand Up @@ -1102,9 +1103,14 @@ def _wrap_applied_output_series(
return self._reindex_output(result)

def _cython_transform(
self, how: str, numeric_only: bool = True, axis: int = 0, **kwargs
self,
how: str,
numeric_only: bool | lib.NoDefault = lib.no_default,
axis: int = 0,
**kwargs,
) -> DataFrame:
assert axis == 0 # handled by caller
numeric_only_bool = self._resolve_numeric_only(numeric_only)
# TODO: no tests with self.ndim == 1 for DataFrameGroupBy

# With self.axis == 0, we have multi-block tests
Expand All @@ -1113,7 +1119,8 @@ def _cython_transform(
# With self.axis == 1, _get_data_to_aggregate does a transpose
# so we always have a single block.
mgr: Manager2D = self._get_data_to_aggregate()
if numeric_only:
orig_len = len(mgr)
if numeric_only_bool:
mgr = mgr.get_numeric_data(copy=False)

def arr_func(bvalues: ArrayLike) -> ArrayLike:
Expand All @@ -1126,7 +1133,9 @@ def arr_func(bvalues: ArrayLike) -> ArrayLike:
res_mgr = mgr.grouped_reduce(arr_func, ignore_failures=True)
res_mgr.set_axis(1, mgr.axes[1])

if len(res_mgr) < len(mgr):
if len(res_mgr) < len(mgr) or (
numeric_only is lib.no_default and len(res_mgr) < orig_len
):
warn_dropping_nuisance_columns_deprecated(type(self), how)

res_df = self.obj._constructor(res_mgr)
Expand Down
40 changes: 25 additions & 15 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1494,7 +1494,7 @@ def _python_agg_general(self, func, *args, **kwargs):
@final
def _agg_general(
self,
numeric_only: bool = True,
numeric_only: bool | lib.NoDefault = True,
min_count: int = -1,
*,
alias: str,
Expand Down Expand Up @@ -1553,15 +1553,21 @@ def _agg_py_fallback(

@final
def _cython_agg_general(
self, how: str, alt: Callable, numeric_only: bool, min_count: int = -1
self,
how: str,
alt: Callable,
numeric_only: bool | lib.NoDefault,
min_count: int = -1,
):
# Note: we never get here with how="ohlc" for DataFrameGroupBy;
# that goes through SeriesGroupBy

data = self._get_data_to_aggregate()
orig_len = len(data)
is_ser = data.ndim == 1
numeric_only_bool = self._resolve_numeric_only(numeric_only)

if numeric_only:
if numeric_only_bool:
if is_ser and not is_numeric_dtype(self._selected_obj.dtype):
# GH#41291 match Series behavior
kwd_name = "numeric_only"
Expand Down Expand Up @@ -1591,7 +1597,10 @@ def array_func(values: ArrayLike) -> ArrayLike:
# continue and exclude the block
new_mgr = data.grouped_reduce(array_func, ignore_failures=True)

if not is_ser and len(new_mgr) < len(data):
if not is_ser and (
len(new_mgr) < len(data)
or (numeric_only is lib.no_default and len(new_mgr) < orig_len)
):
warn_dropping_nuisance_columns_deprecated(type(self), how)

res = self._wrap_agged_manager(new_mgr)
Expand Down Expand Up @@ -1947,7 +1956,6 @@ def mean(
Name: B, dtype: float64
"""
numeric_only_bool = self._resolve_numeric_only(numeric_only)

if maybe_use_numba(engine):
from pandas.core._numba.kernels import sliding_mean

Expand All @@ -1956,7 +1964,7 @@ def mean(
result = self._cython_agg_general(
"mean",
alt=lambda x: Series(x).mean(numeric_only=numeric_only_bool),
numeric_only=numeric_only_bool,
numeric_only=numeric_only,
)
return result.__finalize__(self.obj, method="groupby")

Expand All @@ -1981,11 +1989,10 @@ def median(self, numeric_only: bool | lib.NoDefault = lib.no_default):
Median of values within each group.
"""
numeric_only_bool = self._resolve_numeric_only(numeric_only)

result = self._cython_agg_general(
"median",
alt=lambda x: Series(x).median(numeric_only=numeric_only_bool),
numeric_only=numeric_only_bool,
numeric_only=numeric_only,
)
return result.__finalize__(self.obj, method="groupby")

Expand Down Expand Up @@ -2180,8 +2187,6 @@ def sum(
"groupby_sum",
)
else:
numeric_only = self._resolve_numeric_only(numeric_only)

# If we are grouping on categoricals we want unobserved categories to
# return zero, rather than the default of NaN which the reindexing in
# _agg_general() returns. GH #31422
Expand All @@ -2200,8 +2205,6 @@ def sum(
def prod(
self, numeric_only: bool | lib.NoDefault = lib.no_default, min_count: int = 0
):
numeric_only = self._resolve_numeric_only(numeric_only)

return self._agg_general(
numeric_only=numeric_only, min_count=min_count, alias="prod", npfunc=np.prod
)
Expand Down Expand Up @@ -3343,7 +3346,7 @@ def _get_cythonized_result(
-------
`Series` or `DataFrame` with filled values
"""
numeric_only = self._resolve_numeric_only(numeric_only)
numeric_only_bool = self._resolve_numeric_only(numeric_only)

if post_processing and not callable(post_processing):
raise ValueError("'post_processing' must be a callable!")
Expand Down Expand Up @@ -3412,13 +3415,17 @@ def blk_func(values: ArrayLike) -> ArrayLike:
# Operate block-wise instead of column-by-column
is_ser = obj.ndim == 1
mgr = self._get_data_to_aggregate()
orig_len = len(mgr.items)

if numeric_only:
if numeric_only_bool:
mgr = mgr.get_numeric_data()

res_mgr = mgr.grouped_reduce(blk_func, ignore_failures=True)

if not is_ser and len(res_mgr.items) != len(mgr.items):
if not is_ser and (
len(res_mgr.items) < len(mgr.items)
or (numeric_only is lib.no_default and len(res_mgr.items) < orig_len)
):
howstr = how.replace("group_", "")
warn_dropping_nuisance_columns_deprecated(type(self), howstr)

Expand Down Expand Up @@ -3922,6 +3929,9 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde


def warn_dropping_nuisance_columns_deprecated(cls, how: str) -> None:
if how == "add":
# groupby internally uses "add" instead of "sum" in some places
how = "sum"
warnings.warn(
"Dropping invalid columns in "
f"{cls.__name__}.{how} is deprecated. "
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/extension/base/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ def test_in_numeric_groupby(self, data_for_grouping):
"C": [1, 1, 1, 1, 1, 1, 1, 1],
}
)
result = df.groupby("A").sum().columns
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = df.groupby("A").sum().columns

if data_for_grouping.dtype._is_numeric:
expected = pd.Index(["B", "C"])
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/frame/test_stack_unstack.py
Original file line number Diff line number Diff line change
Expand Up @@ -1777,7 +1777,8 @@ def test_stack_multiple_bug(self):
multi = df.set_index(["DATE", "ID"])
multi.columns.name = "Params"
unst = multi.unstack("ID")
down = unst.resample("W-THU").mean()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
down = unst.resample("W-THU").mean()

rs = down.stack("ID")
xp = unst.loc[:, ["VAR1"]].resample("W-THU").mean().stack("ID")
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/generic/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ def test_metadata_propagation_indiv_groupby(self):
"D": np.random.randn(8),
}
)
result = df.groupby("A").sum()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = df.groupby("A").sum()
tm.assert_metadata_equivalent(df, result)

def test_metadata_propagation_indiv_resample(self):
Expand Down
13 changes: 9 additions & 4 deletions pandas/tests/groupby/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,8 @@ def test_basic(): # TODO: split this test
gb = df.groupby("A", observed=False)
exp_idx = CategoricalIndex(["a", "b", "z"], name="A", ordered=True)
expected = DataFrame({"values": Series([3, 7, 0], index=exp_idx)})
result = gb.sum()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = gb.sum()
tm.assert_frame_equal(result, expected)

# GH 8623
Expand Down Expand Up @@ -344,7 +345,8 @@ def test_observed(observed):
gb = df.groupby(["A", "B"], observed=observed)
exp_index = MultiIndex.from_arrays([cat1, cat2], names=["A", "B"])
expected = DataFrame({"values": [1, 2, 3, 4]}, index=exp_index)
result = gb.sum()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result = gb.sum()
if not observed:
expected = cartesian_product_for_groupers(
expected, [cat1, cat2], list("AB"), fill_value=0
Expand Down Expand Up @@ -807,8 +809,11 @@ def test_preserve_categorical_dtype():
}
)
for col in ["C1", "C2"]:
result1 = df.groupby(by=col, as_index=False, observed=False).mean()
result2 = df.groupby(by=col, as_index=True, observed=False).mean().reset_index()
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
result1 = df.groupby(by=col, as_index=False, observed=False).mean()
result2 = (
df.groupby(by=col, as_index=True, observed=False).mean().reset_index()
)
expected = exp_full.reindex(columns=result1.columns)
tm.assert_frame_equal(result1, expected)
tm.assert_frame_equal(result2, expected)
Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/groupby/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,8 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
elif method in ["min", "max"]:
# these have numeric_only kwarg, but default to False
warn = FutureWarning
elif method in ["mean", "median", "prod", "cumprod", "sum", "cumsum"]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add an explanatory comment here, similar to the ones at L256 and L259 (noting why these methods now emit a FutureWarning)?

warn = FutureWarning

with tm.assert_produces_warning(warn, match="Dropping invalid columns"):
result = getattr(gb, method)()
Expand Down
Loading