pandas-dev · jreback · May 26, 2021 · May 14, 2021 · May 14, 2021 · May 14, 2021
diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst
@@ -1000,6 +1000,7 @@ instance method on each data group. This is pretty easy to do by passing lambda
 functions:
 
 .. ipython:: python
+   :okwarning:
 
    grouped = df.groupby("A")
    grouped.agg(lambda x: x.std())
@@ -1009,6 +1010,7 @@ arguments. Using a bit of metaprogramming cleverness, GroupBy now has the
 ability to "dispatch" method calls to the groups:
 
 .. ipython:: python
+   :okwarning:
 
    grouped.std()
 

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -647,6 +647,7 @@ Deprecations
 - Deprecated setting :attr:`Categorical._codes`, create a new :class:`Categorical` with the desired codes instead (:issue:`40606`)
 - Deprecated behavior of :meth:`DatetimeIndex.union` with mixed timezones; in a future version both will be cast to UTC instead of object dtype (:issue:`39328`)
 - Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`)
+- Deprecated ignoring "nuisance columns" in :meth:`DataFrameGroupBy.agg`, :meth:`DataFrameGroupBy.apply`, and :meth:`DataFrameGroupBy.transform`; select valid columns before calling the method (:issue:`41475`)
 
 .. ---------------------------------------------------------------------------
 

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -1428,7 +1428,14 @@ def _transform_item_by_item(self, obj: DataFrame, wrapper) -> DataFrame:
                 output[i] = sgb.transform(wrapper)
             except TypeError:
                 # e.g. trying to call nanmean with string values
-                pass
+                warnings.warn(
+                    f"Dropping invalid columns in {type(self).__name__}.transform "
+                    "is deprecated. In a future version, a TypeError will be raised. "
+                    "Before calling .transform, select only columns which should be "
+                    "valid for the transforming function.",
+                    FutureWarning,
+                    stacklevel=5,
+                )
             else:
                 inds.append(i)
 

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -30,6 +30,7 @@ class providing the base-class of operations.
     Union,
     cast,
 )
+import warnings
 
 import numpy as np
 
@@ -1269,6 +1270,14 @@ def _python_agg_general(self, func, *args, **kwargs):
                 # if this function is invalid for this dtype, we will ignore it.
                 result = self.grouper.agg_series(obj, f)
             except TypeError:
+                warnings.warn(
+                    f"Dropping invalid columns in {type(self).__name__}.agg "
+                    "is deprecated. In a future version, a TypeError will be raised. "
+                    "Before calling .agg, select only columns which should be "
+                    "valid for the aggregating function.",
+                    FutureWarning,
+                    stacklevel=3,
+                )
                 continue
 
             key = base.OutputKey(label=name, position=idx)
@@ -2825,6 +2834,16 @@ def _get_cythonized_result(
                         vals, inferences = pre_processing(vals)
                     except TypeError as err:
                         error_msg = str(err)
+                        howstr = how.replace("group_", "")
+                        warnings.warn(
+                            "Dropping invalid columns in "
+                            f"{type(self).__name__}.{howstr} is deprecated. "
+                            "In a future version, a TypeError will be raised. "
+                            f"Before calling .{howstr}, select only columns which "
+                            "should be valid for the function.",
+                            FutureWarning,
+                            stacklevel=3,
+                        )
                         continue
                 vals = vals.astype(cython_dtype, copy=False)
                 if needs_2d:

diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -257,7 +257,8 @@ def func(ser):
         else:
             return ser.sum()
 
-    result = grouped.aggregate(func)
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"):
+        result = grouped.aggregate(func)
     exp_grouped = three_group.loc[:, three_group.columns != "C"]
     expected = exp_grouped.groupby(["A", "B"]).aggregate(func)
     tm.assert_frame_equal(result, expected)
@@ -1018,6 +1019,7 @@ def test_mangle_series_groupby(self):
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.xfail(reason="GH-26611. kwargs for multi-agg.")
+    @pytest.mark.filterwarnings("ignore:Dropping invalid columns:FutureWarning")
     def test_with_kwargs(self):
         f1 = lambda x, y, b=1: x.sum() + y + b
         f2 = lambda x, y, b=2: x.sum() + y * b

diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
@@ -44,9 +44,16 @@ def test_agg_api():
     def peak_to_peak(arr):
         return arr.max() - arr.min()
 
-    expected = grouped.agg([peak_to_peak])
+    with tm.assert_produces_warning(
+        FutureWarning, match="Dropping invalid", check_stacklevel=False
+    ):
+        expected = grouped.agg([peak_to_peak])
     expected.columns = ["data1", "data2"]
-    result = grouped.agg(peak_to_peak)
+
+    with tm.assert_produces_warning(
+        FutureWarning, match="Dropping invalid", check_stacklevel=False
+    ):
+        result = grouped.agg(peak_to_peak)
     tm.assert_frame_equal(result, expected)
 
 
@@ -294,7 +301,8 @@ def raiseException(df):
         raise TypeError("test")
 
     with pytest.raises(TypeError, match="test"):
-        df.groupby(0).agg(raiseException)
+        with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+            df.groupby(0).agg(raiseException)
 
 
 def test_series_agg_multikey():

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -923,7 +923,8 @@ def aggfun(ser):
         else:
             return ser.sum()
 
-    agged2 = df.groupby(keys).aggregate(aggfun)
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"):
+        agged2 = df.groupby(keys).aggregate(aggfun)
     assert len(agged2.columns) + 1 == len(df.columns)
 
 
@@ -1757,6 +1758,7 @@ def test_pivot_table_values_key_error():
 @pytest.mark.parametrize(
     "op", ["idxmax", "idxmin", "mad", "min", "max", "sum", "prod", "skew"]
 )
+@pytest.mark.filterwarnings("ignore:Dropping invalid columns:FutureWarning")
 def test_empty_groupby(columns, keys, values, method, op, request):
     # GH8093 & GH26411
     override_dtype = None

diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py
@@ -155,7 +155,10 @@ def test_quantile_raises():
     df = DataFrame([["foo", "a"], ["foo", "b"], ["foo", "c"]], columns=["key", "val"])
 
     with pytest.raises(TypeError, match="cannot be performed against 'object' dtypes"):
-        df.groupby("key").quantile()
+        with tm.assert_produces_warning(
+            FutureWarning, match="Dropping invalid columns"
+        ):
+            df.groupby("key").quantile()
 
 
 def test_quantile_out_of_bounds_q_raises():
@@ -236,7 +239,11 @@ def test_groupby_quantile_nullable_array(values, q):
 @pytest.mark.parametrize("q", [0.5, [0.0, 0.5, 1.0]])
 def test_groupby_quantile_skips_invalid_dtype(q):
     df = DataFrame({"a": [1], "b": [2.0], "c": ["x"]})
-    result = df.groupby("a").quantile(q)
+
+    warn = None if isinstance(q, list) else FutureWarning
+    with tm.assert_produces_warning(warn, match="Dropping invalid columns"):
+        result = df.groupby("a").quantile(q)
+
     expected = df.groupby("a")[["b"]].quantile(q)
     tm.assert_frame_equal(result, expected)
 

diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
@@ -409,7 +409,9 @@ def test_transform_exclude_nuisance(df, duplicates):
     grouped = df.groupby("A")
 
     gbc = grouped["C"]
-    expected["C"] = gbc.transform(np.mean)
+    warn = FutureWarning if duplicates else None
+    with tm.assert_produces_warning(warn, match="Dropping invalid columns"):
+        expected["C"] = gbc.transform(np.mean)
     if duplicates:
         # squeeze 1-column DataFrame down to Series
         expected["C"] = expected["C"]["C"]
@@ -422,14 +424,16 @@ def test_transform_exclude_nuisance(df, duplicates):
 
     expected["D"] = grouped["D"].transform(np.mean)
     expected = DataFrame(expected)
-    result = df.groupby("A").transform(np.mean)
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"):
+        result = df.groupby("A").transform(np.mean)
 
     tm.assert_frame_equal(result, expected)
 
 
 def test_transform_function_aliases(df):
-    result = df.groupby("A").transform("mean")
-    expected = df.groupby("A").transform(np.mean)
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"):
+        result = df.groupby("A").transform("mean")
+        expected = df.groupby("A").transform(np.mean)
     tm.assert_frame_equal(result, expected)
 
     result = df.groupby("A")["C"].transform("mean")
@@ -498,7 +502,10 @@ def test_groupby_transform_with_int():
         }
     )
     with np.errstate(all="ignore"):
-        result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std())
+        with tm.assert_produces_warning(
+            FutureWarning, match="Dropping invalid columns"
+        ):
+            result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std())
     expected = DataFrame(
         {"B": np.nan, "C": Series([-1, 0, 1, -1, 0, 1], dtype="float64")}
     )
@@ -514,15 +521,21 @@ def test_groupby_transform_with_int():
         }
     )
     with np.errstate(all="ignore"):
-        result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std())
+        with tm.assert_produces_warning(
+            FutureWarning, match="Dropping invalid columns"
+        ):
+            result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std())
     expected = DataFrame({"B": np.nan, "C": [-1.0, 0.0, 1.0, -1.0, 0.0, 1.0]})
     tm.assert_frame_equal(result, expected)
 
     # int that needs float conversion
     s = Series([2, 3, 4, 10, 5, -1])
     df = DataFrame({"A": [1, 1, 1, 2, 2, 2], "B": 1, "C": s, "D": "foo"})
     with np.errstate(all="ignore"):
-        result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std())
+        with tm.assert_produces_warning(
+            FutureWarning, match="Dropping invalid columns"
+        ):
+            result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std())
 
     s1 = s.iloc[0:3]
     s1 = (s1 - s1.mean()) / s1.std()
@@ -532,7 +545,8 @@ def test_groupby_transform_with_int():
     tm.assert_frame_equal(result, expected)
 
     # int doesn't get downcasted
-    result = df.groupby("A").transform(lambda x: x * 2 / 2)
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"):
+        result = df.groupby("A").transform(lambda x: x * 2 / 2)
     expected = DataFrame({"B": 1.0, "C": [2.0, 3.0, 4.0, 10.0, 5.0, -1.0]})
     tm.assert_frame_equal(result, expected)
 
@@ -791,7 +805,11 @@ def test_transform_numeric_ret(cols, exp, comp_func, agg_func, request):
         {"a": date_range("2018-01-01", periods=3), "b": range(3), "c": range(7, 10)}
     )
 
-    result = df.groupby("b")[cols].transform(agg_func)
+    warn = FutureWarning
+    if isinstance(exp, Series) or agg_func != "size":
+        warn = None
+    with tm.assert_produces_warning(warn, match="Dropping invalid columns"):
+        result = df.groupby("b")[cols].transform(agg_func)
 
     if agg_func == "rank":
         exp = exp.astype("float")
@@ -1103,7 +1121,12 @@ def test_transform_agg_by_name(request, reduction_func, obj):
 
     args = {"nth": [0], "quantile": [0.5], "corrwith": [obj]}.get(func, [])
 
-    result = g.transform(func, *args)
+    warn = None
+    if isinstance(obj, DataFrame) and func == "size":
+        warn = FutureWarning
+
+    with tm.assert_produces_warning(warn, match="Dropping invalid columns"):
+        result = g.transform(func, *args)
 
     # this is the *definition* of a transformation
     tm.assert_index_equal(result.index, obj.index)