From 289064f8188a59ab6eec57889bc210cdd6b1242d Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 14 May 2021 09:48:39 -0700 Subject: [PATCH 1/6] DEPR: dropping nuisance columns in DataFrameGroupBy apply, agg, transform --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/core/groupby/generic.py | 9 +++- pandas/core/groupby/groupby.py | 19 ++++++++ .../tests/groupby/aggregate/test_aggregate.py | 4 +- pandas/tests/groupby/aggregate/test_other.py | 14 ++++-- pandas/tests/groupby/test_groupby.py | 4 +- pandas/tests/groupby/test_quantile.py | 11 ++++- .../tests/groupby/transform/test_transform.py | 43 ++++++++++++++----- 8 files changed, 87 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 6ceae4dfd8a91..9b3fa86053915 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -646,6 +646,7 @@ Deprecations - Deprecated :func:`merge` producing duplicated columns through the ``suffixes`` keyword and already existing columns (:issue:`22818`) - Deprecated setting :attr:`Categorical._codes`, create a new :class:`Categorical` with the desired codes instead (:issue:`40606`) - Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`) +- Deprecated ignoring "nuisance columns" in :meth:`DataFrameGroupBy.agg`, :meth:`DataFrameGroupBy.apply`, and :meth:`DataFrameGroupBy.transform`; select valid columns before calling the method (:issue:`??`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 5c28a15532174..23851890b4c19 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1428,7 +1428,14 @@ def _transform_item_by_item(self, obj: DataFrame, wrapper) -> DataFrame: output[i] = sgb.transform(wrapper) except TypeError: # e.g. 
trying to call nanmean with string values - pass + warnings.warn( + f"Dropping invalid columns in {type(self).__name__}.transform " + "is deprecated. In a future version, a TypeError will be raised. " + "Before calling .transform, select only columns which should be " + "valid for the transforming function.", + FutureWarning, + stacklevel=5, + ) else: inds.append(i) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 2091d2fc484e1..3bfba65855b18 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -30,6 +30,7 @@ class providing the base-class of operations. Union, cast, ) +import warnings import numpy as np @@ -1269,6 +1270,14 @@ def _python_agg_general(self, func, *args, **kwargs): # if this function is invalid for this dtype, we will ignore it. result = self.grouper.agg_series(obj, f) except TypeError: + warnings.warn( + f"Dropping invalid columns in {type(self).__name__}.agg " + "is deprecated. In a future version, a TypeError will be raised. " + "Before calling .agg, select only columns which should be " + "valid for the aggregating function.", + FutureWarning, + stacklevel=3, + ) continue key = base.OutputKey(label=name, position=idx) @@ -2825,6 +2834,16 @@ def _get_cythonized_result( vals, inferences = pre_processing(vals) except TypeError as err: error_msg = str(err) + howstr = how.replace("group_", "") + warnings.warn( + "Dropping invalid columns in " + f"{type(self).__name__}.{howstr} is deprecated. " + "In a future version, a TypeError will be raised. 
" + f"Before calling .{howstr}, select only columns which " + "should be valid for the function.", + FutureWarning, + stacklevel=3, + ) continue vals = vals.astype(cython_dtype, copy=False) if needs_2d: diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index b601ba92886d9..ed8033baaf938 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -257,7 +257,8 @@ def func(ser): else: return ser.sum() - result = grouped.aggregate(func) + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"): + result = grouped.aggregate(func) exp_grouped = three_group.loc[:, three_group.columns != "C"] expected = exp_grouped.groupby(["A", "B"]).aggregate(func) tm.assert_frame_equal(result, expected) @@ -1018,6 +1019,7 @@ def test_mangle_series_groupby(self): tm.assert_frame_equal(result, expected) @pytest.mark.xfail(reason="GH-26611. kwargs for multi-agg.") + @pytest.mark.filterwarnings("ignore:Dropping invalid columns:FutureWarning") def test_with_kwargs(self): f1 = lambda x, y, b=1: x.sum() + y + b f2 = lambda x, y, b=2: x.sum() + y * b diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index 681192881c301..4d30543355d47 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -44,9 +44,16 @@ def test_agg_api(): def peak_to_peak(arr): return arr.max() - arr.min() - expected = grouped.agg([peak_to_peak]) + with tm.assert_produces_warning( + FutureWarning, match="Dropping invalid", check_stacklevel=False + ): + expected = grouped.agg([peak_to_peak]) expected.columns = ["data1", "data2"] - result = grouped.agg(peak_to_peak) + + with tm.assert_produces_warning( + FutureWarning, match="Dropping invalid", check_stacklevel=False + ): + result = grouped.agg(peak_to_peak) tm.assert_frame_equal(result, expected) @@ -294,7 +301,8 @@ def 
raiseException(df): raise TypeError("test") with pytest.raises(TypeError, match="test"): - df.groupby(0).agg(raiseException) + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"): + df.groupby(0).agg(raiseException) def test_series_agg_multikey(): diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 83aeb29ec53df..fece1e39b5e7b 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -923,7 +923,8 @@ def aggfun(ser): else: return ser.sum() - agged2 = df.groupby(keys).aggregate(aggfun) + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"): + agged2 = df.groupby(keys).aggregate(aggfun) assert len(agged2.columns) + 1 == len(df.columns) @@ -1757,6 +1758,7 @@ def test_pivot_table_values_key_error(): @pytest.mark.parametrize( "op", ["idxmax", "idxmin", "mad", "min", "max", "sum", "prod", "skew"] ) +@pytest.mark.filterwarnings("ignore:Dropping invalid columns:FutureWarning") def test_empty_groupby(columns, keys, values, method, op, request): # GH8093 & GH26411 override_dtype = None diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index 9c9d1aa881890..90437b9139594 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -155,7 +155,10 @@ def test_quantile_raises(): df = DataFrame([["foo", "a"], ["foo", "b"], ["foo", "c"]], columns=["key", "val"]) with pytest.raises(TypeError, match="cannot be performed against 'object' dtypes"): - df.groupby("key").quantile() + with tm.assert_produces_warning( + FutureWarning, match="Dropping invalid columns" + ): + df.groupby("key").quantile() def test_quantile_out_of_bounds_q_raises(): @@ -236,7 +239,11 @@ def test_groupby_quantile_nullable_array(values, q): @pytest.mark.parametrize("q", [0.5, [0.0, 0.5, 1.0]]) def test_groupby_quantile_skips_invalid_dtype(q): df = DataFrame({"a": [1], "b": [2.0], "c": ["x"]}) - result = 
df.groupby("a").quantile(q) + + warn = None if isinstance(q, list) else FutureWarning + with tm.assert_produces_warning(warn, match="Dropping invalid columns"): + result = df.groupby("a").quantile(q) + expected = df.groupby("a")[["b"]].quantile(q) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 09317cbeec658..1949d03998512 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -409,7 +409,9 @@ def test_transform_exclude_nuisance(df, duplicates): grouped = df.groupby("A") gbc = grouped["C"] - expected["C"] = gbc.transform(np.mean) + warn = FutureWarning if duplicates else None + with tm.assert_produces_warning(warn, match="Dropping invalid columns"): + expected["C"] = gbc.transform(np.mean) if duplicates: # squeeze 1-column DataFrame down to Series expected["C"] = expected["C"]["C"] @@ -422,14 +424,16 @@ def test_transform_exclude_nuisance(df, duplicates): expected["D"] = grouped["D"].transform(np.mean) expected = DataFrame(expected) - result = df.groupby("A").transform(np.mean) + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"): + result = df.groupby("A").transform(np.mean) tm.assert_frame_equal(result, expected) def test_transform_function_aliases(df): - result = df.groupby("A").transform("mean") - expected = df.groupby("A").transform(np.mean) + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"): + result = df.groupby("A").transform("mean") + expected = df.groupby("A").transform(np.mean) tm.assert_frame_equal(result, expected) result = df.groupby("A")["C"].transform("mean") @@ -498,7 +502,10 @@ def test_groupby_transform_with_int(): } ) with np.errstate(all="ignore"): - result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std()) + with tm.assert_produces_warning( + FutureWarning, match="Dropping invalid columns" + ): + 
result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std()) expected = DataFrame( {"B": np.nan, "C": Series([-1, 0, 1, -1, 0, 1], dtype="float64")} ) @@ -514,7 +521,10 @@ def test_groupby_transform_with_int(): } ) with np.errstate(all="ignore"): - result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std()) + with tm.assert_produces_warning( + FutureWarning, match="Dropping invalid columns" + ): + result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std()) expected = DataFrame({"B": np.nan, "C": [-1.0, 0.0, 1.0, -1.0, 0.0, 1.0]}) tm.assert_frame_equal(result, expected) @@ -522,7 +532,10 @@ def test_groupby_transform_with_int(): s = Series([2, 3, 4, 10, 5, -1]) df = DataFrame({"A": [1, 1, 1, 2, 2, 2], "B": 1, "C": s, "D": "foo"}) with np.errstate(all="ignore"): - result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std()) + with tm.assert_produces_warning( + FutureWarning, match="Dropping invalid columns" + ): + result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std()) s1 = s.iloc[0:3] s1 = (s1 - s1.mean()) / s1.std() @@ -532,7 +545,8 @@ def test_groupby_transform_with_int(): tm.assert_frame_equal(result, expected) # int doesn't get downcasted - result = df.groupby("A").transform(lambda x: x * 2 / 2) + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"): + result = df.groupby("A").transform(lambda x: x * 2 / 2) expected = DataFrame({"B": 1.0, "C": [2.0, 3.0, 4.0, 10.0, 5.0, -1.0]}) tm.assert_frame_equal(result, expected) @@ -791,7 +805,11 @@ def test_transform_numeric_ret(cols, exp, comp_func, agg_func, request): {"a": date_range("2018-01-01", periods=3), "b": range(3), "c": range(7, 10)} ) - result = df.groupby("b")[cols].transform(agg_func) + warn = FutureWarning + if isinstance(exp, Series) or agg_func != "size": + warn = None + with tm.assert_produces_warning(warn, match="Dropping invalid columns"): + result = df.groupby("b")[cols].transform(agg_func) if agg_func == "rank": exp 
= exp.astype("float") @@ -1103,7 +1121,12 @@ def test_transform_agg_by_name(request, reduction_func, obj): args = {"nth": [0], "quantile": [0.5], "corrwith": [obj]}.get(func, []) - result = g.transform(func, *args) + warn = None + if isinstance(obj, DataFrame) and func == "size": + warn = FutureWarning + + with tm.assert_produces_warning(warn, match="Dropping invalid columns"): + result = g.transform(func, *args) # this is the *definition* of a transformation tm.assert_index_equal(result.index, obj.index) From 1dac8b233e416c4cebfe1f87928200feafc87713 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 14 May 2021 09:53:27 -0700 Subject: [PATCH 2/6] GH ref --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 9b3fa86053915..2f8eb3aca253d 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -646,7 +646,7 @@ Deprecations - Deprecated :func:`merge` producing duplicated columns through the ``suffixes`` keyword and already existing columns (:issue:`22818`) - Deprecated setting :attr:`Categorical._codes`, create a new :class:`Categorical` with the desired codes instead (:issue:`40606`) - Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`) -- Deprecated ignoring "nuisance columns" in :meth:`DataFrameGroupBy.agg`, :meth:`DataFrameGroupBy.apply`, and :meth:`DataFrameGroupBy.transform`; select valid columns before calling the method (:issue:`??`) +- Deprecated ignoring "nuisance columns" in :meth:`DataFrameGroupBy.agg`, :meth:`DataFrameGroupBy.apply`, and :meth:`DataFrameGroupBy.transform`; select valid columns before calling the method (:issue:`41475`) .. 
--------------------------------------------------------------------------- From b9ae4ed7ecaa1d3dd9d50051ede94ac2321ba08b Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 14 May 2021 13:28:09 -0700 Subject: [PATCH 3/6] doc fixup --- doc/source/user_guide/groupby.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index ef6d45fa0140b..7d68f919e39d3 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -1009,6 +1009,7 @@ arguments. Using a bit of metaprogramming cleverness, GroupBy now has the ability to "dispatch" method calls to the groups: .. ipython:: python + :okwarning: grouped.std() From 4ecd59565b2f29499bc71a50d9050388698d3b21 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 14 May 2021 15:55:20 -0700 Subject: [PATCH 4/6] troubleshoot docbuild --- doc/source/user_guide/groupby.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index 7d68f919e39d3..7a55acbd3031d 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -1000,6 +1000,7 @@ instance method on each data group. This is pretty easy to do by passing lambda functions: .. 
ipython:: python + :okwarning: grouped = df.groupby("A") grouped.agg(lambda x: x.std()) From aac3803032a6bfe4f5fdbee0520b3c70aecf31d4 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 17 May 2021 12:27:45 -0700 Subject: [PATCH 5/6] whatsnew, catch more cases --- doc/source/whatsnew/v1.3.0.rst | 75 ++++++++++++++++++++++++++- pandas/core/groupby/generic.py | 19 +++++++ pandas/tests/groupby/test_function.py | 32 ++++++++++-- 3 files changed, 120 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 9b5c2c5342183..455d29a5abe7f 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -648,7 +648,80 @@ Deprecations - Deprecated setting :attr:`Categorical._codes`, create a new :class:`Categorical` with the desired codes instead (:issue:`40606`) - Deprecated behavior of :meth:`DatetimeIndex.union` with mixed timezones; in a future version both will be cast to UTC instead of object dtype (:issue:`39328`) - Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`) -- Deprecated ignoring "nuisance columns" in :meth:`DataFrameGroupBy.agg`, :meth:`DataFrameGroupBy.apply`, and :meth:`DataFrameGroupBy.transform`; select valid columns before calling the method (:issue:`41475`) + +.. _whatsnew_130.deprecations.nuisance_columns: + +Deprecated Dropping Nuisance Columns in DataFrame Reductions and DataFrameGroupBy Operations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When calling a reduction (.min, .max, .sum, ...) on a :class:`DataFrame` with +``numeric_only=None`` (the default, columns on which the reduction raises ``TypeError`` +are silently ignored and dropped from the result. This behavior is deprecated. +In a future version, the ``TypeError`` will be raised, and users will need to +select only valid columns before calling the function. + +For example: + +.. 
ipython:: python + + df = pd.DataFrame({"A": [1, 2, 3, 4], "B": pd.date_range("2016-01-01", periods=4)}) + +*Old behavior*: + +.. code-block:: ipython + + In [3]: df.prod() + Out[3]: + Out[3]: + A 24 + dtype: int64 + +*Future behavior*: + +.. code-block:: ipython + + In [4]: df.prod() + ... + TypeError: 'DatetimeArray' does not implement reduction 'prod' + + In [5]: df[["A"]].prod() + Out[5]: + A 24 + dtype: int64 + +Similarly, when applying a function to :class:`DataFrameGroupBy`, columns on which +the function raises ``TypeError`` are currently silently ignored and dropped +from the result. This behavior is deprecated. In a future version, the ``TypeError`` +will be raised, and users will need to select only valid columns before calling +the function. + +For example: + +.. ipython:: python + + df = pd.DataFrame({"A": [1, 2, 3, 4], "B": pd.date_range("2016-01-01", periods=4)}) + gb = df.groupby([1, 1, 2, 2]) + +*Old behavior*: + +.. code-block:: ipython + + In [4]: gb.prod(numeric_only=False) + Out[4]: + A + 1 2 + 2 12 + +.. code-block:: ipython + + In [5]: gb.prod(numeric_only=False) + ... + TypeError: datetime64 type does not support prod operations + + In [6]: gb[["A"]].prod(numeric_only=False) + Out[6]: + A + 1 2 + 2 12 .. --------------------------------------------------------------------------- diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index e06918f55b4b3..50058fb9ddf24 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1090,6 +1090,15 @@ def array_func(values: ArrayLike) -> ArrayLike: if not len(new_mgr) and len(orig): # If the original Manager was already empty, no need to raise raise DataError("No numeric types to aggregate") + if len(new_mgr) < len(data): + warnings.warn( + f"Dropping invalid columns in {type(self).__name__}.{how} " + "is deprecated. In a future version, a TypeError will be raised. 
" + f"Before calling .{how}, select only columns which should be " + "valid for the function.", + FutureWarning, + stacklevel=4, + ) return self._wrap_agged_manager(new_mgr) @@ -1287,6 +1296,16 @@ def arr_func(bvalues: ArrayLike) -> ArrayLike: res_mgr = mgr.grouped_reduce(arr_func, ignore_failures=True) res_mgr.set_axis(1, mgr.axes[1]) + if len(res_mgr) < len(mgr): + warnings.warn( + f"Dropping invalid columns in {type(self).__name__}.{how} " + "is deprecated. In a future version, a TypeError will be raised. " + f"Before calling .{how}, select only columns which should be " + "valid for the transforming function.", + FutureWarning, + stacklevel=4, + ) + res_df = self.obj._constructor(res_mgr) if self.axis == 1: res_df = res_df.T diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 3f43c34b6eb34..8ad2e9c0ab6d8 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -87,13 +87,15 @@ def test_max_min_object_multiple_columns(using_array_manager): gb = df.groupby("A") - result = gb.max(numeric_only=False) + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"): + result = gb.max(numeric_only=False) # "max" is valid for column "C" but not for "B" ei = Index([1, 2, 3], name="A") expected = DataFrame({"C": ["b", "d", "e"]}, index=ei) tm.assert_frame_equal(result, expected) - result = gb.min(numeric_only=False) + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"): + result = gb.min(numeric_only=False) # "min" is valid for column "C" but not for "B" ei = Index([1, 2, 3], name="A") expected = DataFrame({"C": ["a", "c", "e"]}, index=ei) @@ -221,7 +223,10 @@ def test_averages(self, df, method): ], ) - result = getattr(gb, method)(numeric_only=False) + with tm.assert_produces_warning( + FutureWarning, match="Dropping invalid", check_stacklevel=False + ): + result = getattr(gb, method)(numeric_only=False) 
tm.assert_frame_equal(result.reindex_like(expected), expected) expected_columns = expected.columns @@ -303,10 +308,27 @@ def test_cummin_cummax(self, df, method): def _check(self, df, method, expected_columns, expected_columns_numeric): gb = df.groupby("group") - result = getattr(gb, method)() + # cummin, cummax dont have numeric_only kwarg, always use False + warn = None + if method in ["cummin", "cummax"]: + # these dont have numeric_only kwarg, always use False + warn = FutureWarning + elif method in ["min", "max"]: + # these have numeric_only kwarg, but default to False + warn = FutureWarning + + with tm.assert_produces_warning(warn, match="Dropping invalid columns"): + result = getattr(gb, method)() + tm.assert_index_equal(result.columns, expected_columns_numeric) - result = getattr(gb, method)(numeric_only=False) + # GH#41475 deprecated silently ignoring nuisance columns + warn = None + if len(expected_columns) < len(gb._obj_with_exclusions.columns): + warn = FutureWarning + with tm.assert_produces_warning(warn, match="Dropping invalid columns"): + result = getattr(gb, method)(numeric_only=False) + tm.assert_index_equal(result.columns, expected_columns) From 998235577e9ae805794e9e6c916ea4134fbcd31e Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 21 May 2021 14:24:38 -0700 Subject: [PATCH 6/6] deduplicate whatsnew --- doc/source/whatsnew/v1.3.0.rst | 42 +++------------------------------- 1 file changed, 3 insertions(+), 39 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 9e7f6c457a2cf..e2e05d98845f6 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -720,48 +720,12 @@ For example: A 24 dtype: int64 -.. _whatsnew_130.deprecations.nuisance_columns: - -Deprecated Dropping Nuisance Columns in DataFrame Reductions and DataFrameGroupBy Operations -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -When calling a reduction (.min, .max, .sum, ...) 
on a :class:`DataFrame` with -``numeric_only=None`` (the default, columns on which the reduction raises ``TypeError`` -are silently ignored and dropped from the result. This behavior is deprecated. -In a future version, the ``TypeError`` will be raised, and users will need to -select only valid columns before calling the function. - -For example: - -.. ipython:: python - - df = pd.DataFrame({"A": [1, 2, 3, 4], "B": pd.date_range("2016-01-01", periods=4)}) - -*Old behavior*: - -.. code-block:: ipython - - In [3]: df.prod() - Out[3]: - Out[3]: - A 24 - dtype: int64 - -*Future behavior*: - -.. code-block:: ipython - - In [4]: df.prod() - ... - TypeError: 'DatetimeArray' does not implement reduction 'prod' - - In [5]: df[["A"]].prod() - Out[5]: - A 24 - dtype: int64 Similarly, when applying a function to :class:`DataFrameGroupBy`, columns on which the function raises ``TypeError`` are currently silently ignored and dropped -from the result. This behavior is deprecated. In a future version, the ``TypeError`` +from the result. + +This behavior is deprecated. In a future version, the ``TypeError`` will be raised, and users will need to select only valid columns before calling the function.