diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 71cef46950e12..03b0992fa1a64 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -27,6 +27,7 @@
 import numpy as np
 
 from pandas._libs import reduction as libreduction
+import pandas._libs.lib as lib
 from pandas._typing import (
     ArrayLike,
     Manager,
@@ -1102,9 +1103,14 @@ def _wrap_applied_output_series(
         return self._reindex_output(result)
 
     def _cython_transform(
-        self, how: str, numeric_only: bool = True, axis: int = 0, **kwargs
+        self,
+        how: str,
+        numeric_only: bool | lib.NoDefault = lib.no_default,
+        axis: int = 0,
+        **kwargs,
     ) -> DataFrame:
         assert axis == 0  # handled by caller
+        numeric_only_bool = self._resolve_numeric_only(numeric_only)
         # TODO: no tests with self.ndim == 1 for DataFrameGroupBy
 
         # With self.axis == 0, we have multi-block tests
@@ -1113,7 +1119,8 @@ def arr_func(bvalues: ArrayLike) -> ArrayLike:
         # With self.axis == 1, _get_data_to_aggregate does a transpose
         #  so we always have a single block.
         mgr: Manager2D = self._get_data_to_aggregate()
-        if numeric_only:
+        orig_len = len(mgr)
+        if numeric_only_bool:
             mgr = mgr.get_numeric_data(copy=False)
 
         def arr_func(bvalues: ArrayLike) -> ArrayLike:
@@ -1126,7 +1133,9 @@ def arr_func(bvalues: ArrayLike) -> ArrayLike:
         res_mgr = mgr.grouped_reduce(arr_func, ignore_failures=True)
         res_mgr.set_axis(1, mgr.axes[1])
 
-        if len(res_mgr) < len(mgr):
+        if len(res_mgr) < len(mgr) or (
+            numeric_only is lib.no_default and len(res_mgr) < orig_len
+        ):
             warn_dropping_nuisance_columns_deprecated(type(self), how)
 
         res_df = self.obj._constructor(res_mgr)
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 4eb907e06adf1..69b11c0671210 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1494,7 +1494,7 @@ def _python_agg_general(self, func, *args, **kwargs):
     @final
     def _agg_general(
         self,
-        numeric_only: bool = True,
+        numeric_only: bool | lib.NoDefault = True,
         min_count: int = -1,
         *,
         alias: str,
@@ -1553,15 +1553,21 @@ def _agg_py_fallback(
 
     @final
     def _cython_agg_general(
-        self, how: str, alt: Callable, numeric_only: bool, min_count: int = -1
+        self,
+        how: str,
+        alt: Callable,
+        numeric_only: bool | lib.NoDefault,
+        min_count: int = -1,
     ):
         # Note: we never get here with how="ohlc" for DataFrameGroupBy;
         #  that goes through SeriesGroupBy
 
         data = self._get_data_to_aggregate()
+        orig_len = len(data)
         is_ser = data.ndim == 1
+        numeric_only_bool = self._resolve_numeric_only(numeric_only)
 
-        if numeric_only:
+        if numeric_only_bool:
             if is_ser and not is_numeric_dtype(self._selected_obj.dtype):
                 # GH#41291 match Series behavior
                 kwd_name = "numeric_only"
@@ -1591,7 +1597,10 @@ def array_func(values: ArrayLike) -> ArrayLike:
         #  continue and exclude the block
         new_mgr = data.grouped_reduce(array_func, ignore_failures=True)
 
-        if not is_ser and len(new_mgr) < len(data):
+        if not is_ser and (
+            len(new_mgr) < len(data)
+            or (numeric_only is lib.no_default and len(new_mgr) < orig_len)
+        ):
             warn_dropping_nuisance_columns_deprecated(type(self), how)
 
         res = self._wrap_agged_manager(new_mgr)
@@ -1947,7 +1956,6 @@ def mean(
         Name: B, dtype: float64
         """
         numeric_only_bool = self._resolve_numeric_only(numeric_only)
-
         if maybe_use_numba(engine):
             from pandas.core._numba.kernels import sliding_mean
 
@@ -1956,7 +1964,7 @@
             result = self._cython_agg_general(
                 "mean",
                 alt=lambda x: Series(x).mean(numeric_only=numeric_only_bool),
-                numeric_only=numeric_only_bool,
+                numeric_only=numeric_only,
             )
             return result.__finalize__(self.obj, method="groupby")
method="groupby") @@ -1981,11 +1989,10 @@ def median(self, numeric_only: bool | lib.NoDefault = lib.no_default): Median of values within each group. """ numeric_only_bool = self._resolve_numeric_only(numeric_only) - result = self._cython_agg_general( "median", alt=lambda x: Series(x).median(numeric_only=numeric_only_bool), - numeric_only=numeric_only_bool, + numeric_only=numeric_only, ) return result.__finalize__(self.obj, method="groupby") @@ -2180,8 +2187,6 @@ def sum( "groupby_sum", ) else: - numeric_only = self._resolve_numeric_only(numeric_only) - # If we are grouping on categoricals we want unobserved categories to # return zero, rather than the default of NaN which the reindexing in # _agg_general() returns. GH #31422 @@ -2200,8 +2205,6 @@ def sum( def prod( self, numeric_only: bool | lib.NoDefault = lib.no_default, min_count: int = 0 ): - numeric_only = self._resolve_numeric_only(numeric_only) - return self._agg_general( numeric_only=numeric_only, min_count=min_count, alias="prod", npfunc=np.prod ) @@ -3343,7 +3346,7 @@ def _get_cythonized_result( ------- `Series` or `DataFrame` with filled values """ - numeric_only = self._resolve_numeric_only(numeric_only) + numeric_only_bool = self._resolve_numeric_only(numeric_only) if post_processing and not callable(post_processing): raise ValueError("'post_processing' must be a callable!") @@ -3412,13 +3415,17 @@ def blk_func(values: ArrayLike) -> ArrayLike: # Operate block-wise instead of column-by-column is_ser = obj.ndim == 1 mgr = self._get_data_to_aggregate() + orig_len = len(mgr.items) - if numeric_only: + if numeric_only_bool: mgr = mgr.get_numeric_data() res_mgr = mgr.grouped_reduce(blk_func, ignore_failures=True) - if not is_ser and len(res_mgr.items) != len(mgr.items): + if not is_ser and ( + len(res_mgr.items) < len(mgr.items) + or (numeric_only is lib.no_default and len(res_mgr.items) < orig_len) + ): howstr = how.replace("group_", "") warn_dropping_nuisance_columns_deprecated(type(self), howstr) @@ -3922,6 +3929,9 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiInde def warn_dropping_nuisance_columns_deprecated(cls, how: str) -> None: + if how == "add": + # groupby internally uses "add" instead of "sum" in some places + how = "sum" warnings.warn( "Dropping invalid columns in " f"{cls.__name__}.{how} is deprecated. 
" diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py index db0190d488d42..6d72a9e6a8348 100644 --- a/pandas/tests/extension/base/groupby.py +++ b/pandas/tests/extension/base/groupby.py @@ -96,7 +96,8 @@ def test_in_numeric_groupby(self, data_for_grouping): "C": [1, 1, 1, 1, 1, 1, 1, 1], } ) - result = df.groupby("A").sum().columns + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"): + result = df.groupby("A").sum().columns if data_for_grouping.dtype._is_numeric: expected = pd.Index(["B", "C"]) diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 7efcc0ce8556b..fd89c1586f2ba 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1777,7 +1777,8 @@ def test_stack_multiple_bug(self): multi = df.set_index(["DATE", "ID"]) multi.columns.name = "Params" unst = multi.unstack("ID") - down = unst.resample("W-THU").mean() + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"): + down = unst.resample("W-THU").mean() rs = down.stack("ID") xp = unst.loc[:, ["VAR1"]].resample("W-THU").mean().stack("ID") diff --git a/pandas/tests/generic/test_frame.py b/pandas/tests/generic/test_frame.py index 2b248afb42057..7f384758a2724 100644 --- a/pandas/tests/generic/test_frame.py +++ b/pandas/tests/generic/test_frame.py @@ -71,7 +71,8 @@ def test_metadata_propagation_indiv_groupby(self): "D": np.random.randn(8), } ) - result = df.groupby("A").sum() + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"): + result = df.groupby("A").sum() tm.assert_metadata_equivalent(df, result) def test_metadata_propagation_indiv_resample(self): diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 4f17a83b45ab7..21abfee8813e5 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -103,7 +103,8 @@ def test_basic(): # TODO: split this test gb = df.groupby("A", observed=False) exp_idx = CategoricalIndex(["a", "b", "z"], name="A", ordered=True) expected = DataFrame({"values": Series([3, 7, 0], index=exp_idx)}) - result = gb.sum() + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"): + result = gb.sum() tm.assert_frame_equal(result, expected) # GH 8623 @@ -344,7 +345,8 @@ def test_observed(observed): gb = df.groupby(["A", "B"], observed=observed) exp_index = MultiIndex.from_arrays([cat1, cat2], names=["A", "B"]) expected = DataFrame({"values": [1, 2, 3, 4]}, index=exp_index) - result = gb.sum() + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"): + result = gb.sum() if not observed: expected = cartesian_product_for_groupers( expected, [cat1, cat2], list("AB"), fill_value=0 @@ -807,8 +809,11 @@ def test_preserve_categorical_dtype(): } ) for col in ["C1", "C2"]: - result1 = df.groupby(by=col, as_index=False, observed=False).mean() - result2 = df.groupby(by=col, as_index=True, observed=False).mean().reset_index() + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"): + result1 = df.groupby(by=col, as_index=False, observed=False).mean() + result2 = ( + df.groupby(by=col, as_index=True, observed=False).mean().reset_index() + ) expected = exp_full.reindex(columns=result1.columns) tm.assert_frame_equal(result1, expected) tm.assert_frame_equal(result2, expected) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 
index 1555e9d02c8ca..ab8b049a9facc 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -258,6 +258,8 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
         elif method in ["min", "max"]:
             # these have numeric_only kwarg, but default to False
             warn = FutureWarning
+        elif method in ["mean", "median", "prod", "cumprod", "sum", "cumsum"]:
+            warn = FutureWarning
 
         with tm.assert_produces_warning(warn, match="Dropping invalid columns"):
             result = getattr(gb, method)()
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index d48477618767c..813f3e8c104f4 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -473,13 +473,16 @@ def test_frame_groupby_columns(tsframe):
 def test_frame_set_name_single(df):
     grouped = df.groupby("A")
 
-    result = grouped.mean()
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        result = grouped.mean()
     assert result.index.name == "A"
 
-    result = df.groupby("A", as_index=False).mean()
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        result = df.groupby("A", as_index=False).mean()
     assert result.index.name != "A"
 
-    result = grouped.agg(np.mean)
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        result = grouped.agg(np.mean)
     assert result.index.name == "A"
 
     result = grouped.agg({"C": np.mean, "D": np.std})
@@ -502,7 +505,8 @@ def test_multi_func(df):
     col2 = df["B"]
 
     grouped = df.groupby([col1.get, col2.get])
-    agged = grouped.mean()
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        agged = grouped.mean()
     expected = df.groupby(["A", "B"]).mean()
 
     # TODO groupby get drops names
@@ -658,13 +662,15 @@ def test_groupby_as_index_agg(df):
 
     # single-key
 
-    result = grouped.agg(np.mean)
-    expected = grouped.mean()
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        result = grouped.agg(np.mean)
+        expected = grouped.mean()
     tm.assert_frame_equal(result, expected)
 
     result2 = grouped.agg({"C": np.mean, "D": np.sum})
-    expected2 = grouped.mean()
-    expected2["D"] = grouped.sum()["D"]
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        expected2 = grouped.mean()
+        expected2["D"] = grouped.sum()["D"]
     tm.assert_frame_equal(result2, expected2)
 
     grouped = df.groupby("A", as_index=True)
@@ -746,7 +752,8 @@ def test_as_index_series_return_frame(df):
     grouped2 = df.groupby(["A", "B"], as_index=False)
 
     result = grouped["C"].agg(np.sum)
-    expected = grouped.agg(np.sum).loc[:, ["A", "C"]]
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        expected = grouped.agg(np.sum).loc[:, ["A", "C"]]
     assert isinstance(result, DataFrame)
     tm.assert_frame_equal(result, expected)
 
@@ -756,7 +763,8 @@ def test_as_index_series_return_frame(df):
     tm.assert_frame_equal(result2, expected2)
 
     result = grouped["C"].sum()
-    expected = grouped.sum().loc[:, ["A", "C"]]
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        expected = grouped.sum().loc[:, ["A", "C"]]
     assert isinstance(result, DataFrame)
     tm.assert_frame_equal(result, expected)
 
@@ -780,8 +788,9 @@ def test_groupby_as_index_cython(df):
 
     # single-key
     grouped = data.groupby("A", as_index=False)
-    result = grouped.mean()
-    expected = data.groupby(["A"]).mean()
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        result = grouped.mean()
+        expected = data.groupby(["A"]).mean()
     expected.insert(0, "A", expected.index)
     expected.index = np.arange(len(expected))
     tm.assert_frame_equal(result, expected)
@@ -850,15 +859,17 @@ def test_groupby_multi_corner(df):
 
 def test_omit_nuisance(df):
     grouped = df.groupby("A")
-    agged = grouped.agg(np.mean)
-    exp = grouped.mean()
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        agged = grouped.agg(np.mean)
+        exp = grouped.mean()
     tm.assert_frame_equal(agged, exp)
 
     df = df.loc[:, ["A", "C", "D"]]
     df["E"] = datetime.now()
     grouped = df.groupby("A")
-    result = grouped.agg(np.sum)
-    expected = grouped.sum()
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        result = grouped.agg(np.sum)
+        expected = grouped.sum()
     tm.assert_frame_equal(result, expected)
 
     # won't work with axis = 1
@@ -889,7 +900,8 @@ def test_keep_nuisance_agg(df, agg_function):
 def test_omit_nuisance_agg(df, agg_function):
     # GH 38774, GH 38815
     grouped = df.groupby("A")
-    result = getattr(grouped, agg_function)()
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        result = getattr(grouped, agg_function)()
     expected = getattr(df.loc[:, ["A", "C", "D"]].groupby("A"), agg_function)()
     tm.assert_frame_equal(result, expected)
 
@@ -906,8 +918,9 @@ def test_omit_nuisance_warnings(df):
 
 def test_omit_nuisance_python_multiple(three_group):
     grouped = three_group.groupby(["A", "B"])
-    agged = grouped.agg(np.mean)
-    exp = grouped.mean()
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        agged = grouped.agg(np.mean)
+        exp = grouped.mean()
     tm.assert_frame_equal(agged, exp)
 
@@ -924,8 +937,9 @@ def test_empty_groups_corner(mframe):
     )
 
     grouped = df.groupby(["k1", "k2"])
-    result = grouped.agg(np.mean)
-    expected = grouped.mean()
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        result = grouped.agg(np.mean)
+        expected = grouped.mean()
     tm.assert_frame_equal(result, expected)
 
     grouped = mframe[3:5].groupby(level=0)
@@ -947,7 +961,8 @@ def test_wrap_aggregated_output_multindex(mframe):
     df["baz", "two"] = "peekaboo"
 
     keys = [np.array([0, 0, 1]), np.array([0, 0, 1])]
-    agged = df.groupby(keys).agg(np.mean)
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        agged = df.groupby(keys).agg(np.mean)
     assert isinstance(agged.columns, MultiIndex)
 
     def aggfun(ser):
@@ -1108,15 +1123,17 @@ def test_groupby_with_hier_columns():
     # add a nuisance column
     sorted_columns, _ = columns.sortlevel(0)
     df["A", "foo"] = "bar"
-    result = df.groupby(level=0).mean()
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        result = df.groupby(level=0).mean()
     tm.assert_index_equal(result.columns, df.columns[:-1])
 
 
 def test_grouping_ndarray(df):
     grouped = df.groupby(df["A"].values)
 
-    result = grouped.sum()
-    expected = df.groupby("A").sum()
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        result = grouped.sum()
+        expected = df.groupby("A").sum()
     tm.assert_frame_equal(
         result, expected, check_names=False
     )  # Note: no names when grouping by value
@@ -1144,13 +1161,15 @@ def test_groupby_wrong_multi_labels():
 
 
 def test_groupby_series_with_name(df):
-    result = df.groupby(df["A"]).mean()
-    result2 = df.groupby(df["A"], as_index=False).mean()
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        result = df.groupby(df["A"]).mean()
+        result2 = df.groupby(df["A"], as_index=False).mean()
     assert result.index.name == "A"
     assert "A" in result2
 
-    result = df.groupby([df["A"], df["B"]]).mean()
-    result2 = df.groupby([df["A"], df["B"]], as_index=False).mean()
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        result = df.groupby([df["A"], df["B"]]).mean()
+        result2 = df.groupby([df["A"], df["B"]], as_index=False).mean()
     assert result.index.names == ("A", "B")
     assert "A" in result2
     assert "B" in result2
@@ -1296,8 +1315,9 @@ def test_groupby_unit64_float_conversion():
 
 
 def test_groupby_list_infer_array_like(df):
-    result = df.groupby(list(df["A"])).mean()
-    expected = df.groupby(df["A"]).mean()
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        result = df.groupby(list(df["A"])).mean()
+        expected = df.groupby(df["A"]).mean()
     tm.assert_frame_equal(result, expected, check_names=False)
 
     with pytest.raises(KeyError, match=r"^'foo'$"):
@@ -1410,7 +1430,8 @@ def test_groupby_2d_malformed():
     d["zeros"] = [0, 0]
     d["ones"] = [1, 1]
     d["label"] = ["l1", "l2"]
-    tmp = d.groupby(["group"]).mean()
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        tmp = d.groupby(["group"]).mean()
     res_values = np.array([[0.0, 1.0], [0.0, 1.0]])
     tm.assert_index_equal(tmp.columns, Index(["zeros", "ones"]))
     tm.assert_numpy_array_equal(tmp.values, res_values)
@@ -1576,10 +1597,12 @@ def f(group):
 
 def test_no_dummy_key_names(df):
     # see gh-1291
-    result = df.groupby(df["A"].values).sum()
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        result = df.groupby(df["A"].values).sum()
     assert result.index.name is None
 
-    result = df.groupby([df["A"].values, df["B"].values]).sum()
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        result = df.groupby([df["A"].values, df["B"].values]).sum()
     assert result.index.names == (None, None)
@@ -2556,7 +2579,8 @@ def test_groupby_aggregation_numeric_with_non_numeric_dtype():
     )
 
     gb = df.groupby(by=["x"])
-    result = gb.sum()
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        result = gb.sum()
     tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py
index 3f83bc06e6c38..febc187ec0b6b 100644
--- a/pandas/tests/groupby/test_groupby_subclass.py
+++ b/pandas/tests/groupby/test_groupby_subclass.py
@@ -109,5 +109,6 @@ def test_groupby_resample_preserves_subclass(obj):
     df = df.set_index("Date")
 
     # Confirm groupby.resample() preserves dataframe type
-    result = df.groupby("Buyer").resample("5D").sum()
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        result = df.groupby("Buyer").resample("5D").sum()
     assert isinstance(result, obj)
diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
index efb0b82f58e97..b1b14475b90f9 100644
--- a/pandas/tests/groupby/test_grouping.py
+++ b/pandas/tests/groupby/test_grouping.py
@@ -59,8 +59,9 @@ def test_column_select_via_attr(self, df):
         tm.assert_series_equal(result, expected)
 
         df["mean"] = 1.5
-        result = df.groupby("A").mean()
-        expected = df.groupby("A").agg(np.mean)
+        with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+            result = df.groupby("A").mean()
+            expected = df.groupby("A").agg(np.mean)
         tm.assert_frame_equal(result, expected)
 
     def test_getitem_list_of_columns(self):
@@ -284,25 +285,29 @@ def test_grouper_column_and_index(self):
             {"A": np.arange(6), "B": ["one", "one", "two", "two", "one", "one"]},
             index=idx,
         )
-        result = df_multi.groupby(["B", pd.Grouper(level="inner")]).mean()
-        expected = df_multi.reset_index().groupby(["B", "inner"]).mean()
+        with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+            result = df_multi.groupby(["B", pd.Grouper(level="inner")]).mean()
+            expected = df_multi.reset_index().groupby(["B", "inner"]).mean()
         tm.assert_frame_equal(result, expected)
 
         # Test the reverse grouping order
-        result = df_multi.groupby([pd.Grouper(level="inner"), "B"]).mean()
-        expected = df_multi.reset_index().groupby(["inner", "B"]).mean()
+        with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+            result = df_multi.groupby([pd.Grouper(level="inner"), "B"]).mean()
+            expected = df_multi.reset_index().groupby(["inner", "B"]).mean()
         tm.assert_frame_equal(result, expected)
 
         # Grouping a single-index frame by a column and the index should
         # be equivalent to resetting the index and grouping by two columns
         df_single = df_multi.reset_index("outer")
-        result = df_single.groupby(["B", pd.Grouper(level="inner")]).mean()
-        expected = df_single.reset_index().groupby(["B", "inner"]).mean()
+        with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+            result = df_single.groupby(["B", pd.Grouper(level="inner")]).mean()
+            expected = df_single.reset_index().groupby(["B", "inner"]).mean()
         tm.assert_frame_equal(result, expected)
 
         # Test the reverse grouping order
-        result = df_single.groupby([pd.Grouper(level="inner"), "B"]).mean()
-        expected = df_single.reset_index().groupby(["inner", "B"]).mean()
+        with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+            result = df_single.groupby([pd.Grouper(level="inner"), "B"]).mean()
+            expected = df_single.reset_index().groupby(["inner", "B"]).mean()
         tm.assert_frame_equal(result, expected)
 
     def test_groupby_levels_and_columns(self):
@@ -376,8 +381,9 @@ def test_empty_groups(self, df):
     def test_groupby_grouper(self, df):
         grouped = df.groupby("A")
 
-        result = df.groupby(grouped.grouper).mean()
-        expected = grouped.mean()
+        with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+            result = df.groupby(grouped.grouper).mean()
+            expected = grouped.mean()
         tm.assert_frame_equal(result, expected)
 
     def test_groupby_dict_mapping(self):
diff --git a/pandas/tests/groupby/test_index_as_string.py b/pandas/tests/groupby/test_index_as_string.py
index 971a447b84cae..335f104cc3336 100644
--- a/pandas/tests/groupby/test_index_as_string.py
+++ b/pandas/tests/groupby/test_index_as_string.py
@@ -47,8 +47,9 @@ def series():
     ],
 )
 def test_grouper_index_level_as_string(frame, key_strs, groupers):
-    result = frame.groupby(key_strs).mean()
-    expected = frame.groupby(groupers).mean()
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        result = frame.groupby(key_strs).mean()
+        expected = frame.groupby(groupers).mean()
     tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/test_pipe.py b/pandas/tests/groupby/test_pipe.py
index 1229251f88c7d..d631e0a135d06 100644
--- a/pandas/tests/groupby/test_pipe.py
+++ b/pandas/tests/groupby/test_pipe.py
@@ -65,7 +65,8 @@ def g(dfgb, arg2):
     def h(df, arg3):
         return df.x + df.y - arg3
 
-    result = df.groupby("group").pipe(f, 0).pipe(g, 10).pipe(h, 100)
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        result = df.groupby("group").pipe(f, 0).pipe(g, 10).pipe(h, 100)
 
     # Assert the results here
     index = Index(["A", "B", "C"], name="group")
diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py
index d4b21633309db..8d317344b1c4a 100644
--- a/pandas/tests/groupby/test_timegrouper.py
+++ b/pandas/tests/groupby/test_timegrouper.py
@@ -105,14 +105,17 @@ def test_groupby_with_timegrouper(self):
         )
         expected.iloc[[0, 6, 18], 0] = np.array([24, 6, 9], dtype="int64")
 
-        result1 = df.resample("5D").sum()
+        with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+            result1 = df.resample("5D").sum()
         tm.assert_frame_equal(result1, expected)
 
         df_sorted = df.sort_index()
-        result2 = df_sorted.groupby(Grouper(freq="5D")).sum()
+        with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+            result2 = df_sorted.groupby(Grouper(freq="5D")).sum()
         tm.assert_frame_equal(result2, expected)
 
-        result3 = df.groupby(Grouper(freq="5D")).sum()
+        with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+            result3 = df.groupby(Grouper(freq="5D")).sum()
         tm.assert_frame_equal(result3, expected)
 
     @pytest.mark.parametrize("should_sort", [True, False])
@@ -186,7 +189,8 @@ def test_timegrouper_with_reg_groups(self):
             }
         ).set_index(["Date", "Buyer"])
 
-        result = df.groupby([Grouper(freq="A"), "Buyer"]).sum()
+        with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+            result = df.groupby([Grouper(freq="A"), "Buyer"]).sum()
         tm.assert_frame_equal(result, expected)
 
         expected = DataFrame(
@@ -201,7 +205,8 @@ def test_timegrouper_with_reg_groups(self):
                 ],
             }
         ).set_index(["Date", "Buyer"])
-        result = df.groupby([Grouper(freq="6MS"), "Buyer"]).sum()
+        with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+            result = df.groupby([Grouper(freq="6MS"), "Buyer"]).sum()
         tm.assert_frame_equal(result, expected)
 
         df_original = DataFrame(
@@ -239,10 +244,12 @@ def test_timegrouper_with_reg_groups(self):
             }
         ).set_index(["Date", "Buyer"])
 
-        result = df.groupby([Grouper(freq="1D"), "Buyer"]).sum()
+        with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+            result = df.groupby([Grouper(freq="1D"), "Buyer"]).sum()
         tm.assert_frame_equal(result, expected)
 
-        result = df.groupby([Grouper(freq="1M"), "Buyer"]).sum()
+        with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+            result = df.groupby([Grouper(freq="1M"), "Buyer"]).sum()
         expected = DataFrame(
             {
                 "Buyer": "Carl Joe Mark".split(),
@@ -258,11 +265,15 @@ def test_timegrouper_with_reg_groups(self):
 
         # passing the name
         df = df.reset_index()
-        result = df.groupby([Grouper(freq="1M", key="Date"), "Buyer"]).sum()
+        with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+            result = df.groupby([Grouper(freq="1M", key="Date"), "Buyer"]).sum()
         tm.assert_frame_equal(result, expected)
 
-        with pytest.raises(KeyError, match="'The grouper name foo is not found'"):
-            df.groupby([Grouper(freq="1M", key="foo"), "Buyer"]).sum()
+        with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+            with pytest.raises(
+                KeyError, match="'The grouper name foo is not found'"
+            ):
+                df.groupby([Grouper(freq="1M", key="foo"), "Buyer"]).sum()
 
         # passing the level
         df = df.set_index("Date")
diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py
index 86e0411ee3334..f82fbf71286b1 100644
--- a/pandas/tests/resample/test_resample_api.py
+++ b/pandas/tests/resample/test_resample_api.py
@@ -88,9 +88,9 @@ def test_groupby_resample_on_api():
         }
     )
 
-    expected = df.set_index("dates").groupby("key").resample("D").mean()
-
-    result = df.groupby("key").resample("D", on="dates").mean()
+    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+        expected = df.set_index("dates").groupby("key").resample("D").mean()
+        result = df.groupby("key").resample("D", on="dates").mean()
df.groupby("key").resample("D", on="dates").mean() tm.assert_frame_equal(result, expected) @@ -169,7 +169,8 @@ def tests_skip_nuisance(test_frame): tm.assert_frame_equal(result, expected) expected = r[["A", "B", "C"]].sum() - result = r.sum() + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"): + result = r.sum() tm.assert_frame_equal(result, expected) @@ -607,10 +608,12 @@ def test_selection_api_validation(): exp = df_exp.resample("2D").sum() exp.index.name = "date" - tm.assert_frame_equal(exp, df.resample("2D", on="date").sum()) + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"): + tm.assert_frame_equal(exp, df.resample("2D", on="date").sum()) exp.index.name = "d" - tm.assert_frame_equal(exp, df.resample("2D", level="d").sum()) + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"): + tm.assert_frame_equal(exp, df.resample("2D", level="d").sum()) @pytest.mark.parametrize( diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index cae2d77dfbd3f..1a1050ead6c6e 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -408,7 +408,8 @@ def test_resample_groupby_agg(): df["date"] = pd.to_datetime(df["date"]) resampled = df.groupby("cat").resample("Y", on="date") - expected = resampled.sum() + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"): + expected = resampled.sum() result = resampled.agg({"num": "sum"}) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 7b932a3bb80c0..9a00f339956d2 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -555,7 +555,8 @@ def test_mixed_type_join_with_suffix(self): df.insert(5, "dt", "foo") grouped = df.groupby("id") - mn = grouped.mean() + with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"): + mn = grouped.mean() cn = grouped.count() # it works! 
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index 6c222669c37db..0ceae5eb1f921 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -146,8 +146,9 @@ def test_pivot_table_nocols(self):
         df = DataFrame(
             {"rows": ["a", "b", "c"], "cols": ["x", "y", "z"], "values": [1, 2, 3]}
         )
-        rs = df.pivot_table(columns="cols", aggfunc=np.sum)
-        xp = df.pivot_table(index="cols", aggfunc=np.sum).T
+        with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+            rs = df.pivot_table(columns="cols", aggfunc=np.sum)
+            xp = df.pivot_table(index="cols", aggfunc=np.sum).T
         tm.assert_frame_equal(rs, xp)
 
         rs = df.pivot_table(columns="cols", aggfunc={"values": "mean"})
@@ -903,12 +904,18 @@ def test_no_col(self):
 
         # to help with a buglet
        self.data.columns = [k * 2 for k in self.data.columns]
-        table = self.data.pivot_table(index=["AA", "BB"], margins=True, aggfunc=np.mean)
+        with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+            table = self.data.pivot_table(
+                index=["AA", "BB"], margins=True, aggfunc=np.mean
+            )
         for value_col in table.columns:
             totals = table.loc[("All", ""), value_col]
             assert totals == self.data[value_col].mean()
 
-        table = self.data.pivot_table(index=["AA", "BB"], margins=True, aggfunc="mean")
+        with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+            table = self.data.pivot_table(
+                index=["AA", "BB"], margins=True, aggfunc="mean"
+            )
         for item in ["DD", "EE", "FF"]:
             totals = table.loc[("All", ""), item]
             assert totals == self.data[item].mean()
@@ -964,7 +971,8 @@ def test_margin_with_only_columns_defined(
             }
         )
 
-        result = df.pivot_table(columns=columns, margins=True, aggfunc=aggfunc)
+        with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+            result = df.pivot_table(columns=columns, margins=True, aggfunc=aggfunc)
         expected = DataFrame(values, index=Index(["D", "E"]), columns=expected_columns)
 
         tm.assert_frame_equal(result, expected)
@@ -1990,8 +1998,9 @@ def test_pivot_string_as_func(self):
     def test_pivot_string_func_vs_func(self, f, f_numpy):
        # GH #18713
         # for consistency purposes
-        result = pivot_table(self.data, index="A", columns="B", aggfunc=f)
-        expected = pivot_table(self.data, index="A", columns="B", aggfunc=f_numpy)
+        with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
+            result = pivot_table(self.data, index="A", columns="B", aggfunc=f)
+            expected = pivot_table(self.data, index="A", columns="B", aggfunc=f_numpy)
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.slow
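Note (illustrative only, not part of the diff): a minimal sketch of the user-facing behavior the updated tests assert, assuming a pandas build of this era (pre-2.0). The frame and column names below are hypothetical; the warning fires only when ``numeric_only`` is left at its no-default sentinel and non-numeric columns are actually dropped.

```python
import warnings

import pandas as pd

df = pd.DataFrame({"A": ["x", "x", "y"], "B": ["a", "b", "c"], "C": [1, 2, 3]})

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # "B" is a non-numeric "nuisance" column; with numeric_only unspecified it is
    # silently dropped from the result, which now emits a FutureWarning.
    result = df.groupby("A").mean()

assert list(result.columns) == ["C"]
assert any("Dropping invalid columns" in str(w.message) for w in caught)

# Passing numeric_only explicitly keeps the same output without the deprecation warning.
quiet = df.groupby("A").mean(numeric_only=True)
```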