From ff63216c6a39b1b7994958440c5d46c476aa9361 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 14 May 2021 18:35:55 -0700 Subject: [PATCH 1/8] DEPR: dropping nuisance columns in DataFrame reductions --- pandas/core/frame.py | 28 ++++ pandas/tests/apply/test_frame_apply.py | 10 +- pandas/tests/apply/test_invalid_arg.py | 1 + pandas/tests/frame/methods/test_quantile.py | 3 +- pandas/tests/frame/methods/test_rank.py | 1 + pandas/tests/frame/test_arithmetic.py | 1 + pandas/tests/frame/test_reductions.py | 142 ++++++++++++++------ pandas/tests/frame/test_subclass.py | 1 + pandas/tests/groupby/test_apply.py | 3 +- pandas/tests/groupby/test_categorical.py | 6 +- pandas/tests/groupby/test_function.py | 3 + pandas/tests/groupby/test_groupby.py | 1 + 12 files changed, 153 insertions(+), 47 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2941b6ac01904..7bfd555fa995c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9800,6 +9800,21 @@ def _get_data() -> DataFrame: # Even if we are object dtype, follow numpy and return # float64, see test_apply_funcs_over_empty out = out.astype(np.float64) + + if numeric_only is None and out.shape[0] != df.shape[1]: + # columns have been dropped + arg_name = "numeric_only" + if name in ["all", "any"]: + arg_name = "bool_only" + warnings.warn( + "Dropping of nuisance columns in DataFrame reductions " + f"(with '{arg_name}=None') is deprecated; in a future " + "version this will raise TypeError. Select only valid " + "columns before calling the reduction.", + FutureWarning, + stacklevel=5, + ) + return out assert numeric_only is None @@ -9820,6 +9835,19 @@ def _get_data() -> DataFrame: with np.errstate(all="ignore"): result = func(values) + # columns have been dropped + arg_name = "numeric_only" + if name in ["all", "any"]: + arg_name = "bool_only" + warnings.warn( + "Dropping of nuisance columns in DataFrame reductions " + f"(with '{arg_name}=None') is deprecated; in a future " + "version this will raise TypeError. 
Select only valid " + "columns before calling the reduction.", + FutureWarning, + stacklevel=5, + ) + if hasattr(result, "dtype"): if filter_type == "bool" and notna(result).all(): result = result.astype(np.bool_) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index cee8a0218e9e8..06e5b82d43093 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1209,7 +1209,10 @@ def test_nuiscance_columns(): ) tm.assert_frame_equal(result, expected) - result = df.agg("sum") + with tm.assert_produces_warning( + FutureWarning, match="Select only valid", check_stacklevel=False + ): + result = df.agg("sum") expected = Series([6, 6.0, "foobarbaz"], index=["A", "B", "C"]) tm.assert_series_equal(result, expected) @@ -1426,8 +1429,9 @@ def test_apply_datetime_tz_issue(): @pytest.mark.parametrize("method", ["min", "max", "sum"]) def test_consistency_of_aggregates_of_columns_with_missing_values(df, method): # GH 16832 - none_in_first_column_result = getattr(df[["A", "B"]], method)() - none_in_second_column_result = getattr(df[["B", "A"]], method)() + with tm.assert_produces_warning(FutureWarning, match="Select only valid"): + none_in_first_column_result = getattr(df[["A", "B"]], method)() + none_in_second_column_result = getattr(df[["B", "A"]], method)() tm.assert_series_equal(none_in_first_column_result, none_in_second_column_result) diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py index 698f85a04a757..83a1baa9d13d6 100644 --- a/pandas/tests/apply/test_invalid_arg.py +++ b/pandas/tests/apply/test_invalid_arg.py @@ -342,6 +342,7 @@ def test_transform_wont_agg_series(string_series, func): @pytest.mark.parametrize( "op_wrapper", [lambda x: x, lambda x: [x], lambda x: {"A": x}, lambda x: {"A": [x]}] ) +@pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") def test_transform_reducer_raises(all_reductions, frame_or_series, op_wrapper): # GH 35964 op = op_wrapper(all_reductions) diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index dbb5cb357de47..cd16644eccf0a 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -55,7 +55,8 @@ def test_quantile(self, datetime_frame): # non-numeric exclusion df = DataFrame({"col1": ["A", "A", "B", "B"], "col2": [1, 2, 3, 4]}) rs = df.quantile(0.5) - xp = df.median().rename(0.5) + with tm.assert_produces_warning(FutureWarning, match="Select only valid"): + xp = df.median().rename(0.5) tm.assert_series_equal(rs, xp) # axis diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py index 6538eda8cdeff..5ba4ab4408f11 100644 --- a/pandas/tests/frame/methods/test_rank.py +++ b/pandas/tests/frame/methods/test_rank.py @@ -248,6 +248,7 @@ def test_rank_methods_frame(self): @td.skip_array_manager_not_yet_implemented @pytest.mark.parametrize("dtype", ["O", "f8", "i8"]) + @pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") def test_rank_descending(self, method, dtype): if "i" in dtype: diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index b9f6e72acf71b..7fe921571ee2e 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -1021,6 +1021,7 @@ def test_zero_len_frame_with_series_corner_cases(): tm.assert_frame_equal(result, expected) +@pytest.mark.filterwarnings("ignore:.*Select only 
valid:FutureWarning") def test_frame_single_columns_object_sum_axis_1(): # GH 13758 data = { diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 2df59923221ec..125f6317b97a2 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -8,6 +8,8 @@ from pandas.compat import is_platform_windows import pandas.util._test_decorators as td +from pandas.core.dtypes.common import is_categorical_dtype + import pandas as pd from pandas import ( Categorical, @@ -90,7 +92,7 @@ def wrapper(x): tm.assert_series_equal( result0, frame.apply(wrapper), check_dtype=check_dtype, rtol=rtol, atol=atol ) - # HACK: win32 + # FIXME: HACK: win32 tm.assert_series_equal( result1, frame.apply(wrapper, axis=1), @@ -140,7 +142,7 @@ def wrapper(x): tm.assert_series_equal(r1, expected) -def assert_stat_op_api(opname, float_frame, float_string_frame, has_numeric_only=False): +def assert_stat_op_api(opname, float_frame, float_string_frame, has_numeric_only=True): """ Check that API for operator opname works as advertised on frame @@ -199,7 +201,7 @@ def wrapper(x): tm.assert_series_equal(result0, frame.apply(wrapper)) tm.assert_series_equal( result1, frame.apply(wrapper, axis=1), check_dtype=False - ) # HACK: win32 + ) # FIXME: HACK: win32 else: skipna_wrapper = alternative wrapper = alternative @@ -249,6 +251,7 @@ def assert_bool_op_api( # make sure op works on mixed-type frame mixed = float_string_frame mixed["_bool_"] = np.random.randn(len(mixed)) > 0.5 + getattr(mixed, opname)(axis=0) getattr(mixed, opname)(axis=1) @@ -264,21 +267,22 @@ class TestDataFrameAnalytics: # --------------------------------------------------------------------- # Reductions + @pytest.mark.filterwarnings("ignore:Dropping of nuisance:FutureWarning") def test_stat_op_api(self, float_frame, float_string_frame): + assert_stat_op_api("count", float_frame, float_string_frame) + assert_stat_op_api("sum", float_frame, float_string_frame) + assert_stat_op_api( - "count", float_frame, float_string_frame, has_numeric_only=True - ) - assert_stat_op_api( - "sum", float_frame, float_string_frame, has_numeric_only=True + "nunique", float_frame, float_string_frame, has_numeric_only=False ) - - assert_stat_op_api("nunique", float_frame, float_string_frame) assert_stat_op_api("mean", float_frame, float_string_frame) assert_stat_op_api("product", float_frame, float_string_frame) assert_stat_op_api("median", float_frame, float_string_frame) assert_stat_op_api("min", float_frame, float_string_frame) assert_stat_op_api("max", float_frame, float_string_frame) - assert_stat_op_api("mad", float_frame, float_string_frame) + assert_stat_op_api( + "mad", float_frame, float_string_frame, has_numeric_only=False + ) assert_stat_op_api("var", float_frame, float_string_frame) assert_stat_op_api("std", float_frame, float_string_frame) assert_stat_op_api("sem", float_frame, float_string_frame) @@ -435,12 +439,17 @@ def test_mixed_ops(self, op): "str": ["a", "b", "c", "d"], } ) - - result = getattr(df, op)() + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = getattr(df, op)() assert len(result) == 2 with pd.option_context("use_bottleneck", False): - result = getattr(df, op)() + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = getattr(df, op)() assert len(result) == 2 def test_reduce_mixed_frame(self): @@ -457,7 +466,8 @@ def test_reduce_mixed_frame(self): tm.assert_numpy_array_equal( test.values, 
np.array([2, 150, "abcde"], dtype=object) ) - tm.assert_series_equal(test, df.T.sum(axis=1)) + alt = df.T.sum(axis=1) + tm.assert_series_equal(test, alt) def test_nunique(self): df = DataFrame({"A": [1, 1, 1], "B": [1, 2, 3], "C": [1, np.nan, 3]}) @@ -510,7 +520,10 @@ def test_mean_mixed_string_decimal(self): df = DataFrame(d) - result = df.mean() + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = df.mean() expected = Series([2.7, 681.6], index=["A", "C"]) tm.assert_series_equal(result, expected) @@ -740,7 +753,8 @@ def test_operators_timedelta64(self): tm.assert_series_equal(result, expected) # excludes numeric - result = mixed.min(axis=1) + with tm.assert_produces_warning(FutureWarning, match="Select only valid"): + result = mixed.min(axis=1) expected = Series([1, 1, 1.0], index=[0, 1, 2]) tm.assert_series_equal(result, expected) @@ -801,8 +815,9 @@ def test_sum_prod_nanops(self, method, unit): idx = ["a", "b", "c"] df = DataFrame({"a": [unit, unit], "b": [unit, np.nan], "c": [np.nan, np.nan]}) # The default - result = getattr(df, method) + result = getattr(df, method)() expected = Series([unit, unit, unit], index=idx, dtype="float64") + tm.assert_series_equal(result, expected) # min_count=1 result = getattr(df, method)(min_count=1) @@ -873,20 +888,23 @@ def test_sum_mixed_datetime(self): df = DataFrame({"A": date_range("2000", periods=4), "B": [1, 2, 3, 4]}).reindex( [2, 3, 4] ) - result = df.sum() + with tm.assert_produces_warning(FutureWarning, match="Select only valid"): + result = df.sum() expected = Series({"B": 7.0}) tm.assert_series_equal(result, expected) def test_mean_corner(self, float_frame, float_string_frame): # unit test when have object data - the_mean = float_string_frame.mean(axis=0) + with tm.assert_produces_warning(FutureWarning, match="Select only valid"): + the_mean = float_string_frame.mean(axis=0) the_sum = float_string_frame.sum(axis=0, numeric_only=True) tm.assert_index_equal(the_sum.index, the_mean.index) assert len(the_mean.index) < len(float_string_frame.columns) # xs sum mixed type, just want to know it works... 
- the_mean = float_string_frame.mean(axis=1) + with tm.assert_produces_warning(FutureWarning, match="Select only valid"): + the_mean = float_string_frame.mean(axis=1) the_sum = float_string_frame.sum(axis=1, numeric_only=True) tm.assert_index_equal(the_sum.index, the_mean.index) @@ -947,10 +965,13 @@ def test_mean_extensionarray_numeric_only_true(self): def test_stats_mixed_type(self, float_string_frame): # don't blow up - float_string_frame.std(1) - float_string_frame.var(1) - float_string_frame.mean(1) - float_string_frame.skew(1) + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + float_string_frame.std(1) + float_string_frame.var(1) + float_string_frame.mean(1) + float_string_frame.skew(1) def test_sum_bools(self): df = DataFrame(index=range(1), columns=range(10)) @@ -1125,7 +1146,6 @@ def test_any_all_object_dtype(self, axis, bool_agg_func, skipna): [np.nan, np.nan, "5", np.nan], ] ) - result = getattr(df, bool_agg_func)(axis=axis, skipna=skipna) expected = Series([True, True, True, True]) tm.assert_series_equal(result, expected) @@ -1224,12 +1244,23 @@ def test_any_all_bool_only(self): def test_any_all_np_func(self, func, data, expected): # GH 19976 data = DataFrame(data) - result = func(data) + + warn = None + if any(is_categorical_dtype(x) for x in data.dtypes): + warn = FutureWarning + + with tm.assert_produces_warning( + warn, match="Select only valid columns", check_stacklevel=False + ): + result = func(data) assert isinstance(result, np.bool_) assert result.item() is expected # method version - result = getattr(DataFrame(data), func.__name__)(axis=None) + with tm.assert_produces_warning( + warn, match="Select only valid columns", check_stacklevel=False + ): + result = getattr(DataFrame(data), func.__name__)(axis=None) assert isinstance(result, np.bool_) assert result.item() is expected @@ -1349,7 +1380,6 @@ def test_min_max_dt64_with_NaT_skipna_false(self, request, tz_naive_fixture): "b": [Timestamp("2020-02-01 08:00:00", tz=tz), pd.NaT], } ) - res = df.min(axis=1, skipna=False) expected = Series([df.loc[0, "a"], pd.NaT]) assert expected.dtype == df["a"].dtype @@ -1411,12 +1441,12 @@ def test_frame_any_all_with_level(self): ], ) - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning(FutureWarning, match="Using the level"): result = df.any(level=0) ex = DataFrame({"data": [False, True]}, index=["one", "two"]) tm.assert_frame_equal(result, ex) - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning(FutureWarning, match="Using the level"): result = df.all(level=0) ex = DataFrame({"data": [False, False]}, index=["one", "two"]) tm.assert_frame_equal(result, ex) @@ -1463,7 +1493,7 @@ def test_reductions_deprecation_level_argument(self, frame_or_series, func): obj = frame_or_series( [1, 2, 3], index=MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]]) ) - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning(FutureWarning, match="level"): getattr(obj, func)(level=0) @@ -1486,11 +1516,17 @@ def test_any_all_categorical_dtype_nuisance_column(self, method): # With bool_only=None, operating on this column raises and is ignored, # so we expect an empty result. 
- result = getattr(df, method)(bool_only=None) + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = getattr(df, method)(bool_only=None) expected = Series([], index=Index([]), dtype=bool) tm.assert_series_equal(result, expected) - result = getattr(np, method)(df, axis=0) + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns", check_stacklevel=False + ): + result = getattr(np, method)(df, axis=0) tm.assert_series_equal(result, expected) def test_median_categorical_dtype_nuisance_column(self): @@ -1505,7 +1541,10 @@ def test_median_categorical_dtype_nuisance_column(self): with pytest.raises(TypeError, match="does not implement reduction"): df.median(numeric_only=False) - result = df.median() + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = df.median() expected = Series([], index=Index([]), dtype=np.float64) tm.assert_series_equal(result, expected) @@ -1515,7 +1554,10 @@ def test_median_categorical_dtype_nuisance_column(self): with pytest.raises(TypeError, match="does not implement reduction"): df.median(numeric_only=False) - result = df.median() + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = df.median() expected = Series([2.0], index=["B"]) tm.assert_series_equal(result, expected) @@ -1539,23 +1581,35 @@ def test_min_max_categorical_dtype_non_ordered_nuisance_column(self, method): with pytest.raises(TypeError, match="is not ordered for operation"): getattr(df, method)(numeric_only=False) - result = getattr(df, method)() + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = getattr(df, method)() expected = Series([], index=Index([]), dtype=np.float64) tm.assert_series_equal(result, expected) - result = getattr(np, method)(df) + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns", check_stacklevel=False + ): + result = getattr(np, method)(df) tm.assert_series_equal(result, expected) # same thing, but with an additional non-categorical column df["B"] = df["A"].astype(object) - result = getattr(df, method)() + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = getattr(df, method)() if method == "min": expected = Series(["a"], index=["B"]) else: expected = Series(["c"], index=["B"]) tm.assert_series_equal(result, expected) - result = getattr(np, method)(df) + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns", check_stacklevel=False + ): + result = getattr(np, method)(df) tm.assert_series_equal(result, expected) def test_reduction_object_block_splits_nuisance_columns(self): @@ -1563,14 +1617,20 @@ def test_reduction_object_block_splits_nuisance_columns(self): df = DataFrame({"A": [0, 1, 2], "B": ["a", "b", "c"]}, dtype=object) # We should only exclude "B", not "A" - result = df.mean() + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = df.mean() expected = Series([1.0], index=["A"]) tm.assert_series_equal(result, expected) # Same behavior but heterogeneous dtype df["C"] = df["A"].astype(int) + 4 - result = df.mean() + with tm.assert_produces_warning( + FutureWarning, match="Select only valid columns" + ): + result = df.mean() expected = Series([1.0, 5.0], index=["A", "C"]) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 
3214290465832..42474ff00ad6d 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -567,6 +567,7 @@ def stretch(row): assert not isinstance(result, tm.SubclassedDataFrame) tm.assert_series_equal(result, expected) + @pytest.mark.filterwarnings("ignore:.*None will no longer:FutureWarning") def test_subclassed_reductions(self, all_reductions): # GH 25596 diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 2f87f4a19b93f..cf4127da79bf9 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1003,7 +1003,8 @@ def test_apply_function_with_indexing_return_column(): "foo2": [1, 2, 4, 4, 5, 6], } ) - result = df.groupby("foo1", as_index=False).apply(lambda x: x.mean()) + with tm.assert_produces_warning(FutureWarning, match="Select only valid"): + result = df.groupby("foo1", as_index=False).apply(lambda x: x.mean()) expected = DataFrame({"foo1": ["one", "three", "two"], "foo2": [3.0, 4.0, 4.0]}) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 7349664614614..f6c211d137206 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -282,7 +282,10 @@ def test_apply(ordered): # GH#21636 tracking down the xfail, in some builds np.mean(df.loc[[0]]) # is coming back as Series([0., 1., 0.], index=["missing", "dense", "values"]) # when we expect Series(0., index=["values"]) - result = grouped.apply(lambda x: np.mean(x)) + with tm.assert_produces_warning( + FutureWarning, match="Select only valid", check_stacklevel=False + ): + result = grouped.apply(lambda x: np.mean(x)) tm.assert_frame_equal(result, expected) # we coerce back to ints @@ -1289,6 +1292,7 @@ def test_groupby_categorical_axis_1(code): tm.assert_frame_equal(result, expected) +@pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") def test_groupby_cat_preserves_structure(observed, ordered): # GH 28787 df = DataFrame( diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 3f43c34b6eb34..4fa21a259e7cb 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -333,6 +333,7 @@ def gni(self, df): return gni # TODO: non-unique columns, as_index=False + @pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") def test_idxmax(self, gb): # object dtype so idxmax goes through _aggregate_item_by_item # GH#5610 @@ -342,6 +343,7 @@ def test_idxmax(self, gb): result = gb.idxmax() tm.assert_frame_equal(result, expected) + @pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") def test_idxmin(self, gb): # object dtype so idxmax goes through _aggregate_item_by_item # GH#5610 @@ -524,6 +526,7 @@ def test_groupby_non_arithmetic_agg_int_like_precision(i): ("idxmax", {"c_int": [1, 3], "c_float": [0, 2], "c_date": [0, 3]}), ], ) +@pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") def test_idxmin_idxmax_returns_int_types(func, values): # GH 25444 df = DataFrame( diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 83aeb29ec53df..2dd9c25a4e47e 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1757,6 +1757,7 @@ def test_pivot_table_values_key_error(): @pytest.mark.parametrize( "op", ["idxmax", "idxmin", "mad", "min", "max", "sum", "prod", "skew"] ) 
+@pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") def test_empty_groupby(columns, keys, values, method, op, request): # GH8093 & GH26411 override_dtype = None From a2a510600e813616f5f4b3bcb71c53537e3cfdb2 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 14 May 2021 21:17:22 -0700 Subject: [PATCH 2/8] troubleshoot docbuild --- doc/source/whatsnew/v1.2.0.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 6dd011c588702..a8deb0643a0cc 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -394,6 +394,7 @@ instead of casting to a NumPy array which may have different semantics (:issue:` :issue:`28949`, :issue:`21020`). .. ipython:: python + :okwarning: ser = pd.Series([0, 1], dtype="category", name="A") df = ser.to_frame() @@ -411,6 +412,7 @@ instead of casting to a NumPy array which may have different semantics (:issue:` *New behavior*: .. ipython:: python + :okwarning: df.any() From 62d5c89b5770de3877c7d590a2b8dd9ecf2c90a4 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 15 May 2021 12:23:54 -0700 Subject: [PATCH 3/8] troubleshoot docbuild --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index a8deb0643a0cc..36b591c3c3142 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -381,6 +381,7 @@ this pathological behavior (:issue:`37827`): *New behavior*: .. ipython:: python + :okwarning: df.mean() From e983473c839ff97026d8446cc4f5ba4dfa2ef537 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 17 May 2021 07:44:10 -0700 Subject: [PATCH 4/8] gh ref --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3d89bdb75ab46..87293eb0374e7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9803,7 +9803,7 @@ def _get_data() -> DataFrame: out = out.astype(np.float64) if numeric_only is None and out.shape[0] != df.shape[1]: - # columns have been dropped + # columns have been dropped GH#41480 arg_name = "numeric_only" if name in ["all", "any"]: arg_name = "bool_only" @@ -9836,7 +9836,7 @@ def _get_data() -> DataFrame: with np.errstate(all="ignore"): result = func(values) - # columns have been dropped + # columns have been dropped GH#41480 arg_name = "numeric_only" if name in ["all", "any"]: arg_name = "bool_only" From fd03e6b0d16b9da84640af8268c9c63d7d6b7845 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 17 May 2021 11:47:56 -0700 Subject: [PATCH 5/8] whatsnew --- doc/source/whatsnew/v1.3.0.rst | 39 ++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 87da8a8dda8cf..36b9b3cc43a33 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -649,6 +649,45 @@ Deprecations - Deprecated behavior of :meth:`DatetimeIndex.union` with mixed timezones; in a future version both will be cast to UTC instead of object dtype (:issue:`39328`) - Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`) +.. _whatsnew_130.deprecations.nuisance_columns: + +Deprecated Dropping Nuisance Columns in DataFrame Reductions and DataFrameGroupBy Operations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When calling a reduction (.min, .max, .sum, ...) 
on a :class:`DataFrame` with +``numeric_only=None`` (the default), columns on which the reduction raises ``TypeError`` +are silently ignored and dropped from the result. This behavior is deprecated. +In a future version, the ``TypeError`` will be raised, and users will need to +select only valid columns before calling the function. + +For example: + +.. ipython:: python + + df = pd.DataFrame({"A": [1, 2, 3, 4], "B": pd.date_range("2016-01-01", periods=4)}) + +*Old behavior*: + +.. code-block:: ipython + + In [3]: df.prod() + Out[3]: + A 24 + dtype: int64 + +*Future behavior*: + +.. code-block:: ipython + + In [4]: df.prod() + ... + TypeError: 'DatetimeArray' does not implement reduction 'prod' + + In [5]: df[["A"]].prod() + Out[5]: + A 24 + dtype: int64 + .. --------------------------------------------------------------------------- From a976e16e2307f01414175d222eb22b70f6c22ad0 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 17 May 2021 18:05:55 -0700 Subject: [PATCH 6/8] requested whatsnew edits --- doc/source/whatsnew/v1.3.0.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 8022869d7be5e..53a455a241ef9 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -655,15 +655,17 @@ Deprecated Dropping Nuisance Columns in DataFrame Reductions and DataFrameGroupB ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ When calling a reduction (.min, .max, .sum, ...) on a :class:`DataFrame` with ``numeric_only=None`` (the default), columns on which the reduction raises ``TypeError`` -are silently ignored and dropped from the result. This behavior is deprecated. -In a future version, the ``TypeError`` will be raised, and users will need to -select only valid columns before calling the function. +are silently ignored and dropped from the result. + +This behavior is deprecated. In a future version, the ``TypeError`` will be raised, +and users will need to select only valid columns before calling the function. For example: .. ipython:: python df = pd.DataFrame({"A": [1, 2, 3, 4], "B": pd.date_range("2016-01-01", periods=4)}) + df *Old behavior*: From a01a048095b6f5960685af19e5946bd2210953e5 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 17 May 2021 18:06:28 -0700 Subject: [PATCH 7/8] requested whatsnew edits --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 53a455a241ef9..db19d953584b2 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -653,7 +653,7 @@ Deprecations Deprecated Dropping Nuisance Columns in DataFrame Reductions and DataFrameGroupBy Operations ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -When calling a reduction (.min, .max, .sum, ...) on a :class:`DataFrame` with +The default of calling a reduction (.min, .max, .sum, ...) on a :class:`DataFrame` with ``numeric_only=None`` (the default), columns on which the reduction raises ``TypeError`` are silently ignored and dropped from the result.
From 512c61175134a9a3ebbe74883a4a101f20ec430f Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 18 May 2021 08:49:48 -0700 Subject: [PATCH 8/8] catch warning --- pandas/tests/frame/test_reductions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index dbac407781edc..564f5d20b0301 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1704,6 +1704,7 @@ def test_groupy_regular_arithmetic_equivalent(meth): def test_frame_mixed_numeric_object_with_timestamp(ts_value): # GH 13912 df = DataFrame({"a": [1], "b": [1.1], "c": ["foo"], "d": [ts_value]}) - result = df.sum() + with tm.assert_produces_warning(FutureWarning, match="Dropping of nuisance"): + result = df.sum() expected = Series([1, 1.1, "foo"], index=list("abc")) tm.assert_series_equal(result, expected)
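A minimal sketch of the usage pattern this deprecation steers users toward, assuming the pandas 1.3+ behavior described in the whatsnew entry above; the DataFrame and variable names below are illustrative only, mirroring the whatsnew example:

    import pandas as pd

    df = pd.DataFrame({"A": [1, 2, 3, 4], "B": pd.date_range("2016-01-01", periods=4)})

    # Deprecated: relies on the datetime "nuisance" column "B" being silently
    # dropped (emits FutureWarning; a future version will raise TypeError).
    deprecated = df.prod()

    # Recommended: select only the columns the reduction is valid for ...
    explicit = df[["A"]].prod()

    # ... or restrict to numeric columns programmatically before reducing.
    numeric = df.select_dtypes(include="number").prod()

Both `explicit` and `numeric` produce the same result (A 24) without depending on silent dropping of nuisance columns.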