Skip to content

Commit

Permalink
REGR: NumPy func warning when dropping nuisance in agg, apply, transf…
Browse files Browse the repository at this point in the history
…orm (#50627)

REGR: Warnings for NumPy funcs when dropping nuisance in agg, apply, transform
  • Loading branch information
rhshadrach authored Jan 16, 2023
1 parent 54b4037 commit ce123cd
Show file tree
Hide file tree
Showing 10 changed files with 163 additions and 13 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Fixed regressions
- Enforced reversion of ``color`` as an alias for ``c`` and ``size`` as an alias for ``s`` in function :meth:`DataFrame.plot.scatter` (:issue:`49732`)
- Fixed regression in :meth:`SeriesGroupBy.apply` setting a ``name`` attribute on the result if the result was a :class:`DataFrame` (:issue:`49907`)
- Fixed performance regression in setting with the :meth:`~DataFrame.at` indexer (:issue:`49771`)
- Fixed regression in the methods ``apply``, ``agg``, and ``transform`` when used with NumPy functions, where the emitted warning informed users to supply ``numeric_only=True`` if the operation failed on non-numeric dtypes, even though these methods do not accept that argument; such columns must be dropped prior to using these methods (:issue:`50538`)
-

.. ---------------------------------------------------------------------------
Expand Down
24 changes: 21 additions & 3 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,10 @@
SpecificationError,
)
from pandas.util._decorators import cache_readonly
from pandas.util._exceptions import find_stack_level
from pandas.util._exceptions import (
find_stack_level,
rewrite_warning,
)

from pandas.core.dtypes.cast import is_nested_object
from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -174,7 +177,15 @@ def agg(self) -> DataFrame | Series | None:
if callable(arg):
f = com.get_cython_func(arg)
if f and not args and not kwargs:
return getattr(obj, f)()
# GH#50538
old_msg = "The default value of numeric_only"
new_msg = (
f"The operation {arg} failed on a column. If any error is "
f"raised, this will raise an exception in a future version "
f"of pandas. Drop these columns to avoid this warning."
)
with rewrite_warning(old_msg, FutureWarning, new_msg):
return getattr(obj, f)()

# caller can react
return None
Expand Down Expand Up @@ -309,7 +320,14 @@ def transform_str_or_callable(self, func) -> DataFrame | Series:
if not args and not kwargs:
f = com.get_cython_func(func)
if f:
return getattr(obj, f)()
old_msg = "The default value of numeric_only"
new_msg = (
f"The operation {func} failed on a column. If any error is "
f"raised, this will raise an exception in a future version "
f"of pandas. Drop these columns to avoid this warning."
)
with rewrite_warning(old_msg, FutureWarning, new_msg):
return getattr(obj, f)()

# Two possible ways to use a UDF - apply or call directly
try:
Expand Down
38 changes: 33 additions & 5 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@ class providing the base-class of operations.
"""
from __future__ import annotations

from contextlib import contextmanager
from contextlib import (
contextmanager,
nullcontext,
)
import datetime
from functools import (
partial,
Expand Down Expand Up @@ -64,7 +67,10 @@ class providing the base-class of operations.
cache_readonly,
doc,
)
from pandas.util._exceptions import find_stack_level
from pandas.util._exceptions import (
find_stack_level,
rewrite_warning,
)

from pandas.core.dtypes.cast import ensure_dtype_can_hold_na
from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -1508,7 +1514,9 @@ def _aggregate_with_numba(self, data, func, *args, engine_kwargs=None, **kwargs)
)
)
def apply(self, func, *args, **kwargs) -> NDFrameT:

# GH#50538
is_np_func = func in com._cython_table and func not in com._builtin_table
orig_func = func
func = com.is_builtin_func(func)

if isinstance(func, str):
Expand Down Expand Up @@ -1546,7 +1554,17 @@ def f(g):
# ignore SettingWithCopy here in case the user mutates
with option_context("mode.chained_assignment", None):
try:
result = self._python_apply_general(f, self._selected_obj)
# GH#50538
old_msg = "The default value of numeric_only"
new_msg = (
f"The operation {orig_func} failed on a column. If any error is "
f"raised, this will raise an exception in a future version "
f"of pandas. Drop these columns to avoid this warning."
)
with rewrite_warning(
old_msg, FutureWarning, new_msg
) if is_np_func else nullcontext():
result = self._python_apply_general(f, self._selected_obj)
except TypeError:
# gh-20949
# try again, with .apply acting as a filtering
Expand All @@ -1557,7 +1575,17 @@ def f(g):
# on a string grouper column

with self._group_selection_context():
return self._python_apply_general(f, self._selected_obj)
# GH#50538
old_msg = "The default value of numeric_only"
new_msg = (
f"The operation {orig_func} failed on a column. If any error "
f"is raised, this will raise an exception in a future version "
f"of pandas. Drop these columns to avoid this warning."
)
with rewrite_warning(
old_msg, FutureWarning, new_msg
) if is_np_func else nullcontext():
return self._python_apply_general(f, self._selected_obj)

return result

Expand Down
21 changes: 21 additions & 0 deletions pandas/tests/apply/test_frame_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -1287,6 +1287,27 @@ def test_nuiscance_columns():
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("method", ["agg", "apply", "transform"])
def test_numeric_only_warning_numpy(method):
    """GH#50538: check DataFrame agg/apply/transform with ``np.mean`` on mixed dtypes.

    The frame has one integer column and one string column.

    * ``agg`` must emit a ``FutureWarning`` matching the rewritten
      "The operation <function mean ... failed" text and must reject
      ``numeric_only=True`` with a ``TypeError``.
    * ``apply`` must raise ``TypeError`` ("Could not convert").
    * any other method (``transform`` in the parametrization) must raise
      ``ValueError`` ("Function did not transform") while the rewritten warning
      is emitted.
    """
    frame = DataFrame({"a": [1, 1, 2], "b": list("xyz")})
    call = getattr(frame, method)

    if method == "agg":
        rewritten = "The operation <function mean.*failed"
        with tm.assert_produces_warning(FutureWarning, match=rewritten):
            call(np.mean)
        # Ensure users can't pass numeric_only
        with pytest.raises(TypeError, match="got an unexpected keyword argument"):
            call(np.mean, numeric_only=True)
        return

    if method == "apply":
        with pytest.raises(TypeError, match="Could not convert"):
            call(np.mean)
        return

    # Remaining case: the warning is checked inside the expected ValueError.
    with pytest.raises(ValueError, match="Function did not transform"):
        rewritten = "The operation <function mean.*failed"
        with tm.assert_produces_warning(FutureWarning, match=rewritten):
            call(np.mean)


@pytest.mark.parametrize("how", ["agg", "apply"])
def test_non_callable_aggregates(how):

Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/groupby/aggregate/test_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -1454,3 +1454,15 @@ def test_agg_of_mode_list(test, constant):
expected = expected.set_index(0)

tm.assert_frame_equal(result, expected)


def test_numeric_only_warning_numpy():
    """GH#50538: groupby ``agg`` with ``np.mean`` emits the rewritten FutureWarning.

    Groups a frame holding a string column "b" by "a", then checks that
    ``agg(np.mean)`` warns with the "The operation <function mean ... failed"
    text and that ``numeric_only=True`` is rejected with a ``TypeError``.
    """
    frame = DataFrame({"a": [1, 1, 2], "b": list("xyz"), "c": [3, 4, 5]})
    grouped = frame.groupby("a")

    expected_warning = "The operation <function mean.*failed"
    with tm.assert_produces_warning(FutureWarning, match=expected_warning):
        grouped.agg(np.mean)

    # Ensure users can't pass numeric_only
    bad_keyword = "got an unexpected keyword argument"
    with pytest.raises(TypeError, match=bad_keyword):
        grouped.agg(np.mean, numeric_only=True)
13 changes: 13 additions & 0 deletions pandas/tests/groupby/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -1357,3 +1357,16 @@ def test_empty_df(method, op):
)

tm.assert_series_equal(result, expected)


def test_numeric_only_warning_numpy():
    """GH#50538: groupby ``apply`` with ``np.mean`` emits the rewritten FutureWarning.

    Groups a frame holding a string column "b" by "a", then checks that
    ``apply(np.mean)`` warns with the "The operation <function mean ... failed"
    text and that ``numeric_only=True`` is rejected with a ``TypeError``.
    """
    frame = DataFrame({"a": [1, 1, 2], "b": list("xyz"), "c": [3, 4, 5]})
    grouped = frame.groupby("a")

    expected_warning = "The operation <function mean.*failed"
    # Warning is raised from within NumPy, so the stack level is not checked.
    with tm.assert_produces_warning(
        FutureWarning, match=expected_warning, check_stacklevel=False
    ):
        grouped.apply(np.mean)

    # Ensure users can't pass numeric_only
    bad_keyword = "got an unexpected keyword argument"
    with pytest.raises(TypeError, match=bad_keyword):
        grouped.apply(np.mean, numeric_only=True)
18 changes: 16 additions & 2 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -486,9 +486,14 @@ def test_frame_set_name_single(df):
result = df.groupby("A", as_index=False).mean()
assert result.index.name != "A"

# GH#50538
msg = "The operation <function mean.*failed"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = grouped.agg(np.mean)
assert result.index.name == "A"
# Ensure users can't pass numeric_only
with pytest.raises(TypeError, match="got an unexpected keyword argument"):
grouped.agg(np.mean, numeric_only=True)

result = grouped.agg({"C": np.mean, "D": np.std})
assert result.index.name == "A"
Expand Down Expand Up @@ -766,19 +771,24 @@ def test_as_index_series_return_frame(df):
grouped = df.groupby("A", as_index=False)
grouped2 = df.groupby(["A", "B"], as_index=False)

msg = "The default value of numeric_only"
# GH#50538
msg = "The operation <function sum.*failed"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = grouped["C"].agg(np.sum)
expected = grouped.agg(np.sum).loc[:, ["A", "C"]]
assert isinstance(result, DataFrame)
tm.assert_frame_equal(result, expected)
# Ensure users can't pass numeric_only
with pytest.raises(TypeError, match="got an unexpected keyword argument"):
grouped.agg(np.mean, numeric_only=True)

result2 = grouped2["C"].agg(np.sum)
expected2 = grouped2.agg(np.sum).loc[:, ["A", "B", "C"]]
assert isinstance(result2, DataFrame)
tm.assert_frame_equal(result2, expected2)

result = grouped["C"].sum()
msg = "The default value of numeric_only"
with tm.assert_produces_warning(FutureWarning, match=msg):
expected = grouped.sum().loc[:, ["A", "C"]]
assert isinstance(result, DataFrame)
Expand Down Expand Up @@ -1021,10 +1031,14 @@ def test_wrap_aggregated_output_multindex(mframe):
df["baz", "two"] = "peekaboo"

keys = [np.array([0, 0, 1]), np.array([0, 0, 1])]
msg = "The default value of numeric_only"
# GH#50538
msg = "The operation <function mean.*failed"
with tm.assert_produces_warning(FutureWarning, match=msg):
agged = df.groupby(keys).agg(np.mean)
assert isinstance(agged.columns, MultiIndex)
# Ensure users can't pass numeric_only
with pytest.raises(TypeError, match="got an unexpected keyword argument"):
df.groupby(keys).agg(np.mean, numeric_only=True)

def aggfun(ser):
if ser.name == ("foo", "one"):
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/groupby/transform/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -1563,3 +1563,18 @@ def test_as_index_no_change(keys, df, groupby_func):
result = gb_as_index_true.transform(groupby_func, *args)
expected = gb_as_index_false.transform(groupby_func, *args)
tm.assert_equal(result, expected)


@pytest.mark.parametrize("func", [np.mean, np.cumprod])
def test_numeric_only_warning_numpy(func):
    """GH#50538: groupby ``transform`` keeps the original numeric_only warning.

    Unlike agg/apply, ``transform`` accepts ``numeric_only``. The test checks that
    calling it without the keyword warns with "The default value of numeric_only",
    and that passing ``numeric_only=True`` returns only the numeric column "c"
    with the expected per-group values.
    """
    frame = DataFrame({"a": [1, 1, 2], "b": list("xyz"), "c": [3, 4, 5]})
    grouped = frame.groupby("a")

    default_warning = "The default value of numeric_only"
    with tm.assert_produces_warning(FutureWarning, match=default_warning):
        grouped.transform(func)

    # Ensure users can pass numeric_only
    observed = grouped.transform(func, numeric_only=True)

    if func == np.mean:
        column_c = [3.5, 3.5, 5.0]
    else:
        column_c = [3, 12, 5]
    tm.assert_frame_equal(observed, DataFrame({"c": column_c}))
21 changes: 21 additions & 0 deletions pandas/tests/resample/test_resample_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -938,3 +938,24 @@ def test_series_downsample_method(method, numeric_only, expected_data):
result = func(numeric_only=numeric_only)
expected = Series(expected_data, index=expected_index)
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("method", ["agg", "apply", "transform"])
def test_numeric_only_warning_numpy(method):
    """GH#50538: check resample agg/apply/transform with ``np.mean``.

    A string column "D" is added to the module-level ``_test_frame`` before
    resampling by "H".

    * ``transform`` warns with "The default value of numeric_only" and accepts
      ``numeric_only=True``, matching ``transform("mean", numeric_only=True)``.
    * the other methods warn with the rewritten
      "The operation <function mean ... failed" text and reject
      ``numeric_only=True`` with a ``TypeError``.
    """
    resampled = _test_frame.assign(D="x").resample("H")
    call = getattr(resampled, method)

    if method != "transform":
        rewritten = "The operation <function mean.*failed"
        with tm.assert_produces_warning(FutureWarning, match=rewritten):
            call(np.mean)
        # Ensure users can't pass numeric_only
        with pytest.raises(TypeError, match="got an unexpected keyword argument"):
            call(np.mean, numeric_only=True)
        return

    default_warning = "The default value of numeric_only"
    with tm.assert_produces_warning(FutureWarning, match=default_warning):
        call(np.mean)

    # Ensure users can pass numeric_only
    observed = call(np.mean, numeric_only=True)
    reference = resampled.transform("mean", numeric_only=True)
    tm.assert_frame_equal(observed, reference)
13 changes: 10 additions & 3 deletions pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,8 @@ def test_pivot_table_nocols(self):
df = DataFrame(
{"rows": ["a", "b", "c"], "cols": ["x", "y", "z"], "values": [1, 2, 3]}
)
msg = "pivot_table dropped a column because it failed to aggregate"
# GH#50538
msg = "The operation <function sum.*failed"
with tm.assert_produces_warning(FutureWarning, match=msg):
rs = df.pivot_table(columns="cols", aggfunc=np.sum)
xp = df.pivot_table(index="cols", aggfunc=np.sum).T
Expand Down Expand Up @@ -907,7 +908,8 @@ def test_no_col(self):

# to help with a buglet
self.data.columns = [k * 2 for k in self.data.columns]
msg = "pivot_table dropped a column because it failed to aggregate"
# GH#50538
msg = "The operation <function mean.*failed"
with tm.assert_produces_warning(FutureWarning, match=msg):
table = self.data.pivot_table(
index=["AA", "BB"], margins=True, aggfunc=np.mean
Expand All @@ -916,6 +918,7 @@ def test_no_col(self):
totals = table.loc[("All", ""), value_col]
assert totals == self.data[value_col].mean()

msg = "pivot_table dropped a column because it failed to aggregate"
with tm.assert_produces_warning(FutureWarning, match=msg):
table = self.data.pivot_table(
index=["AA", "BB"], margins=True, aggfunc="mean"
Expand Down Expand Up @@ -975,7 +978,11 @@ def test_margin_with_only_columns_defined(
}
)

msg = "pivot_table dropped a column because it failed to aggregate"
if aggfunc == "sum":
msg = "pivot_table dropped a column because it failed to aggregate"
else:
# GH#50538
msg = "The operation <function mean.*failed"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.pivot_table(columns=columns, margins=True, aggfunc=aggfunc)
expected = DataFrame(values, index=Index(["D", "E"]), columns=expected_columns)
Expand Down

0 comments on commit ce123cd

Please sign in to comment.