Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: Partial failure in Series.transform and DataFrame.transform #40238

Merged
merged 1 commit into from
Mar 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,7 @@ Deprecations
- Deprecated :meth:`core.window.ewm.ExponentialMovingWindow.vol` (:issue:`39220`)
- Using ``.astype`` to convert between ``datetime64[ns]`` dtype and :class:`DatetimeTZDtype` is deprecated and will raise in a future version, use ``obj.tz_localize`` or ``obj.dt.tz_localize`` instead (:issue:`38622`)
- Deprecated casting ``datetime.date`` objects to ``datetime64`` when used as ``fill_value`` in :meth:`DataFrame.unstack`, :meth:`DataFrame.shift`, :meth:`Series.shift`, and :meth:`DataFrame.reindex`, pass ``pd.Timestamp(dateobj)`` instead (:issue:`39767`)
- Deprecated allowing partial failure in :meth:`Series.transform` and :meth:`DataFrame.transform` when ``func`` is list-like or dict-like; will raise if any function fails on a column in a future version (:issue:`40211`)

.. ---------------------------------------------------------------------------
Expand Down
14 changes: 13 additions & 1 deletion pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
Union,
cast,
)
import warnings

import numpy as np

Expand Down Expand Up @@ -267,6 +268,7 @@ def transform_dict_like(self, func):
func = self.normalize_dictlike_arg("transform", obj, func)

results: Dict[Hashable, FrameOrSeriesUnion] = {}
failed_names = []
for name, how in func.items():
colg = obj._gotitem(name, ndim=1)
try:
Expand All @@ -277,10 +279,20 @@ def transform_dict_like(self, func):
"No transform functions were provided",
}:
raise err

else:
failed_names.append(name)
# combine results
if not results:
raise ValueError("Transform function failed")
if failed_names:
    # Reaching this point means at least one entry succeeded (an empty
    # ``results`` raises just above), so this is a *partial* failure: warn
    # that it is deprecated while still returning the successful results.
    # The adjacent string literals are concatenated verbatim, so every piece
    # except the last must end with a space.
    warnings.warn(
        f"{failed_names} did not transform successfully. "
        "Allowing for partial failure is deprecated, this will raise "
        "a ValueError in a future version of pandas. "
        "Drop these columns/ops to avoid this warning.",
        FutureWarning,
        stacklevel=4,
    )
return concat(results, axis=1)

def transform_str_or_callable(self, func) -> FrameOrSeriesUnion:
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/apply/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from pandas.core.groupby.base import transformation_kernels

# Shared kernel lists for the Series/DataFrame transform tests.
# Two groupby transformation kernels are left out of both lists:
# tshift only works on time index and is deprecated
# There is no Series.cumcount or DataFrame.cumcount
series_transform_kernels = sorted(
    set(transformation_kernels) - {"tshift", "cumcount"}
)
frame_transform_kernels = sorted(
    set(transformation_kernels) - {"tshift", "cumcount"}
)
24 changes: 11 additions & 13 deletions pandas/tests/apply/test_frame_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,9 @@
Series,
)
import pandas._testing as tm
from pandas.core.groupby.base import transformation_kernels
from pandas.tests.apply.common import frame_transform_kernels
from pandas.tests.frame.common import zip_frames

# tshift only works on time index and is deprecated
# There is no DataFrame.cumcount
frame_kernels = [
x for x in sorted(transformation_kernels) if x not in ["tshift", "cumcount"]
]


def unpack_obj(obj, klass, axis):
"""
Expand All @@ -44,7 +38,7 @@ def test_transform_ufunc(axis, float_frame, frame_or_series):
tm.assert_equal(result, expected)


@pytest.mark.parametrize("op", frame_kernels)
@pytest.mark.parametrize("op", frame_transform_kernels)
def test_transform_groupby_kernel(axis, float_frame, op):
# GH 35964

Expand Down Expand Up @@ -158,7 +152,7 @@ def test_transform_method_name(method):


wont_fail = ["ffill", "bfill", "fillna", "pad", "backfill", "shift"]
frame_kernels_raise = [x for x in frame_kernels if x not in wont_fail]
frame_kernels_raise = [x for x in frame_transform_kernels if x not in wont_fail]


# mypy doesn't allow adding lists of different types
Expand Down Expand Up @@ -187,21 +181,25 @@ def test_transform_bad_dtype(op, frame_or_series):

@pytest.mark.parametrize("op", frame_kernels_raise)
def test_transform_partial_failure(op):
# GH 35964
# GH 35964 & GH 40211
match = "Allowing for partial failure is deprecated"

# Using object makes most transform kernels fail
df = DataFrame({"A": 3 * [object], "B": [1, 2, 3]})

expected = df[["B"]].transform([op])
result = df.transform([op])
with tm.assert_produces_warning(FutureWarning, match=match):
result = df.transform([op])
tm.assert_equal(result, expected)

expected = df[["B"]].transform({"B": op})
result = df.transform({"B": op})
with tm.assert_produces_warning(FutureWarning, match=match):
result = df.transform({"A": op, "B": op})
tm.assert_equal(result, expected)

expected = df[["B"]].transform({"B": [op]})
result = df.transform({"B": [op]})
with tm.assert_produces_warning(FutureWarning, match=match):
result = df.transform({"A": [op], "B": [op]})
tm.assert_equal(result, expected)


Expand Down
29 changes: 29 additions & 0 deletions pandas/tests/apply/test_series_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
timedelta_range,
)
import pandas._testing as tm
from pandas.tests.apply.common import series_transform_kernels


def test_series_map_box_timedelta():
Expand Down Expand Up @@ -256,6 +257,34 @@ def test_transform(string_series):
tm.assert_series_equal(result.reindex_like(expected), expected)


@pytest.mark.parametrize("op", series_transform_kernels)
def test_transform_partial_failure(op, request):
    """
    Check that a partially failing ``Series.transform`` warns and still
    returns the results of the functions that succeeded.

    ``op`` is combined with ``"shift"`` in list-like, dict-like and
    dict-of-lists form; each call must emit the deprecation
    ``FutureWarning`` and equal the result of transforming with
    ``"shift"`` alone.
    """
    # GH 35964 & GH 40211
    never_fails = ("ffill", "bfill", "pad", "backfill", "shift")
    if op in never_fails:
        # No failure means no warning, so the warning assertion below
        # is expected to fail for these kernels.
        request.node.add_marker(
            pytest.mark.xfail(reason=f"{op} is successful on any dtype")
        )
    msg = "Allowing for partial failure is deprecated"

    # Using object makes most transform kernels fail
    obj = Series(3 * [object])

    # (func that succeeds entirely, func where ``op`` is expected to fail)
    cases = [
        (["shift"], [op, "shift"]),
        ({"B": "shift"}, {"A": op, "B": "shift"}),
        ({"B": ["shift"]}, {"A": [op], "B": ["shift"]}),
    ]
    for passing_func, partial_func in cases:
        expected = obj.transform(passing_func)
        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = obj.transform(partial_func)
        tm.assert_equal(result, expected)


def test_demo():
# demonstration tests
s = Series(range(6), dtype="int64", name="series")
Expand Down