Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REF: move mixed-dtype frame_apply check outside of _reduce try/except #32950

Merged
merged 12 commits into from
Mar 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 30 additions & 20 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -7995,42 +7995,52 @@ def blk_func(values):
out[:] = coerce_to_dtypes(out.values, df.dtypes)
return out

if not self._is_homogeneous_type:
# try to avoid self.values call

if filter_type is None and axis == 0 and len(self) > 0:
# operate column-wise

# numeric_only must be None here, as other cases caught above
# require len(self) > 0 because frame_apply messes up empty prod/sum

# this can end up with a non-reduction, but not always.
# If the types are mixed with datelike, then we need to
# make sure a Series is returned

# we only end up here if we have not specified
# numeric_only and yet we have tried a
# column-by-column reduction, where we have mixed type.
# So let's just do what we can
from pandas.core.apply import frame_apply

opa = frame_apply(
self, func=f, result_type="expand", ignore_failures=True
)
result = opa.get_result()
if result.ndim == self.ndim:
result = result.iloc[0].rename(None)
return result

data = self
if numeric_only is None:
data = self
values = data.values

try:
result = f(values)

except TypeError:
# e.g. in nanops trying to convert strs to float

# try by-column first
if filter_type is None and axis == 0:
# this can end up with a non-reduction, but not always.
# If the types are mixed with datelike, then we need to
# make sure a Series is returned

# we only end up here if we have not specified
# numeric_only and yet we have tried a
# column-by-column reduction, where we have mixed type.
# So let's just do what we can
from pandas.core.apply import frame_apply

opa = frame_apply(
self, func=f, result_type="expand", ignore_failures=True
)
result = opa.get_result()
if result.ndim == self.ndim:
result = result.iloc[0]
return result

# TODO: why doesn't axis matter here?
data = _get_data(axis_matters=False)
labels = data._get_agg_axis(axis)

values = data.values
with np.errstate(all="ignore"):
result = f(values)

else:
if numeric_only:
data = _get_data(axis_matters=True)
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/frame/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,9 @@ def kurt(x):
"sum", np.sum, float_frame_with_na, skipna_alternative=np.nansum
)
assert_stat_op_calc("mean", np.mean, float_frame_with_na, check_dates=True)
assert_stat_op_calc("product", np.prod, float_frame_with_na)
assert_stat_op_calc(
"product", np.prod, float_frame_with_na, skipna_alternative=np.nanprod
)

assert_stat_op_calc("mad", mad, float_frame_with_na)
assert_stat_op_calc("var", var, float_frame_with_na)
Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/frame/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,8 +372,12 @@ def test_fillna_categorical_nan(self):
cat = Categorical([np.nan, 2, np.nan])
val = Categorical([np.nan, np.nan, np.nan])
df = DataFrame({"cats": cat, "vals": val})
with tm.assert_produces_warning(RuntimeWarning):
res = df.fillna(df.median())

# GH#32950 df.median() is poorly behaved because there is no
# Categorical.median
median = Series({"cats": 2.0, "vals": np.nan})

res = df.fillna(median)
v_exp = [np.nan, np.nan, np.nan]
df_exp = DataFrame({"cats": [2, 2, 2], "vals": v_exp}, dtype="category")
tm.assert_frame_equal(res, df_exp)
Expand Down