Skip to content

Commit

Permalink
REF: move mixed-dtype frame_apply check outside of _reduce try/except
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored Mar 30, 2020
1 parent 914c390 commit b838508
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 23 deletions.
50 changes: 30 additions & 20 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -7996,42 +7996,52 @@ def blk_func(values):
out[:] = coerce_to_dtypes(out.values, df.dtypes)
return out

if not self._is_homogeneous_type:
# try to avoid self.values call

if filter_type is None and axis == 0 and len(self) > 0:
# operate column-wise

# numeric_only must be None here, as other cases are caught above
# require len(self) > 0 because frame_apply messes up empty prod/sum

# this can end up with a non-reduction,
# but not always. If the types are mixed
# with datelike then we need to make sure a Series is returned

# we only end up here if we have not specified
# numeric_only and yet we have tried a
# column-by-column reduction, where we have mixed type.
# So let's just do what we can
from pandas.core.apply import frame_apply

opa = frame_apply(
self, func=f, result_type="expand", ignore_failures=True
)
result = opa.get_result()
if result.ndim == self.ndim:
result = result.iloc[0].rename(None)
return result

data = self
if numeric_only is None:
data = self
values = data.values

try:
result = f(values)

except TypeError:
# e.g. in nanops trying to convert strs to float

# try by-column first
if filter_type is None and axis == 0:
# this can end up with a non-reduction,
# but not always. If the types are mixed
# with datelike then we need to make sure a Series is returned

# we only end up here if we have not specified
# numeric_only and yet we have tried a
# column-by-column reduction, where we have mixed type.
# So let's just do what we can
from pandas.core.apply import frame_apply

opa = frame_apply(
self, func=f, result_type="expand", ignore_failures=True
)
result = opa.get_result()
if result.ndim == self.ndim:
result = result.iloc[0]
return result

# TODO: why doesn't axis matter here?
data = _get_data(axis_matters=False)
labels = data._get_agg_axis(axis)

values = data.values
with np.errstate(all="ignore"):
result = f(values)

else:
if numeric_only:
data = _get_data(axis_matters=True)
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/frame/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,9 @@ def kurt(x):
"sum", np.sum, float_frame_with_na, skipna_alternative=np.nansum
)
assert_stat_op_calc("mean", np.mean, float_frame_with_na, check_dates=True)
assert_stat_op_calc("product", np.prod, float_frame_with_na)
assert_stat_op_calc(
"product", np.prod, float_frame_with_na, skipna_alternative=np.nanprod
)

assert_stat_op_calc("mad", mad, float_frame_with_na)
assert_stat_op_calc("var", var, float_frame_with_na)
Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/frame/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,8 +372,12 @@ def test_fillna_categorical_nan(self):
cat = Categorical([np.nan, 2, np.nan])
val = Categorical([np.nan, np.nan, np.nan])
df = DataFrame({"cats": cat, "vals": val})
with tm.assert_produces_warning(RuntimeWarning):
res = df.fillna(df.median())

# GH#32950 df.median() is poorly behaved because there is no
# Categorical.median
median = Series({"cats": 2.0, "vals": np.nan})

res = df.fillna(median)
v_exp = [np.nan, np.nan, np.nan]
df_exp = DataFrame({"cats": [2, 2, 2], "vals": v_exp}, dtype="category")
tm.assert_frame_equal(res, df_exp)
Expand Down

0 comments on commit b838508

Please sign in to comment.