Skip to content

Commit

Permalink
REF: handle axis=None case inside DataFrame.any/all to simplify _redu…
Browse files Browse the repository at this point in the history
…ce (pandas-dev#35899)

* REF: remove unnecessary try/except

* TST: add test for agg on ordered categorical cols (pandas-dev#35630)

* TST: resample does not yield empty groups (pandas-dev#10603) (pandas-dev#35799)

* revert accidental rebase

* REF: handle axis=None cases inside DataFrame.all/any

* annotate

* dummy commit to force Travis

Co-authored-by: Karthik Mathur <22126205+mathurk1@users.noreply.github.com>
Co-authored-by: tkmz-n <60312218+tkmz-n@users.noreply.github.com>
  • Loading branch information
3 people authored and Kevin D Smith committed Nov 2, 2020
1 parent c9d443f commit e0c1639
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 39 deletions.
61 changes: 22 additions & 39 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -8617,22 +8617,19 @@ def _reduce(
cols = self.columns[~dtype_is_dt]
self = self[cols]

if axis is None and filter_type == "bool":
labels = None
constructor = None
else:
# TODO: Make other agg func handle axis=None properly
axis = self._get_axis_number(axis)
labels = self._get_agg_axis(axis)
constructor = self._constructor
# TODO: Make other agg func handle axis=None properly
axis = self._get_axis_number(axis)
labels = self._get_agg_axis(axis)
constructor = self._constructor
assert axis in [0, 1]

def func(values):
if is_extension_array_dtype(values.dtype):
return extract_array(values)._reduce(name, skipna=skipna, **kwds)
else:
return op(values, axis=axis, skipna=skipna, **kwds)

def _get_data(axis_matters):
def _get_data(axis_matters: bool) -> "DataFrame":
if filter_type is None:
data = self._get_numeric_data()
elif filter_type == "bool":
Expand All @@ -8649,7 +8646,7 @@ def _get_data(axis_matters):
raise NotImplementedError(msg)
return data

if numeric_only is not None and axis in [0, 1]:
if numeric_only is not None:
df = self
if numeric_only is True:
df = _get_data(axis_matters=True)
Expand All @@ -8675,6 +8672,8 @@ def blk_func(values):
out[:] = coerce_to_dtypes(out.values, df.dtypes)
return out

assert numeric_only is None

if not self._is_homogeneous_type or self._mgr.any_extension_types:
# try to avoid self.values call

Expand Down Expand Up @@ -8702,40 +8701,24 @@ def blk_func(values):
result = result.iloc[0].rename(None)
return result

if numeric_only is None:
data = self
values = data.values

try:
result = func(values)

except TypeError:
# e.g. in nanops trying to convert strs to float
data = self
values = data.values

# TODO: why doesn't axis matter here?
data = _get_data(axis_matters=False)
labels = data._get_agg_axis(axis)
try:
result = func(values)

values = data.values
with np.errstate(all="ignore"):
result = func(values)
except TypeError:
# e.g. in nanops trying to convert strs to float

else:
if numeric_only:
data = _get_data(axis_matters=True)
labels = data._get_agg_axis(axis)
# TODO: why doesn't axis matter here?
data = _get_data(axis_matters=False)
labels = data._get_agg_axis(axis)

values = data.values
else:
data = self
values = data.values
result = func(values)
values = data.values
with np.errstate(all="ignore"):
result = func(values)

if filter_type == "bool" and is_object_dtype(values) and axis is None:
# work around https://github.com/numpy/numpy/issues/10489
# TODO: can we de-duplicate parts of this with the next block?
result = np.bool_(result)
elif hasattr(result, "dtype") and is_object_dtype(result.dtype):
if is_object_dtype(result.dtype):
try:
if filter_type is None:
result = result.astype(np.float64)
Expand Down
8 changes: 8 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11499,6 +11499,14 @@ def logical_func(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs
"Option bool_only is not implemented with option level."
)
return self._agg_by_level(name, axis=axis, level=level, skipna=skipna)

if self.ndim > 1 and axis is None:
# Reduce along one dimension then the other, to simplify DataFrame._reduce
res = logical_func(
self, axis=0, bool_only=bool_only, skipna=skipna, **kwargs
)
return logical_func(res, skipna=skipna, **kwargs)

return self._reduce(
func,
name=name,
Expand Down

0 comments on commit e0c1639

Please sign in to comment.