Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REF: handle axis=None case inside DataFrame.any/all to simplify _reduce #35899

Merged
merged 18 commits into from
Sep 2, 2020
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
4c5eddd
REF: remove unnecessary try/except
jbrockmendel Aug 21, 2020
c632c9f
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Aug 21, 2020
9e64be3
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Aug 21, 2020
42649fb
TST: add test for agg on ordered categorical cols (#35630)
mathurk1 Aug 21, 2020
47121dd
TST: resample does not yield empty groups (#10603) (#35799)
tkmz-n Aug 21, 2020
1decb3e
revert accidental rebase
jbrockmendel Aug 22, 2020
57c5dd3
Merge branch 'master' of https://github.com/pandas-dev/pandas into ma…
jbrockmendel Aug 22, 2020
a358463
Merge branch 'master' of https://github.com/pandas-dev/pandas into ma…
jbrockmendel Aug 23, 2020
ffa7ad7
Merge branch 'master' of https://github.com/pandas-dev/pandas into ma…
jbrockmendel Aug 23, 2020
e5e98d4
Merge branch 'master' of https://github.com/pandas-dev/pandas into ma…
jbrockmendel Aug 24, 2020
408db5a
Merge branch 'master' of https://github.com/pandas-dev/pandas into ma…
jbrockmendel Aug 24, 2020
d3493cf
Merge branch 'master' of https://github.com/pandas-dev/pandas into ma…
jbrockmendel Aug 25, 2020
75a805a
Merge branch 'master' of https://github.com/pandas-dev/pandas into ma…
jbrockmendel Aug 25, 2020
9f61070
Merge branch 'master' of https://github.com/pandas-dev/pandas into ma…
jbrockmendel Aug 25, 2020
fbbfb1f
REF: handle axis=None cases inside DataFrame.all/any
jbrockmendel Aug 25, 2020
f4ca110
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Aug 26, 2020
644269e
annotate
jbrockmendel Aug 26, 2020
bac997f
dummy commit to force Travis
jbrockmendel Aug 27, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 22 additions & 39 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -8598,22 +8598,19 @@ def _reduce(
cols = self.columns[~dtype_is_dt]
self = self[cols]

if axis is None and filter_type == "bool":
labels = None
constructor = None
else:
# TODO: Make other agg func handle axis=None properly
axis = self._get_axis_number(axis)
labels = self._get_agg_axis(axis)
constructor = self._constructor
# TODO: Make other agg func handle axis=None properly
axis = self._get_axis_number(axis)
labels = self._get_agg_axis(axis)
constructor = self._constructor
assert axis in [0, 1]

def func(values):
if is_extension_array_dtype(values.dtype):
return extract_array(values)._reduce(name, skipna=skipna, **kwds)
else:
return op(values, axis=axis, skipna=skipna, **kwds)

def _get_data(axis_matters):
def _get_data(axis_matters: bool):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While we're here, could we add a return annotation showing that it returns a DataFrame?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated

if filter_type is None:
data = self._get_numeric_data()
elif filter_type == "bool":
Expand All @@ -8630,7 +8627,7 @@ def _get_data(axis_matters):
raise NotImplementedError(msg)
return data

if numeric_only is not None and axis in [0, 1]:
if numeric_only is not None:
df = self
if numeric_only is True:
df = _get_data(axis_matters=True)
Expand All @@ -8656,6 +8653,8 @@ def blk_func(values):
out[:] = coerce_to_dtypes(out.values, df.dtypes)
return out

assert numeric_only is None

if not self._is_homogeneous_type or self._mgr.any_extension_types:
# try to avoid self.values call

Expand Down Expand Up @@ -8683,40 +8682,24 @@ def blk_func(values):
result = result.iloc[0].rename(None)
return result

if numeric_only is None:
data = self
values = data.values

try:
result = func(values)

except TypeError:
# e.g. in nanops trying to convert strs to float
data = self
values = data.values

# TODO: why doesnt axis matter here?
data = _get_data(axis_matters=False)
labels = data._get_agg_axis(axis)
try:
result = func(values)

values = data.values
with np.errstate(all="ignore"):
result = func(values)
except TypeError:
# e.g. in nanops trying to convert strs to float

else:
if numeric_only:
data = _get_data(axis_matters=True)
labels = data._get_agg_axis(axis)
# TODO: why doesnt axis matter here?
data = _get_data(axis_matters=False)
labels = data._get_agg_axis(axis)

values = data.values
else:
data = self
values = data.values
result = func(values)
values = data.values
with np.errstate(all="ignore"):
result = func(values)

if filter_type == "bool" and is_object_dtype(values) and axis is None:
# work around https://github.com/numpy/numpy/issues/10489
# TODO: can we de-duplicate parts of this with the next blocK?
result = np.bool_(result)
elif hasattr(result, "dtype") and is_object_dtype(result.dtype):
if is_object_dtype(result.dtype):
try:
if filter_type is None:
result = result.astype(np.float64)
Expand Down
8 changes: 8 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11658,6 +11658,14 @@ def logical_func(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs
"Option bool_only is not implemented with option level."
)
return self._agg_by_level(name, axis=axis, level=level, skipna=skipna)

if self.ndim > 1 and axis is None:
# Reduce along one dimension then the other, to simplify DataFrame._reduce
res = logical_func(
self, axis=0, bool_only=bool_only, skipna=skipna, **kwargs
)
return logical_func(res, skipna=skipna, **kwargs)

return self._reduce(
func,
name=name,
Expand Down