Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REF: implement cumulative ops block-wise #29872

Merged
merged 10 commits into from
Dec 30, 2019
36 changes: 23 additions & 13 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11326,20 +11326,30 @@ def cum_func(self, axis=None, skipna=True, *args, **kwargs):
else:
axis = self._get_axis_number(axis)

y = com.values_from_object(self).copy()

if skipna and issubclass(y.dtype.type, (np.datetime64, np.timedelta64)):
result = accum_func(y, axis)
mask = isna(self)
np.putmask(result, mask, iNaT)
elif skipna and not issubclass(y.dtype.type, (np.integer, np.bool_)):
mask = isna(self)
np.putmask(y, mask, mask_a)
result = accum_func(y, axis)
np.putmask(result, mask, mask_b)
else:
result = accum_func(y, axis)
if axis == 1:
return cum_func(self.T, axis=0, skipna=skipna, *args, **kwargs).T
jreback marked this conversation as resolved.
Show resolved Hide resolved

def na_accum_func(blk_values):
jreback marked this conversation as resolved.
Show resolved Hide resolved
# We will be applying this function to block values
if skipna and issubclass(
blk_values.dtype.type, (np.datetime64, np.timedelta64)
):
result = accum_func(blk_values.T, axis)
mask = isna(blk_values.T)
np.putmask(result, mask, iNaT)
elif skipna and not issubclass(
blk_values.dtype.type, (np.integer, np.bool_)
):
vals = blk_values.copy().T
mask = isna(vals)
np.putmask(vals, mask, mask_a)
result = accum_func(vals, axis)
np.putmask(result, mask, mask_b)
else:
result = accum_func(blk_values.T, axis)
return result.T

result = self._data.apply(na_accum_func)
d = self._construct_axes_dict()
d["copy"] = False
return self._constructor(result, **d).__finalize__(self)
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,10 @@ def apply(
axis = obj._info_axis_number
kwargs[k] = obj.reindex(b_items, axis=axis, copy=align_copy)

applied = getattr(b, f)(**kwargs)
if callable(f):
jreback marked this conversation as resolved.
Show resolved Hide resolved
applied = b.apply(f, **kwargs)
else:
applied = getattr(b, f)(**kwargs)
result_blocks = _extend_blocks(applied, result_blocks)

if len(result_blocks) == 0:
Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/frame/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -1329,8 +1329,8 @@ def test_agg_cython_table(self, df, func, expected, axis):
_get_cython_table_params(
DataFrame([[np.nan, 1], [1, 2]]),
[
("cumprod", DataFrame([[np.nan, 1], [1.0, 2.0]])),
("cumsum", DataFrame([[np.nan, 1], [1.0, 3.0]])),
("cumprod", DataFrame([[np.nan, 1], [1, 2]])),
("cumsum", DataFrame([[np.nan, 1], [1, 3]])),
],
),
),
Expand All @@ -1339,6 +1339,10 @@ def test_agg_cython_table_transform(self, df, func, expected, axis):
# GH 21224
# test transforming functions in
# pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
if axis == "columns" or axis == 1:
# operating blockwise doesn't let us preserve dtypes
expected = expected.astype("float64")

result = df.agg(func, axis=axis)
tm.assert_frame_equal(result, expected)

Expand Down