From e6ab0df8d4dc5b460bc2deaae509b52308d69082 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 26 Nov 2019 15:22:07 -0800 Subject: [PATCH 1/6] REF: implement cumulative ops block-wise --- pandas/core/generic.py | 36 ++++++++++++++++++++----------- pandas/core/internals/managers.py | 5 ++++- pandas/tests/frame/test_apply.py | 8 +++++-- 3 files changed, 33 insertions(+), 16 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2e2ae4e1dfa0a..54c29fcd46aaf 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11326,20 +11326,30 @@ def cum_func(self, axis=None, skipna=True, *args, **kwargs): else: axis = self._get_axis_number(axis) - y = com.values_from_object(self).copy() - - if skipna and issubclass(y.dtype.type, (np.datetime64, np.timedelta64)): - result = accum_func(y, axis) - mask = isna(self) - np.putmask(result, mask, iNaT) - elif skipna and not issubclass(y.dtype.type, (np.integer, np.bool_)): - mask = isna(self) - np.putmask(y, mask, mask_a) - result = accum_func(y, axis) - np.putmask(result, mask, mask_b) - else: - result = accum_func(y, axis) + if axis == 1: + return cum_func(self.T, axis=0, skipna=skipna, *args, **kwargs).T + + def na_accum_func(blk_values): + # We will be applying this function to block values + if skipna and issubclass( + blk_values.dtype.type, (np.datetime64, np.timedelta64) + ): + result = accum_func(blk_values.T, axis) + mask = isna(blk_values.T) + np.putmask(result, mask, iNaT) + elif skipna and not issubclass( + blk_values.dtype.type, (np.integer, np.bool_) + ): + vals = blk_values.copy().T + mask = isna(vals) + np.putmask(vals, mask, mask_a) + result = accum_func(vals, axis) + np.putmask(result, mask, mask_b) + else: + result = accum_func(blk_values.T, axis) + return result.T + result = self._data.apply(na_accum_func) d = self._construct_axes_dict() d["copy"] = False return self._constructor(result, **d).__finalize__(self) diff --git a/pandas/core/internals/managers.py 
b/pandas/core/internals/managers.py index c37a8ea5e42a4..c296f1556fa19 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -430,7 +430,10 @@ def apply( axis = obj._info_axis_number kwargs[k] = obj.reindex(b_items, axis=axis, copy=align_copy) - applied = getattr(b, f)(**kwargs) + if callable(f): + applied = b.apply(f, **kwargs) + else: + applied = getattr(b, f)(**kwargs) result_blocks = _extend_blocks(applied, result_blocks) if len(result_blocks) == 0: diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 3c97a87c95bd2..d30fb53729486 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -1329,8 +1329,8 @@ def test_agg_cython_table(self, df, func, expected, axis): _get_cython_table_params( DataFrame([[np.nan, 1], [1, 2]]), [ - ("cumprod", DataFrame([[np.nan, 1], [1.0, 2.0]])), - ("cumsum", DataFrame([[np.nan, 1], [1.0, 3.0]])), + ("cumprod", DataFrame([[np.nan, 1], [1, 2]])), + ("cumsum", DataFrame([[np.nan, 1], [1, 3]])), ], ), ), @@ -1339,6 +1339,10 @@ def test_agg_cython_table_transform(self, df, func, expected, axis): # GH 21224 # test transforming functions in # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum) + if axis == "columns" or axis == 1: + # operating blockwise doesn't let us preserve dtypes + expected = expected.astype("float64") + result = df.agg(func, axis=axis) tm.assert_frame_equal(result, expected) From bdfbdc96f58f4cf069a9153d6e9bd3282e55cbe9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 27 Dec 2019 09:06:12 -0800 Subject: [PATCH 2/6] test+whatsnew --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/tests/frame/test_cumulative.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 7948b3bf2fd2f..f92506cf4964f 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -749,6 +749,7 @@ Numeric - Bug in 
:class:`NumericIndex` construction that caused :class:`UInt64Index` to be casted to :class:`Float64Index` when integers in the ``np.uint64`` range were used to index a :class:`DataFrame` (:issue:`28279`) - Bug in :meth:`Series.interpolate` when using method=`index` with an unsorted index, would previously return incorrect results. (:issue:`21037`) - Bug in :meth:`DataFrame.round` where a :class:`DataFrame` with a :class:`CategoricalIndex` of :class:`IntervalIndex` columns would incorrectly raise a ``TypeError`` (:issue:`30063`) +- Bug in :class:`DataFrame` cumulative operations (e.g. cumsum, cummax) incorrect casting to object-dtype (:issue:`19296`) Conversion ^^^^^^^^^^ diff --git a/pandas/tests/frame/test_cumulative.py b/pandas/tests/frame/test_cumulative.py index ad2cbff888b2e..f7c299d5f8c6d 100644 --- a/pandas/tests/frame/test_cumulative.py +++ b/pandas/tests/frame/test_cumulative.py @@ -118,3 +118,18 @@ def test_cummax(self, datetime_frame): # fix issue cummax_xs = datetime_frame.cummax(axis=1) assert np.shape(cummax_xs) == np.shape(datetime_frame) + + def test_cumulative_ops_preserve_dtypes(self): + # GH#19296 dont incorrectly upcast to object + df = DataFrame({"A": [1, 2, 3], "B": [1, 2, 3.0], "C": [True, False, False],}) + + result = df.cumsum() + + expected = DataFrame( + { + "A": Series([1, 3, 6], dtype=int), + "B": Series([1, 3, 6], dtype=float), + "C": Series([1, 1, 1], dtype=int), + } + ) + tm.assert_frame_equal(result, expected) From 5969a5a8281cd7ec7a99ab0363eb47f719bbbcab Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 27 Dec 2019 09:07:43 -0800 Subject: [PATCH 3/6] flake8 fixup --- pandas/tests/frame/test_cumulative.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_cumulative.py b/pandas/tests/frame/test_cumulative.py index f7c299d5f8c6d..3385231e8cfe3 100644 --- a/pandas/tests/frame/test_cumulative.py +++ b/pandas/tests/frame/test_cumulative.py @@ -121,7 +121,7 @@ def test_cummax(self, 
datetime_frame): def test_cumulative_ops_preserve_dtypes(self): # GH#19296 dont incorrectly upcast to object - df = DataFrame({"A": [1, 2, 3], "B": [1, 2, 3.0], "C": [True, False, False],}) + df = DataFrame({"A": [1, 2, 3], "B": [1, 2, 3.0], "C": [True, False, False]}) result = df.cumsum() From 3793d5a8465de8c6d79820c8fc906892de53162d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 27 Dec 2019 10:00:33 -0800 Subject: [PATCH 4/6] troubleshoot windows/32bit --- pandas/tests/frame/test_cumulative.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/test_cumulative.py b/pandas/tests/frame/test_cumulative.py index 3385231e8cfe3..b7309747f3290 100644 --- a/pandas/tests/frame/test_cumulative.py +++ b/pandas/tests/frame/test_cumulative.py @@ -127,9 +127,9 @@ def test_cumulative_ops_preserve_dtypes(self): expected = DataFrame( { - "A": Series([1, 3, 6], dtype=int), - "B": Series([1, 3, 6], dtype=float), - "C": Series([1, 1, 1], dtype=int), + "A": Series([1, 3, 6], dtype=np.int64), + "B": Series([1, 3, 6], dtype=np.float64), + "C": Series([1, 1, 1], dtype=np.int64), } ) tm.assert_frame_equal(result, expected) From 227b6add0e690892fe913a2649f9578e95df13ae Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 27 Dec 2019 10:49:07 -0800 Subject: [PATCH 5/6] troubleshoot windows build --- pandas/tests/frame/test_cumulative.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_cumulative.py b/pandas/tests/frame/test_cumulative.py index b7309747f3290..f279d587e52f1 100644 --- a/pandas/tests/frame/test_cumulative.py +++ b/pandas/tests/frame/test_cumulative.py @@ -129,7 +129,7 @@ def test_cumulative_ops_preserve_dtypes(self): { "A": Series([1, 3, 6], dtype=np.int64), "B": Series([1, 3, 6], dtype=np.float64), - "C": Series([1, 1, 1], dtype=np.int64), + "C": Series([1, 1, 1], dtype=np.intp), } ) tm.assert_frame_equal(result, expected) From 567b88123e7d835605f9f8ddafdb333058fb414f Mon Sep 17
00:00:00 2001 From: jbrockmendel Date: Fri, 27 Dec 2019 12:22:51 -0800 Subject: [PATCH 6/6] troubleshoot windows CI --- pandas/tests/frame/test_cumulative.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_cumulative.py b/pandas/tests/frame/test_cumulative.py index f279d587e52f1..2deeeb95d057d 100644 --- a/pandas/tests/frame/test_cumulative.py +++ b/pandas/tests/frame/test_cumulative.py @@ -129,7 +129,7 @@ def test_cumulative_ops_preserve_dtypes(self): { "A": Series([1, 3, 6], dtype=np.int64), "B": Series([1, 3, 6], dtype=np.float64), - "C": Series([1, 1, 1], dtype=np.intp), + "C": df["C"].cumsum(), } ) tm.assert_frame_equal(result, expected)