Skip to content

Commit

Permalink
API: Return sparse objects always for cumsum
Browse files Browse the repository at this point in the history
Always return SparseArray and SparseSeries for
SparseArray.cumsum() and SparseSeries.cumsum()
respectively, regardless of fill_value.

Closes gh-12855.
  • Loading branch information
gfyoung committed Nov 30, 2016
1 parent 2bd9c95 commit 59fd828
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 52 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ Backwards incompatible API changes


- ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv`` and will be removed in the future (:issue:`12665`)
- ``SparseArray.cumsum()`` and ``SparseSeries.cumsum()`` will now always return ``SparseArray`` and ``SparseSeries`` respectively (:issue:`12855`)



Expand Down
21 changes: 15 additions & 6 deletions pandas/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -620,19 +620,28 @@ def sum(self, axis=0, *args, **kwargs):

def cumsum(self, axis=0, *args, **kwargs):
"""
Cumulative sum of values. Preserves locations of NaN values
Cumulative sum of non-NA/null values.
When performing the cumulative summation, any NA/null values will
be skipped. The resulting SparseArray will preserve the locations of
NaN values, but the fill value will be `np.nan` regardless.
Parameters
----------
axis : int
Axis over which to perform the cumulative summation. Currently,
this parameter is ignored because `SparseArray` only works with
1-D array-like objects.
Returns
-------
cumsum : Series
cumsum : SparseArray
"""
nv.validate_cumsum(args, kwargs)

# TODO: gh-12855 - return a SparseArray here
if notnull(self.fill_value):
return self.to_dense().cumsum()
if not self._null_fill_value:
return SparseArray(self.to_dense()).cumsum()

# TODO: what if sp_values contains NaN??
return SparseArray(self.sp_values.cumsum(), sparse_index=self.sp_index,
fill_value=self.fill_value)

Expand Down
29 changes: 19 additions & 10 deletions pandas/sparse/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -630,21 +630,30 @@ def take(self, indices, axis=0, convert=True, *args, **kwargs):

def cumsum(self, axis=0, *args, **kwargs):
"""
Cumulative sum of values. Preserves locations of NaN values
Cumulative sum of non-NA/null values.
When performing the cumulative summation, any NA/null values will
be skipped. The resulting SparseSeries will preserve the locations of
NaN values, but the fill value will be `np.nan` regardless.
Parameters
----------
axis : int
Axis over which to perform the cumulative summation. This
parameter is ignored because `SparseSeries` is 1-D, but it
is kept in the signature for consistency with other sparse
array-like objects.
Returns
-------
cumsum : SparseSeries if `self` has a null `fill_value` and a
generic Series otherwise
cumsum : SparseSeries
"""
nv.validate_cumsum(args, kwargs)
new_array = SparseArray.cumsum(self.values)
if isinstance(new_array, SparseArray):
return self._constructor(
new_array, index=self.index,
sparse_index=new_array.sp_index).__finalize__(self)
# TODO: gh-12855 - return a SparseSeries here
return Series(new_array, index=self.index).__finalize__(self)
new_array = self.values.cumsum()

return self._constructor(
new_array, index=self.index,
sparse_index=new_array.sp_index).__finalize__(self)

@Appender(generic._shared_docs['isnull'])
def isnull(self):
Expand Down
70 changes: 38 additions & 32 deletions pandas/sparse/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -688,46 +688,52 @@ def test_numpy_sum(self):
SparseArray(data), out=out)

def test_cumsum(self):
data = np.arange(10).astype(float)
out = SparseArray(data).cumsum()
expected = SparseArray(data.cumsum())
tm.assert_sp_array_equal(out, expected)
non_null_data = np.array([1, 2, 3, 4, 5], dtype=float)
non_null_expected = SparseArray(non_null_data.cumsum())

# TODO: gh-12855 - return a SparseArray here
data[5] = np.nan
out = SparseArray(data, fill_value=2).cumsum()
self.assertNotIsInstance(out, SparseArray)
tm.assert_numpy_array_equal(out, data.cumsum())
null_data = np.array([1, 2, np.nan, 4, 5], dtype=float)
null_expected = SparseArray(np.array([1.0, 3.0, np.nan, 7.0, 12.0]))

for data, expected in [
(null_data, null_expected),
(non_null_data, non_null_expected)
]:
out = SparseArray(data).cumsum()
tm.assert_sp_array_equal(out, expected)

out = SparseArray(data, fill_value=np.nan).cumsum()
expected = SparseArray(np.array([
0, 1, 3, 6, 10, np.nan, 16, 23, 31, 40]))
tm.assert_sp_array_equal(out, expected)
out = SparseArray(data, fill_value=np.nan).cumsum()
tm.assert_sp_array_equal(out, expected)

out = SparseArray(data, fill_value=2).cumsum()
tm.assert_sp_array_equal(out, expected)

def test_numpy_cumsum(self):
data = np.arange(10).astype(float)
out = np.cumsum(SparseArray(data))
expected = SparseArray(data.cumsum())
tm.assert_sp_array_equal(out, expected)
non_null_data = np.array([1, 2, 3, 4, 5], dtype=float)
non_null_expected = SparseArray(non_null_data.cumsum())

# TODO: gh-12855 - return a SparseArray here
data[5] = np.nan
out = np.cumsum(SparseArray(data, fill_value=2))
self.assertNotIsInstance(out, SparseArray)
tm.assert_numpy_array_equal(out, data.cumsum())
null_data = np.array([1, 2, np.nan, 4, 5], dtype=float)
null_expected = SparseArray(np.array([1.0, 3.0, np.nan, 7.0, 12.0]))

out = np.cumsum(SparseArray(data, fill_value=np.nan))
expected = SparseArray(np.array([
0, 1, 3, 6, 10, np.nan, 16, 23, 31, 40]))
tm.assert_sp_array_equal(out, expected)
for data, expected in [
(null_data, null_expected),
(non_null_data, non_null_expected)
]:
out = np.cumsum(SparseArray(data))
tm.assert_sp_array_equal(out, expected)

msg = "the 'dtype' parameter is not supported"
tm.assertRaisesRegexp(ValueError, msg, np.cumsum,
SparseArray(data), dtype=np.int64)
out = np.cumsum(SparseArray(data, fill_value=np.nan))
tm.assert_sp_array_equal(out, expected)

msg = "the 'out' parameter is not supported"
tm.assertRaisesRegexp(ValueError, msg, np.cumsum,
SparseArray(data), out=out)
out = np.cumsum(SparseArray(data, fill_value=2))
tm.assert_sp_array_equal(out, expected)

msg = "the 'dtype' parameter is not supported"
tm.assertRaisesRegexp(ValueError, msg, np.cumsum,
SparseArray(data), dtype=np.int64)

msg = "the 'out' parameter is not supported"
tm.assertRaisesRegexp(ValueError, msg, np.cumsum,
SparseArray(data), out=out)

def test_mean(self):
data = np.arange(10).astype(float)
Expand Down
4 changes: 0 additions & 4 deletions pandas/sparse/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1331,21 +1331,17 @@ def test_cumsum(self):
expected = SparseSeries(self.bseries.to_dense().cumsum())
tm.assert_sp_series_equal(result, expected)

# TODO: gh-12855 - return a SparseSeries here
result = self.zbseries.cumsum()
expected = self.zbseries.to_dense().cumsum()
self.assertNotIsInstance(result, SparseSeries)
tm.assert_series_equal(result, expected)

def test_numpy_cumsum(self):
result = np.cumsum(self.bseries)
expected = SparseSeries(self.bseries.to_dense().cumsum())
tm.assert_sp_series_equal(result, expected)

# TODO: gh-12855 - return a SparseSeries here
result = np.cumsum(self.zbseries)
expected = self.zbseries.to_dense().cumsum()
self.assertNotIsInstance(result, SparseSeries)
tm.assert_series_equal(result, expected)

msg = "the 'dtype' parameter is not supported"
Expand Down

0 comments on commit 59fd828

Please sign in to comment.