Skip to content

Commit

Permalink
BUG: Fix slicing subclasses of SparseDataFrames.
Browse files Browse the repository at this point in the history
Construct results via ``self._constructor`` instead of hard-coding
``SparseDataFrame``/``SparseSeries`` so that subclasses are preserved: this fixes
an issue where a multi-element slice of a subclass of SparseDataFrame
returned the SparseDataFrame type instead of the subclass type.

closes #13787
  • Loading branch information
sstanovnik authored and jreback committed Aug 2, 2016
1 parent 1f55e91 commit a7f7e1d
Show file tree
Hide file tree
Showing 7 changed files with 151 additions and 21 deletions.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,8 @@ API changes
- ``pd.Timedelta(None)`` is now accepted and will return ``NaT``, mirroring ``pd.Timestamp`` (:issue:`13687`)
- ``Timestamp``, ``Period``, ``DatetimeIndex``, ``PeriodIndex`` and ``.dt`` accessor have gained a ``.is_leap_year`` property to check whether the date belongs to a leap year. (:issue:`13727`)
- ``pd.read_hdf`` will now raise a ``ValueError`` instead of ``KeyError``, if a mode other than ``r``, ``r+`` and ``a`` is supplied. (:issue:`13623`)
- Subclassed ``SparseDataFrame`` and ``SparseSeries`` now preserve class types when slicing or transposing. (:issue:`13787`)



.. _whatsnew_0190.api.tolist:
Expand Down
8 changes: 8 additions & 0 deletions pandas/io/tests/test_pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,14 @@ def compare(self, vf, version):
comparator(result, expected, typ, version)
return data

def compare_sp_series_ts(self, res, exp, typ, version):
    """Assert that sparse series ``res`` equals ``exp``.

    The series class check is skipped when ``version`` is set and is
    at most "0.12.0"; otherwise the default (strict) comparison is used.
    """
    # SparseTimeSeries integrated into SparseSeries in 0.12.0
    # and deprecated in 0.17.0
    is_legacy = bool(version) and LooseVersion(version) <= "0.12.0"
    if not is_legacy:
        tm.assert_sp_series_equal(res, exp)
        return
    tm.assert_sp_series_equal(res, exp, check_series_type=False)

def compare_series_ts(self, result, expected, typ, version):
# GH 7748
tm.assert_series_equal(result, expected)
Expand Down
23 changes: 13 additions & 10 deletions pandas/sparse/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def _init_matrix(self, data, index, columns, dtype=None):
return self._init_dict(data, index, columns, dtype)

def __array_wrap__(self, result):
return SparseDataFrame(
return self._constructor(
result, index=self.index, columns=self.columns,
default_kind=self._default_kind,
default_fill_value=self._default_fill_value).__finalize__(self)
Expand Down Expand Up @@ -407,7 +407,7 @@ def _combine_frame(self, other, func, fill_value=None, level=None):
raise NotImplementedError("'level' argument is not supported")

if self.empty and other.empty:
return SparseDataFrame(index=new_index).__finalize__(self)
return self._constructor(index=new_index).__finalize__(self)

new_data = {}
new_fill_value = None
Expand Down Expand Up @@ -519,7 +519,8 @@ def _reindex_index(self, index, method, copy, level, fill_value=np.nan,
return self

if len(self.index) == 0:
return SparseDataFrame(index=index, columns=self.columns)
return self._constructor(
index=index, columns=self.columns).__finalize__(self)

indexer = self.index.get_indexer(index, method, limit=limit)
indexer = _ensure_platform_int(indexer)
Expand All @@ -540,8 +541,9 @@ def _reindex_index(self, index, method, copy, level, fill_value=np.nan,

new_series[col] = new

return SparseDataFrame(new_series, index=index, columns=self.columns,
default_fill_value=self._default_fill_value)
return self._constructor(
new_series, index=index, columns=self.columns,
default_fill_value=self._default_fill_value).__finalize__(self)

def _reindex_columns(self, columns, copy, level, fill_value, limit=None,
takeable=False):
Expand All @@ -556,8 +558,9 @@ def _reindex_columns(self, columns, copy, level, fill_value, limit=None,

# TODO: fill value handling
sdict = dict((k, v) for k, v in compat.iteritems(self) if k in columns)
return SparseDataFrame(sdict, index=self.index, columns=columns,
default_fill_value=self._default_fill_value)
return self._constructor(
sdict, index=self.index, columns=columns,
default_fill_value=self._default_fill_value).__finalize__(self)

def _reindex_with_indexers(self, reindexers, method=None, fill_value=None,
limit=None, copy=False, allow_dups=False):
Expand Down Expand Up @@ -586,8 +589,8 @@ def _reindex_with_indexers(self, reindexers, method=None, fill_value=None,
else:
new_arrays[col] = self[col]

return SparseDataFrame(new_arrays, index=index,
columns=columns).__finalize__(self)
return self._constructor(new_arrays, index=index,
columns=columns).__finalize__(self)

def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='',
sort=False):
Expand Down Expand Up @@ -644,7 +647,7 @@ def transpose(self, *args, **kwargs):
Returns a DataFrame with the rows/columns switched.
"""
nv.validate_transpose(args, kwargs)
return SparseDataFrame(
return self._constructor(
self.values.T, index=self.columns, columns=self.index,
default_fill_value=self._default_fill_value,
default_kind=self._default_kind).__finalize__(self)
Expand Down
12 changes: 6 additions & 6 deletions pandas/sparse/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,11 @@ def wrapper(self, other):
new_fill_value = op(np.float64(self.fill_value),
np.float64(other))

return SparseSeries(op(self.sp_values, other),
index=self.index,
sparse_index=self.sp_index,
fill_value=new_fill_value,
name=self.name)
return self._constructor(op(self.sp_values, other),
index=self.index,
sparse_index=self.sp_index,
fill_value=new_fill_value,
name=self.name)
else: # pragma: no cover
raise TypeError('operation with %s not supported' % type(other))

Expand All @@ -85,7 +85,7 @@ def _sparse_series_op(left, right, op, name):
new_name = _maybe_match_name(left, right)

result = _sparse_array_op(left, right, op, name)
return SparseSeries(result, index=new_index, name=new_name)
return left._constructor(result, index=new_index, name=new_name)


class SparseSeries(Series):
Expand Down
30 changes: 30 additions & 0 deletions pandas/tests/frame/test_subclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,3 +210,33 @@ def test_subclass_align_combinations(self):
tm.assert_series_equal(res1, exp2)
tm.assertIsInstance(res2, tm.SubclassedDataFrame)
tm.assert_frame_equal(res2, exp1)

def test_subclass_sparse_slice(self):
    """Slicing a subclassed sparse frame keeps the subclass and ``testattr``."""
    rows = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]
    frame = tm.SubclassedSparseDataFrame(rows)
    frame.testattr = "testattr"

    # ``.loc[:2]`` is expected to match the first three rows, while
    # ``.iloc[:2]`` and ``[:2]`` are expected to match the first two.
    first_three = tm.SubclassedSparseDataFrame(rows[:3])
    first_two = tm.SubclassedSparseDataFrame(rows[:2])
    tm.assert_sp_frame_equal(frame.loc[:2], first_three)
    tm.assert_sp_frame_equal(frame.iloc[:2], first_two)
    tm.assert_sp_frame_equal(frame[:2], first_two)

    # The custom attribute must be carried over to every slice.
    attr_value = "testattr"
    tm.assert_equal(frame.loc[:2].testattr, attr_value)
    tm.assert_equal(frame.iloc[:2].testattr, attr_value)
    tm.assert_equal(frame[:2].testattr, attr_value)

    # Selecting a single row yields the subclassed sparse series;
    # names are not compared.
    second_row = tm.SubclassedSparseSeries(rows[1])
    tm.assert_sp_series_equal(frame.loc[1], second_row,
                              check_names=False)
    tm.assert_sp_series_equal(frame.iloc[1], second_row,
                              check_names=False)

def test_subclass_sparse_transpose(self):
    """``.T`` on a subclassed sparse frame equals the transposed subclass frame."""
    original = tm.SubclassedSparseDataFrame([[1, 2, 3],
                                             [4, 5, 6]])
    transposed = original.T
    expected = tm.SubclassedSparseDataFrame([[1, 4],
                                             [2, 5],
                                             [3, 6]])
    tm.assert_sp_frame_equal(transposed, expected)
24 changes: 24 additions & 0 deletions pandas/tests/series/test_subclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,27 @@ def test_to_frame(self):
exp = tm.SubclassedDataFrame({'xxx': [1, 2, 3, 4]}, index=list('abcd'))
tm.assert_frame_equal(res, exp)
tm.assertIsInstance(res, tm.SubclassedDataFrame)

def test_subclass_sparse_slice(self):
    """Slices of a subclassed sparse series compare equal to subclass results."""
    series = tm.SubclassedSparseSeries([1, 2, 3, 4, 5])

    # ``.loc[1:3]`` is expected to cover labels 1, 2 and 3.
    by_label = tm.SubclassedSparseSeries([2.0, 3.0, 4.0],
                                         index=[1, 2, 3])
    tm.assert_sp_series_equal(series.loc[1:3], by_label)

    # ``.iloc[1:3]`` and ``[1:3]`` are expected to cover labels 1 and 2 only.
    by_position = tm.SubclassedSparseSeries([2.0, 3.0],
                                            index=[1, 2])
    tm.assert_sp_series_equal(series.iloc[1:3], by_position)
    tm.assert_sp_series_equal(series[1:3], by_position)

def test_subclass_sparse_addition(self):
    """Adding two subclassed sparse series gives a subclassed sparse series."""
    left = tm.SubclassedSparseSeries([1, 3, 5])
    right = tm.SubclassedSparseSeries([-2, 5, 12])
    total = left + right
    expected = tm.SubclassedSparseSeries([-1.0, 8.0, 17.0])
    tm.assert_sp_series_equal(total, expected)

def test_subclass_sparse_to_frame(self):
    """``to_frame`` on a subclassed sparse series gives the subclassed frame."""
    # NOTE(review): two data values are paired with a four-label index
    # here and in the expected frame -- confirm this is intended.
    labels = list('abcd')
    series = tm.SubclassedSparseSeries([1, 2], index=labels, name='xxx')
    result = series.to_frame()
    expected = tm.SubclassedSparseDataFrame({'xxx': [1, 2]},
                                            index=list('abcd'))
    tm.assert_sp_frame_equal(result, expected)
73 changes: 68 additions & 5 deletions pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1322,7 +1322,8 @@ def assert_panelnd_equal(left, right,
check_less_precise=False,
assert_func=assert_frame_equal,
check_names=False,
by_blocks=False):
by_blocks=False,
obj='Panel'):
"""Check that left and right Panels are equal.
Parameters
Expand All @@ -1343,6 +1344,9 @@ def assert_panelnd_equal(left, right,
by_blocks : bool, default False
Specify how to compare internal data. If False, compare by columns.
If True, compare by blocks.
obj : str, default 'Panel'
Specify the object name being compared, internally used to show
the appropriate assertion message.
"""

if check_panel_type:
Expand Down Expand Up @@ -1404,10 +1408,30 @@ def assert_sp_array_equal(left, right):


def assert_sp_series_equal(left, right, exact_indices=True,
check_names=True, obj='SparseSeries'):
check_series_type=True,
check_names=True,
obj='SparseSeries'):
"""Check that the left and right SparseSeries are equal.
Parameters
----------
left : SparseSeries
right : SparseSeries
exact_indices : bool, default True
check_series_type : bool, default True
Whether to check the SparseSeries class is identical.
check_names : bool, default True
Whether to check the SparseSeries name attribute.
obj : str, default 'SparseSeries'
Specify the object name being compared, internally used to show
the appropriate assertion message.
"""
assertIsInstance(left, pd.SparseSeries, '[SparseSeries]')
assertIsInstance(right, pd.SparseSeries, '[SparseSeries]')

if check_series_type:
assert_class_equal(left, right, obj=obj)

assert_index_equal(left.index, right.index,
obj='{0}.index'.format(obj))

Expand All @@ -1421,14 +1445,29 @@ def assert_sp_series_equal(left, right, exact_indices=True,


def assert_sp_frame_equal(left, right, exact_indices=True,
check_frame_type=True,
obj='SparseDataFrame'):
"""
exact: Series SparseIndex objects must be exactly the same, otherwise just
compare dense representations
"""Check that the left and right SparseDataFrame are equal.
Parameters
----------
left : SparseDataFrame
right : SparseDataFrame
exact_indices : bool, default True
SparseSeries SparseIndex objects must be exactly the same,
otherwise just compare dense representations.
check_frame_type : bool, default True
Whether to check the SparseDataFrame class is identical.
obj : str, default 'SparseDataFrame'
Specify the object name being compared, internally used to show
the appropriate assertion message.
"""
assertIsInstance(left, pd.SparseDataFrame, '[SparseDataFrame]')
assertIsInstance(right, pd.SparseDataFrame, '[SparseDataFrame]')

if check_frame_type:
assert_class_equal(left, right, obj=obj)

assert_index_equal(left.index, right.index,
obj='{0}.index'.format(obj))
assert_index_equal(left.columns, right.columns,
Expand Down Expand Up @@ -2607,6 +2646,30 @@ def _constructor_sliced(self):
return SubclassedSeries


class SubclassedSparseSeries(pd.SparseSeries):
    """Test helper: a ``pd.SparseSeries`` subclass with subclass constructors."""

    # Custom attribute name declared through ``_metadata``.
    _metadata = ['testattr']

    @property
    def _constructor(self):
        """Class used to build results of the same kind as this series."""
        return SubclassedSparseSeries

    @property
    def _constructor_expanddim(self):
        """Frame class paired with this series subclass."""
        return SubclassedSparseDataFrame


class SubclassedSparseDataFrame(pd.SparseDataFrame):
    """Test helper: a ``pd.SparseDataFrame`` subclass with subclass constructors."""

    # Custom attribute name declared through ``_metadata``.
    _metadata = ['testattr']

    @property
    def _constructor(self):
        """Class used to build results of the same kind as this frame."""
        return SubclassedSparseDataFrame

    @property
    def _constructor_sliced(self):
        """Series class paired with this frame subclass."""
        return SubclassedSparseSeries


@contextmanager
def patch(ob, attr, value):
"""Temporarily patch an attribute of an object.
Expand Down

0 comments on commit a7f7e1d

Please sign in to comment.