From a7f7e1d18baa36afe9317aa48fbcf170b0375318 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C5=A1o=20Stanovnik?= Date: Mon, 25 Jul 2016 16:58:27 +0200 Subject: [PATCH] BUG: Fix slicing subclasses of SparseDataFrames. Use proper subclassing behaviour so subclasses work properly: this fixes an issue where a multi-element slice of a subclass of SparseDataFrame returned the SparseDataFrame type instead of the subclass type. closes #13787 --- doc/source/whatsnew/v0.19.0.txt | 2 + pandas/io/tests/test_pickle.py | 8 +++ pandas/sparse/frame.py | 23 +++++---- pandas/sparse/series.py | 12 ++--- pandas/tests/frame/test_subclass.py | 30 ++++++++++++ pandas/tests/series/test_subclass.py | 24 +++++++++ pandas/util/testing.py | 73 ++++++++++++++++++++++++++-- 7 files changed, 151 insertions(+), 21 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index d440ff748292e..6de22272c65e6 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -380,6 +380,8 @@ API changes - ``pd.Timedelta(None)`` is now accepted and will return ``NaT``, mirroring ``pd.Timestamp`` (:issue:`13687`) - ``Timestamp``, ``Period``, ``DatetimeIndex``, ``PeriodIndex`` and ``.dt`` accessor have gained a ``.is_leap_year`` property to check whether the date belongs to a leap year. (:issue:`13727`) - ``pd.read_hdf`` will now raise a ``ValueError`` instead of ``KeyError``, if a mode other than ``r``, ``r+`` and ``a`` is supplied. (:issue:`13623`) +- Subclassed ``SparseDataFrame`` and ``SparseSeries`` now preserve class types when slicing or transposing. (:issue:`13787`) + .. _whatsnew_0190.api.tolist: diff --git a/pandas/io/tests/test_pickle.py b/pandas/io/tests/test_pickle.py index 6019144d59698..94885d90d3c4a 100644 --- a/pandas/io/tests/test_pickle.py +++ b/pandas/io/tests/test_pickle.py @@ -86,6 +86,14 @@ def compare(self, vf, version): comparator(result, expected, typ, version) return data + def compare_sp_series_ts(self, res, exp, typ, version): + # SparseTimeSeries integrated into SparseSeries in 0.12.0 + # and deprecated in 0.17.0 + if version and LooseVersion(version) <= "0.12.0": + tm.assert_sp_series_equal(res, exp, check_series_type=False) + else: + tm.assert_sp_series_equal(res, exp) + def compare_series_ts(self, result, expected, typ, version): # GH 7748 tm.assert_series_equal(result, expected) diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 811d8019c7fee..2ea0536ca4fbb 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -188,7 +188,7 @@ def _init_matrix(self, data, index, columns, dtype=None): return self._init_dict(data, index, columns, dtype) def __array_wrap__(self, result): - return SparseDataFrame( + return self._constructor( result, index=self.index, columns=self.columns, default_kind=self._default_kind, default_fill_value=self._default_fill_value).__finalize__(self) @@ -407,7 +407,7 @@ def _combine_frame(self, other, func, fill_value=None, level=None): raise NotImplementedError("'level' argument is not supported") if self.empty and other.empty: - return SparseDataFrame(index=new_index).__finalize__(self) + return self._constructor(index=new_index).__finalize__(self) new_data = {} new_fill_value = None @@ -519,7 +519,8 @@ def _reindex_index(self, index, method, copy, level, fill_value=np.nan, return self if len(self.index) == 0: - return SparseDataFrame(index=index, columns=self.columns) + return self._constructor( + index=index, columns=self.columns).__finalize__(self) indexer = self.index.get_indexer(index, method, limit=limit) indexer = _ensure_platform_int(indexer) @@ -540,8 +541,9 @@ def _reindex_index(self, index, method, copy, level, fill_value=np.nan, new_series[col] = new - return SparseDataFrame(new_series, index=index, columns=self.columns, - default_fill_value=self._default_fill_value) + return self._constructor( + new_series, index=index, columns=self.columns, + default_fill_value=self._default_fill_value).__finalize__(self) def _reindex_columns(self, columns, copy, level, fill_value, limit=None, takeable=False): @@ -556,8 +558,9 @@ def _reindex_columns(self, columns, copy, level, fill_value, limit=None, # TODO: fill value handling sdict = dict((k, v) for k, v in compat.iteritems(self) if k in columns) - return SparseDataFrame(sdict, index=self.index, columns=columns, - default_fill_value=self._default_fill_value) + return self._constructor( + sdict, index=self.index, columns=columns, + default_fill_value=self._default_fill_value).__finalize__(self) def _reindex_with_indexers(self, reindexers, method=None, fill_value=None, limit=None, copy=False, allow_dups=False): @@ -586,8 +589,8 @@ def _reindex_with_indexers(self, reindexers, method=None, fill_value=None, else: new_arrays[col] = self[col] - return SparseDataFrame(new_arrays, index=index, - columns=columns).__finalize__(self) + return self._constructor(new_arrays, index=index, + columns=columns).__finalize__(self) def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='', sort=False): @@ -644,7 +647,7 @@ def transpose(self, *args, **kwargs): Returns a DataFrame with the rows/columns switched. """ nv.validate_transpose(args, kwargs) - return SparseDataFrame( + return self._constructor( self.values.T, index=self.columns, columns=self.index, default_fill_value=self._default_fill_value, default_kind=self._default_kind).__finalize__(self) diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index 951c2ae0c0d5a..6c4392dbf7cb4 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -63,11 +63,11 @@ def wrapper(self, other): new_fill_value = op(np.float64(self.fill_value), np.float64(other)) - return SparseSeries(op(self.sp_values, other), - index=self.index, - sparse_index=self.sp_index, - fill_value=new_fill_value, - name=self.name) + return self._constructor(op(self.sp_values, other), + index=self.index, + sparse_index=self.sp_index, + fill_value=new_fill_value, + name=self.name) else: # pragma: no cover raise TypeError('operation with %s not supported' % type(other)) @@ -85,7 +85,7 @@ def _sparse_series_op(left, right, op, name): new_name = _maybe_match_name(left, right) result = _sparse_array_op(left, right, op, name) - return SparseSeries(result, index=new_index, name=new_name) + return left._constructor(result, index=new_index, name=new_name) class SparseSeries(Series): diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index ee12d9e84511c..0e0ee75a30c84 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -210,3 +210,33 @@ def test_subclass_align_combinations(self): tm.assert_series_equal(res1, exp2) tm.assertIsInstance(res2, tm.SubclassedDataFrame) tm.assert_frame_equal(res2, exp1) + + def test_subclass_sparse_slice(self): + rows = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]] + ssdf = tm.SubclassedSparseDataFrame(rows) + ssdf.testattr = "testattr" + + tm.assert_sp_frame_equal(ssdf.loc[:2], + tm.SubclassedSparseDataFrame(rows[:3])) + tm.assert_sp_frame_equal(ssdf.iloc[:2], + tm.SubclassedSparseDataFrame(rows[:2])) + tm.assert_sp_frame_equal(ssdf[:2], + tm.SubclassedSparseDataFrame(rows[:2])) + tm.assert_equal(ssdf.loc[:2].testattr, "testattr") + tm.assert_equal(ssdf.iloc[:2].testattr, "testattr") + tm.assert_equal(ssdf[:2].testattr, "testattr") + + tm.assert_sp_series_equal(ssdf.loc[1], + tm.SubclassedSparseSeries(rows[1]), + check_names=False) + tm.assert_sp_series_equal(ssdf.iloc[1], + tm.SubclassedSparseSeries(rows[1]), + check_names=False) + + def test_subclass_sparse_transpose(self): + ossdf = tm.SubclassedSparseDataFrame([[1, 2, 3], + [4, 5, 6]]) + essdf = tm.SubclassedSparseDataFrame([[1, 4], + [2, 5], + [3, 6]]) + tm.assert_sp_frame_equal(ossdf.T, essdf) diff --git a/pandas/tests/series/test_subclass.py b/pandas/tests/series/test_subclass.py index 016113961ec74..dabecefaee9d1 100644 --- a/pandas/tests/series/test_subclass.py +++ b/pandas/tests/series/test_subclass.py @@ -31,3 +31,27 @@ def test_to_frame(self): exp = tm.SubclassedDataFrame({'xxx': [1, 2, 3, 4]}, index=list('abcd')) tm.assert_frame_equal(res, exp) tm.assertIsInstance(res, tm.SubclassedDataFrame) + + def test_subclass_sparse_slice(self): + s = tm.SubclassedSparseSeries([1, 2, 3, 4, 5]) + tm.assert_sp_series_equal(s.loc[1:3], + tm.SubclassedSparseSeries([2.0, 3.0, 4.0], + index=[1, 2, 3])) + tm.assert_sp_series_equal(s.iloc[1:3], + tm.SubclassedSparseSeries([2.0, 3.0], + index=[1, 2])) + tm.assert_sp_series_equal(s[1:3], + tm.SubclassedSparseSeries([2.0, 3.0], + index=[1, 2])) + + def test_subclass_sparse_addition(self): + s1 = tm.SubclassedSparseSeries([1, 3, 5]) + s2 = tm.SubclassedSparseSeries([-2, 5, 12]) + tm.assert_sp_series_equal(s1 + s2, + tm.SubclassedSparseSeries([-1.0, 8.0, 17.0])) + + def test_subclass_sparse_to_frame(self): + s = tm.SubclassedSparseSeries([1, 2], index=list('abcd'), name='xxx') + res = s.to_frame() + exp = tm.SubclassedSparseDataFrame({'xxx': [1, 2]}, index=list('abcd')) + tm.assert_sp_frame_equal(res, exp) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index e49d92e4ab202..e4a84ea4ae296 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1322,7 +1322,8 @@ def assert_panelnd_equal(left, right, check_less_precise=False, assert_func=assert_frame_equal, check_names=False, - by_blocks=False): + by_blocks=False, + obj='Panel'): """Check that left and right Panels are equal. Parameters @@ -1343,6 +1344,9 @@ def assert_panelnd_equal(left, right, by_blocks : bool, default False Specify how to compare internal data. If False, compare by columns. If True, compare by blocks. + obj : str, default 'Panel' + Specify the object name being compared, internally used to show + the appropriate assertion message. """ if check_panel_type: @@ -1404,10 +1408,30 @@ def assert_sp_array_equal(left, right): def assert_sp_series_equal(left, right, exact_indices=True, - check_names=True, obj='SparseSeries'): + check_series_type=True, + check_names=True, + obj='SparseSeries'): + """Check that the left and right SparseSeries are equal. + + Parameters + ---------- + left : SparseSeries + right : SparseSeries + exact_indices : bool, default True + check_series_type : bool, default True + Whether to check the SparseSeries class is identical. + check_names : bool, default True + Whether to check the SparseSeries name attribute. + obj : str, default 'SparseSeries' + Specify the object name being compared, internally used to show + the appropriate assertion message. + """ assertIsInstance(left, pd.SparseSeries, '[SparseSeries]') assertIsInstance(right, pd.SparseSeries, '[SparseSeries]') + if check_series_type: + assert_class_equal(left, right, obj=obj) + assert_index_equal(left.index, right.index, obj='{0}.index'.format(obj)) @@ -1421,14 +1445,29 @@ def assert_sp_series_equal(left, right, exact_indices=True, def assert_sp_frame_equal(left, right, exact_indices=True, + check_frame_type=True, obj='SparseDataFrame'): - """ - exact: Series SparseIndex objects must be exactly the same, otherwise just - compare dense representations + """Check that the left and right SparseDataFrame are equal. + + Parameters + ---------- + left : SparseDataFrame + right : SparseDataFrame + exact_indices : bool, default True + SparseSeries SparseIndex objects must be exactly the same, + otherwise just compare dense representations. + check_frame_type : bool, default True + Whether to check the SparseDataFrame class is identical. + obj : str, default 'SparseDataFrame' + Specify the object name being compared, internally used to show + the appropriate assertion message. """ assertIsInstance(left, pd.SparseDataFrame, '[SparseDataFrame]') assertIsInstance(right, pd.SparseDataFrame, '[SparseDataFrame]') + if check_frame_type: + assert_class_equal(left, right, obj=obj) + assert_index_equal(left.index, right.index, obj='{0}.index'.format(obj)) assert_index_equal(left.columns, right.columns, @@ -2607,6 +2646,30 @@ def _constructor_sliced(self): return SubclassedSeries +class SubclassedSparseSeries(pd.SparseSeries): + _metadata = ['testattr'] + + @property + def _constructor(self): + return SubclassedSparseSeries + + @property + def _constructor_expanddim(self): + return SubclassedSparseDataFrame + + +class SubclassedSparseDataFrame(pd.SparseDataFrame): + _metadata = ['testattr'] + + @property + def _constructor(self): + return SubclassedSparseDataFrame + + @property + def _constructor_sliced(self): + return SubclassedSparseSeries + + @contextmanager def patch(ob, attr, value): """Temporarily patch an attribute of an object.