From a7f7e1d18baa36afe9317aa48fbcf170b0375318 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sa=C5=A1o=20Stanovnik?= <sstanovnik@gmail.com>
Date: Mon, 25 Jul 2016 16:58:27 +0200
Subject: [PATCH] BUG: Fix slicing subclasses of SparseDataFrames.

Build results through ``_constructor`` instead of hard-coding the base
sparse classes: this fixes an issue where a multi-element slice of a
subclass of SparseDataFrame returned SparseDataFrame, not the subclass.

closes #13787
---
 doc/source/whatsnew/v0.19.0.txt      |  2 +
 pandas/io/tests/test_pickle.py       |  8 +++
 pandas/sparse/frame.py               | 23 +++++----
 pandas/sparse/series.py              | 12 ++---
 pandas/tests/frame/test_subclass.py  | 30 ++++++++++++
 pandas/tests/series/test_subclass.py | 24 +++++++++
 pandas/util/testing.py               | 73 ++++++++++++++++++++++++++--
 7 files changed, 151 insertions(+), 21 deletions(-)

diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
index d440ff748292e..6de22272c65e6 100644
--- a/doc/source/whatsnew/v0.19.0.txt
+++ b/doc/source/whatsnew/v0.19.0.txt
@@ -380,6 +380,8 @@ API changes
 - ``pd.Timedelta(None)`` is now accepted and will return ``NaT``, mirroring ``pd.Timestamp`` (:issue:`13687`)
 - ``Timestamp``, ``Period``, ``DatetimeIndex``, ``PeriodIndex`` and ``.dt`` accessor have gained a ``.is_leap_year`` property to check whether the date belongs to a leap year. (:issue:`13727`)
 - ``pd.read_hdf`` will now raise a ``ValueError`` instead of ``KeyError``, if a mode other than ``r``, ``r+`` and ``a`` is supplied. (:issue:`13623`)
+- Subclassed ``SparseDataFrame`` and ``SparseSeries`` now preserve class types when slicing or transposing. (:issue:`13787`)
+
 
 
 .. _whatsnew_0190.api.tolist:
diff --git a/pandas/io/tests/test_pickle.py b/pandas/io/tests/test_pickle.py
index 6019144d59698..94885d90d3c4a 100644
--- a/pandas/io/tests/test_pickle.py
+++ b/pandas/io/tests/test_pickle.py
@@ -86,6 +86,14 @@ def compare(self, vf, version):
                 comparator(result, expected, typ, version)
         return data
 
+    def compare_sp_series_ts(self, res, exp, typ, version):
+        # SparseTimeSeries integrated into SparseSeries in 0.12.0
+        # and deprecated in 0.17.0
+        if version and LooseVersion(version) <= "0.12.0":
+            tm.assert_sp_series_equal(res, exp, check_series_type=False)
+        else:
+            tm.assert_sp_series_equal(res, exp)
+
     def compare_series_ts(self, result, expected, typ, version):
         # GH 7748
         tm.assert_series_equal(result, expected)
diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py
index 811d8019c7fee..2ea0536ca4fbb 100644
--- a/pandas/sparse/frame.py
+++ b/pandas/sparse/frame.py
@@ -188,7 +188,7 @@ def _init_matrix(self, data, index, columns, dtype=None):
         return self._init_dict(data, index, columns, dtype)
 
     def __array_wrap__(self, result):
-        return SparseDataFrame(
+        return self._constructor(
             result, index=self.index, columns=self.columns,
             default_kind=self._default_kind,
             default_fill_value=self._default_fill_value).__finalize__(self)
@@ -407,7 +407,7 @@ def _combine_frame(self, other, func, fill_value=None, level=None):
             raise NotImplementedError("'level' argument is not supported")
 
         if self.empty and other.empty:
-            return SparseDataFrame(index=new_index).__finalize__(self)
+            return self._constructor(index=new_index).__finalize__(self)
 
         new_data = {}
         new_fill_value = None
@@ -519,7 +519,8 @@ def _reindex_index(self, index, method, copy, level, fill_value=np.nan,
                 return self
 
         if len(self.index) == 0:
-            return SparseDataFrame(index=index, columns=self.columns)
+            return self._constructor(
+                index=index, columns=self.columns).__finalize__(self)
 
         indexer = self.index.get_indexer(index, method, limit=limit)
         indexer = _ensure_platform_int(indexer)
@@ -540,8 +541,9 @@ def _reindex_index(self, index, method, copy, level, fill_value=np.nan,
 
             new_series[col] = new
 
-        return SparseDataFrame(new_series, index=index, columns=self.columns,
-                               default_fill_value=self._default_fill_value)
+        return self._constructor(
+            new_series, index=index, columns=self.columns,
+            default_fill_value=self._default_fill_value).__finalize__(self)
 
     def _reindex_columns(self, columns, copy, level, fill_value, limit=None,
                          takeable=False):
@@ -556,8 +558,9 @@ def _reindex_columns(self, columns, copy, level, fill_value, limit=None,
 
         # TODO: fill value handling
         sdict = dict((k, v) for k, v in compat.iteritems(self) if k in columns)
-        return SparseDataFrame(sdict, index=self.index, columns=columns,
-                               default_fill_value=self._default_fill_value)
+        return self._constructor(
+            sdict, index=self.index, columns=columns,
+            default_fill_value=self._default_fill_value).__finalize__(self)
 
     def _reindex_with_indexers(self, reindexers, method=None, fill_value=None,
                                limit=None, copy=False, allow_dups=False):
@@ -586,8 +589,8 @@ def _reindex_with_indexers(self, reindexers, method=None, fill_value=None,
             else:
                 new_arrays[col] = self[col]
 
-        return SparseDataFrame(new_arrays, index=index,
-                               columns=columns).__finalize__(self)
+        return self._constructor(new_arrays, index=index,
+                                 columns=columns).__finalize__(self)
 
     def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='',
                      sort=False):
@@ -644,7 +647,7 @@ def transpose(self, *args, **kwargs):
         Returns a DataFrame with the rows/columns switched.
         """
         nv.validate_transpose(args, kwargs)
-        return SparseDataFrame(
+        return self._constructor(
             self.values.T, index=self.columns, columns=self.index,
             default_fill_value=self._default_fill_value,
             default_kind=self._default_kind).__finalize__(self)
diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py
index 951c2ae0c0d5a..6c4392dbf7cb4 100644
--- a/pandas/sparse/series.py
+++ b/pandas/sparse/series.py
@@ -63,11 +63,11 @@ def wrapper(self, other):
                 new_fill_value = op(np.float64(self.fill_value),
                                     np.float64(other))
 
-            return SparseSeries(op(self.sp_values, other),
-                                index=self.index,
-                                sparse_index=self.sp_index,
-                                fill_value=new_fill_value,
-                                name=self.name)
+            return self._constructor(op(self.sp_values, other),
+                                     index=self.index,
+                                     sparse_index=self.sp_index,
+                                     fill_value=new_fill_value,
+                                     name=self.name)
         else:  # pragma: no cover
             raise TypeError('operation with %s not supported' % type(other))
 
@@ -85,7 +85,7 @@ def _sparse_series_op(left, right, op, name):
     new_name = _maybe_match_name(left, right)
 
     result = _sparse_array_op(left, right, op, name)
-    return SparseSeries(result, index=new_index, name=new_name)
+    return left._constructor(result, index=new_index, name=new_name)
 
 
 class SparseSeries(Series):
diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py
index ee12d9e84511c..0e0ee75a30c84 100644
--- a/pandas/tests/frame/test_subclass.py
+++ b/pandas/tests/frame/test_subclass.py
@@ -210,3 +210,33 @@ def test_subclass_align_combinations(self):
         tm.assert_series_equal(res1, exp2)
         tm.assertIsInstance(res2, tm.SubclassedDataFrame)
         tm.assert_frame_equal(res2, exp1)
+
+    def test_subclass_sparse_slice(self):
+        rows = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]
+        ssdf = tm.SubclassedSparseDataFrame(rows)
+        ssdf.testattr = "testattr"
+
+        tm.assert_sp_frame_equal(ssdf.loc[:2],
+                                 tm.SubclassedSparseDataFrame(rows[:3]))
+        tm.assert_sp_frame_equal(ssdf.iloc[:2],
+                                 tm.SubclassedSparseDataFrame(rows[:2]))
+        tm.assert_sp_frame_equal(ssdf[:2],
+                                 tm.SubclassedSparseDataFrame(rows[:2]))
+        tm.assert_equal(ssdf.loc[:2].testattr, "testattr")
+        tm.assert_equal(ssdf.iloc[:2].testattr, "testattr")
+        tm.assert_equal(ssdf[:2].testattr, "testattr")
+
+        tm.assert_sp_series_equal(ssdf.loc[1],
+                                  tm.SubclassedSparseSeries(rows[1]),
+                                  check_names=False)
+        tm.assert_sp_series_equal(ssdf.iloc[1],
+                                  tm.SubclassedSparseSeries(rows[1]),
+                                  check_names=False)
+
+    def test_subclass_sparse_transpose(self):
+        ossdf = tm.SubclassedSparseDataFrame([[1, 2, 3],
+                                              [4, 5, 6]])
+        essdf = tm.SubclassedSparseDataFrame([[1, 4],
+                                              [2, 5],
+                                              [3, 6]])
+        tm.assert_sp_frame_equal(ossdf.T, essdf)
diff --git a/pandas/tests/series/test_subclass.py b/pandas/tests/series/test_subclass.py
index 016113961ec74..dabecefaee9d1 100644
--- a/pandas/tests/series/test_subclass.py
+++ b/pandas/tests/series/test_subclass.py
@@ -31,3 +31,27 @@ def test_to_frame(self):
         exp = tm.SubclassedDataFrame({'xxx': [1, 2, 3, 4]}, index=list('abcd'))
         tm.assert_frame_equal(res, exp)
         tm.assertIsInstance(res, tm.SubclassedDataFrame)
+
+    def test_subclass_sparse_slice(self):
+        s = tm.SubclassedSparseSeries([1, 2, 3, 4, 5])
+        tm.assert_sp_series_equal(s.loc[1:3],
+                                  tm.SubclassedSparseSeries([2.0, 3.0, 4.0],
+                                                            index=[1, 2, 3]))
+        tm.assert_sp_series_equal(s.iloc[1:3],
+                                  tm.SubclassedSparseSeries([2.0, 3.0],
+                                                            index=[1, 2]))
+        tm.assert_sp_series_equal(s[1:3],
+                                  tm.SubclassedSparseSeries([2.0, 3.0],
+                                                            index=[1, 2]))
+
+    def test_subclass_sparse_addition(self):
+        s1 = tm.SubclassedSparseSeries([1, 3, 5])
+        s2 = tm.SubclassedSparseSeries([-2, 5, 12])
+        tm.assert_sp_series_equal(s1 + s2,
+                                  tm.SubclassedSparseSeries([-1.0, 8.0, 17.0]))
+
+    def test_subclass_sparse_to_frame(self):
+        s = tm.SubclassedSparseSeries([1, 2], index=list('abcd'), name='xxx')
+        res = s.to_frame()
+        exp = tm.SubclassedSparseDataFrame({'xxx': [1, 2]}, index=list('abcd'))
+        tm.assert_sp_frame_equal(res, exp)
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index e49d92e4ab202..e4a84ea4ae296 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -1322,7 +1322,8 @@ def assert_panelnd_equal(left, right,
                          check_less_precise=False,
                          assert_func=assert_frame_equal,
                          check_names=False,
-                         by_blocks=False):
+                         by_blocks=False,
+                         obj='Panel'):
     """Check that left and right Panels are equal.
 
     Parameters
@@ -1343,6 +1344,9 @@ def assert_panelnd_equal(left, right,
     by_blocks : bool, default False
         Specify how to compare internal data. If False, compare by columns.
         If True, compare by blocks.
+    obj : str, default 'Panel'
+        Specify the object name being compared, internally used to show
+        the appropriate assertion message.
     """
 
     if check_panel_type:
@@ -1404,10 +1408,30 @@ def assert_sp_array_equal(left, right):
 
 
 def assert_sp_series_equal(left, right, exact_indices=True,
-                           check_names=True, obj='SparseSeries'):
+                           check_series_type=True,
+                           check_names=True,
+                           obj='SparseSeries'):
+    """Check that the left and right SparseSeries are equal.
+
+    Parameters
+    ----------
+    left : SparseSeries
+    right : SparseSeries
+    exact_indices : bool, default True
+    check_series_type : bool, default True
+        Whether to check that the SparseSeries class is identical.
+    check_names : bool, default True
+        Whether to check the SparseSeries name attribute.
+    obj : str, default 'SparseSeries'
+        Specify the object name being compared, internally used to show
+        the appropriate assertion message.
+    """
     assertIsInstance(left, pd.SparseSeries, '[SparseSeries]')
     assertIsInstance(right, pd.SparseSeries, '[SparseSeries]')
 
+    if check_series_type:
+        assert_class_equal(left, right, obj=obj)
+
     assert_index_equal(left.index, right.index,
                        obj='{0}.index'.format(obj))
 
@@ -1421,14 +1445,29 @@ def assert_sp_series_equal(left, right, exact_indices=True,
 
 
 def assert_sp_frame_equal(left, right, exact_indices=True,
+                          check_frame_type=True,
                           obj='SparseDataFrame'):
-    """
-    exact: Series SparseIndex objects must be exactly the same, otherwise just
-    compare dense representations
+    """Check that the left and right SparseDataFrame are equal.
+
+    Parameters
+    ----------
+    left : SparseDataFrame
+    right : SparseDataFrame
+    exact_indices : bool, default True
+        SparseSeries SparseIndex objects must be exactly the same,
+        otherwise just compare dense representations.
+    check_frame_type : bool, default True
+        Whether to check that the SparseDataFrame class is identical.
+    obj : str, default 'SparseDataFrame'
+        Specify the object name being compared, internally used to show
+        the appropriate assertion message.
     """
     assertIsInstance(left, pd.SparseDataFrame, '[SparseDataFrame]')
     assertIsInstance(right, pd.SparseDataFrame, '[SparseDataFrame]')
 
+    if check_frame_type:
+        assert_class_equal(left, right, obj=obj)
+
     assert_index_equal(left.index, right.index,
                        obj='{0}.index'.format(obj))
     assert_index_equal(left.columns, right.columns,
@@ -2607,6 +2646,30 @@ def _constructor_sliced(self):
         return SubclassedSeries
 
 
+class SubclassedSparseSeries(pd.SparseSeries):
+    _metadata = ['testattr']
+
+    @property
+    def _constructor(self):
+        return SubclassedSparseSeries
+
+    @property
+    def _constructor_expanddim(self):
+        return SubclassedSparseDataFrame
+
+
+class SubclassedSparseDataFrame(pd.SparseDataFrame):
+    _metadata = ['testattr']
+
+    @property
+    def _constructor(self):
+        return SubclassedSparseDataFrame
+
+    @property
+    def _constructor_sliced(self):
+        return SubclassedSparseSeries
+
+
 @contextmanager
 def patch(ob, attr, value):
     """Temporarily patch an attribute of an object.