DEPR: deprecate .as_blocks() (#17656)

closes #17302
pandas-dev · Sep 25, 2017 · 868389d · 868389d
1 parent 4c9e98d
commit 868389d
Show file tree

Hide file tree

Showing 11 changed files with 79 additions and 49 deletions.
diff --git a/doc/source/10min.rst b/doc/source/10min.rst
@@ -95,17 +95,7 @@ will be completed:
    df2.append             df2.combine_first
    df2.apply              df2.compound
    df2.applymap           df2.consolidate
-   df2.as_blocks          df2.convert_objects
-   df2.asfreq             df2.copy
-   df2.as_matrix          df2.corr
-   df2.astype             df2.corrwith
-   df2.at                 df2.count
-   df2.at_time            df2.cov
-   df2.axes               df2.cummax
-   df2.B                  df2.cummin
-   df2.between_time       df2.cumprod
-   df2.bfill              df2.cumsum
-   df2.blocks             df2.D
+   df2.D
 
 As you can see, the columns ``A``, ``B``, ``C``, and ``D`` are automatically
 tab completed. ``E`` is there as well; the rest of the attributes have been

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
@@ -488,10 +488,9 @@ Other API Changes
 Deprecations
 ~~~~~~~~~~~~
 - :func:`read_excel()` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with ``.to_excel()`` (:issue:`10559`).
-
 - ``pd.options.html.border`` has been deprecated in favor of ``pd.options.display.html.border`` (:issue:`15793`).
-
 - :func:`SeriesGroupBy.nth` has deprecated ``True`` in favor of ``'all'`` for its kwarg ``dropna`` (:issue:`11038`).
+- :func:`DataFrame.as_blocks` has been deprecated, as it exposes the internal implementation (:issue:`17302`).
 
 .. _whatsnew_0210.prior_deprecations:
 

diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py
@@ -165,7 +165,7 @@ def _has_bool_dtype(x):
         return x.dtype == bool
     except AttributeError:
         try:
-            return 'bool' in x.blocks
+            return 'bool' in x.dtypes
         except AttributeError:
             return isinstance(x, (bool, np.bool_))
 

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -1650,7 +1650,7 @@ def to_xarray(self):
                                 coords=coords,
                                 )
 
-    _shared_docs['to_latex'] = """
+    _shared_docs['to_latex'] = r"""
         Render an object to a tabular environment table. You can splice
         this into a LaTeX document. Requires \\usepackage{booktabs}.
 
@@ -3271,7 +3271,7 @@ def sample(self, n=None, frac=None, replace=False, weights=None,
         locs = rs.choice(axis_length, size=n, replace=replace, p=weights)
         return self.take(locs, axis=axis, is_copy=False)
 
-    _shared_docs['pipe'] = ("""
+    _shared_docs['pipe'] = (r"""
         Apply func(self, \*args, \*\*kwargs)
 
         Parameters
@@ -3692,39 +3692,43 @@ def as_blocks(self, copy=True):
         Convert the frame to a dict of dtype -> Constructor Types that each has
         a homogeneous dtype.
 
+        .. deprecated:: 0.21.0
+
         NOTE: the dtypes of the blocks WILL BE PRESERVED HERE (unlike in
               as_matrix)
 
         Parameters
         ----------
         copy : boolean, default True
 
-               .. versionadded: 0.16.1
-
         Returns
         -------
         values : a dict of dtype -> Constructor Types
         """
-        self._consolidate_inplace()
-
-        bd = {}
-        for b in self._data.blocks:
-            bd.setdefault(str(b.dtype), []).append(b)
-
-        result = {}
-        for dtype, blocks in bd.items():
-            # Must combine even after consolidation, because there may be
-            # sparse items which are never consolidated into one block.
-            combined = self._data.combine(blocks, copy=copy)
-            result[dtype] = self._constructor(combined).__finalize__(self)
-
-        return result
+        warnings.warn("as_blocks is deprecated and will "
+                      "be removed in a future version",
+                      FutureWarning, stacklevel=2)
+        return self._to_dict_of_blocks(copy=copy)
 
     @property
     def blocks(self):
-        """Internal property, property synonym for as_blocks()"""
+        """
+        Internal property, synonym for as_blocks()
+
+        .. deprecated:: 0.21.0
+        """
         return self.as_blocks()
 
+    def _to_dict_of_blocks(self, copy=True):
+        """
+        Return a dict of dtype -> Constructor Types, each of
+        which has a homogeneous dtype.
+
+        Internal ONLY
+        """
+        return {k: self._constructor(v).__finalize__(self)
+                for k, v, in self._data.to_dict(copy=copy).items()}
+
     @deprecate_kwarg(old_arg_name='raise_on_error', new_arg_name='errors',
                      mapping={True: 'raise', False: 'ignore'})
     def astype(self, dtype, copy=True, errors='raise', **kwargs):
@@ -3931,13 +3935,12 @@ def convert_objects(self, convert_dates=True, convert_numeric=False,
         -------
         converted : same as input object
         """
-        from warnings import warn
         msg = ("convert_objects is deprecated.  To re-infer data dtypes for "
                "object columns, use {klass}.infer_objects()\nFor all "
                "other conversions use the data-type specific converters "
                "pd.to_datetime, pd.to_timedelta and pd.to_numeric."
                ).format(klass=self.__class__.__name__)
-        warn(msg, FutureWarning, stacklevel=2)
+        warnings.warn(msg, FutureWarning, stacklevel=2)
 
         return self._constructor(
             self._data.convert(convert_dates=convert_dates,
@@ -4310,9 +4313,9 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
             raise AssertionError("'to_replace' must be 'None' if 'regex' is "
                                  "not a bool")
         if axis is not None:
-            from warnings import warn
-            warn('the "axis" argument is deprecated and will be removed in'
-                 'v0.13; this argument has no effect')
+            warnings.warn('the "axis" argument is deprecated '
+                          'and will be removed in'
+                          'v0.13; this argument has no effect')
 
         self._consolidate_inplace()
 

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -3583,6 +3583,31 @@ def _interleave(self):
 
         return result
 
+    def to_dict(self, copy=True):
+        """
+        Return a dict of str(dtype) -> BlockManager
+
+        Parameters
+        ----------
+        copy : boolean, default True
+
+        Returns
+        -------
+        values : a dict of dtype -> BlockManager
+
+        Notes
+        -----
+        This consolidates based on str(dtype)
+        """
+        self._consolidate_inplace()
+
+        bd = {}
+        for b in self.blocks:
+            bd.setdefault(str(b.dtype), []).append(b)
+
+        return {dtype: self.combine(blocks, copy=copy)
+                for dtype, blocks in bd.items()}
+
     def xs(self, key, axis=1, copy=True, takeable=False):
         if axis < 1:
             raise AssertionError('Can only take xs across axis >= 1, got %d' %

diff --git a/pandas/core/window.py b/pandas/core/window.py
@@ -141,7 +141,7 @@ def _create_blocks(self, how):
             if obj.ndim == 2:
                 obj = obj.reindex(columns=obj.columns.difference([self.on]),
                                   copy=False)
-        blocks = obj.as_blocks(copy=False).values()
+        blocks = obj._to_dict_of_blocks(copy=False).values()
 
         return blocks, obj, index
 

diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py
@@ -320,7 +320,11 @@ def test_copy_blocks(self):
         column = df.columns[0]
 
         # use the default copy=True, change a column
-        blocks = df.as_blocks()
+
+        # deprecated 0.21.0
+        with tm.assert_produces_warning(FutureWarning,
+                                        check_stacklevel=False):
+            blocks = df.as_blocks()
         for dtype, _df in blocks.items():
             if column in _df:
                 _df.loc[:, column] = _df[column] + 1
@@ -334,7 +338,11 @@ def test_no_copy_blocks(self):
         column = df.columns[0]
 
         # use the copy=False, change a column
-        blocks = df.as_blocks(copy=False)
+
+        # deprecated 0.21.0
+        with tm.assert_produces_warning(FutureWarning,
+                                        check_stacklevel=False):
+            blocks = df.as_blocks(copy=False)
         for dtype, _df in blocks.items():
             if column in _df:
                 _df.loc[:, column] = _df[column] + 1

diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
@@ -1766,7 +1766,7 @@ def test_from_records_sequencelike(self):
 
         # this is actually tricky to create the recordlike arrays and
         # have the dtypes be intact
-        blocks = df.blocks
+        blocks = df._to_dict_of_blocks()
         tuples = []
         columns = []
         dtypes = []
@@ -1841,8 +1841,9 @@ def test_from_records_dictlike(self):
 
         # columns is in a different order here than the actual items iterated
         # from the dict
+        blocks = df._to_dict_of_blocks()
         columns = []
-        for dtype, b in compat.iteritems(df.blocks):
+        for dtype, b in compat.iteritems(blocks):
             columns.extend(b.columns)
 
         asdict = dict((x, y) for x, y in compat.iteritems(df))

diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
@@ -469,10 +469,11 @@ def test_set_change_dtype_slice(self):  # GH8850
         df = DataFrame([[1.0, 2, 3], [4.0, 5, 6]], columns=cols)
         df['2nd'] = df['2nd'] * 2.0
 
-        assert sorted(df.blocks.keys()) == ['float64', 'int64']
-        assert_frame_equal(df.blocks['float64'], DataFrame(
+        blocks = df._to_dict_of_blocks()
+        assert sorted(blocks.keys()) == ['float64', 'int64']
+        assert_frame_equal(blocks['float64'], DataFrame(
             [[1.0, 4.0], [4.0, 10.0]], columns=cols[:2]))
-        assert_frame_equal(df.blocks['int64'], DataFrame(
+        assert_frame_equal(blocks['int64'], DataFrame(
             [[3], [6]], columns=cols[2:]))
 
     def test_copy(self, mgr):

diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py
@@ -1099,7 +1099,10 @@ def test_as_blocks(self):
         df = SparseDataFrame({'A': [1.1, 3.3], 'B': [nan, -3.9]},
                              dtype='float64')
 
-        df_blocks = df.blocks
+        # deprecated 0.21.0
+        with tm.assert_produces_warning(FutureWarning,
+                                        check_stacklevel=False):
+            df_blocks = df.blocks
         assert list(df_blocks.keys()) == ['float64']
         tm.assert_frame_equal(df_blocks['float64'], df)
 

diff --git a/pandas/util/testing.py b/pandas/util/testing.py
@@ -1385,8 +1385,8 @@ def assert_frame_equal(left, right, check_dtype=True,
 
     # compare by blocks
     if by_blocks:
-        rblocks = right.blocks
-        lblocks = left.blocks
+        rblocks = right._to_dict_of_blocks()
+        lblocks = left._to_dict_of_blocks()
         for dtype in list(set(list(lblocks.keys()) + list(rblocks.keys()))):
             assert dtype in lblocks
             assert dtype in rblocks