DEPR: SparseArray.astype (pandas-dev#49324)

* DEPR: SparseArray.astype * fix append test * remove no-longer-overridden tests
phofl · Nov 9, 2022 · 273f0fe · 273f0fe
1 parent 6aba4e8
commit 273f0fe
Show file tree

Hide file tree

Showing 8 changed files with 27 additions and 90 deletions.
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -285,6 +285,7 @@ Removal of prior version deprecations/changes
 - Removed the deprecated method ``tshift`` from pandas classes (:issue:`11631`)
 - Changed behavior of empty data passed into :class:`Series`; the default dtype will be ``object`` instead of ``float64`` (:issue:`29405`)
 - Changed the behavior of :func:`to_datetime` with argument "now" with ``utc=False`` to match ``Timestamp("now")`` (:issue:`18705`)
+- Changed behavior of :meth:`SparseArray.astype` when given a dtype that is not explicitly ``SparseDtype``; it now casts to the exact requested dtype rather than silently using a ``SparseDtype`` (:issue:`34457`)
 - Changed behavior of :class:`DataFrame` constructor given floating-point ``data`` and an integer ``dtype``, when the data cannot be cast losslessly, the floating point dtype is retained, matching :class:`Series` behavior (:issue:`41170`)
 - Changed behavior of :class:`DataFrame` constructor when passed a ``dtype`` (other than int) that the data cannot be cast to; it now raises instead of silently ignoring the dtype (:issue:`41733`)
 - Changed the behavior of :class:`Series` constructor, it will no longer infer a datetime64 or timedelta64 dtype from string entries (:issue:`41731`)

diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
@@ -120,6 +120,8 @@ class ellipsis(Enum):
 
     SparseIndexKind = Literal["integer", "block"]
 
+    from pandas.core.dtypes.dtypes import ExtensionDtype
+
     from pandas import Series
 
 else:
@@ -1328,14 +1330,13 @@ def astype(self, dtype: AstypeArg | None = None, copy: bool = True):
         future_dtype = pandas_dtype(dtype)
         if not isinstance(future_dtype, SparseDtype):
             # GH#34457
-            warnings.warn(
-                "The behavior of .astype from SparseDtype to a non-sparse dtype "
-                "is deprecated. In a future version, this will return a non-sparse "
-                "array with the requested dtype. To retain the old behavior, use "
-                "`obj.astype(SparseDtype(dtype))`",
-                FutureWarning,
-                stacklevel=find_stack_level(),
-            )
+            if isinstance(future_dtype, np.dtype):
+                values = np.array(self)
+                return astype_nansafe(values, dtype=future_dtype)
+            else:
+                dtype = cast(ExtensionDtype, dtype)
+                cls = dtype.construct_array_type()
+                return cls._from_sequence(self, dtype=dtype, copy=copy)
 
         dtype = self.dtype.update_dtype(dtype)
         subtype = pandas_dtype(dtype._subtype_with_str)

diff --git a/pandas/tests/arrays/sparse/test_astype.py b/pandas/tests/arrays/sparse/test_astype.py
@@ -39,12 +39,9 @@ def test_astype(self):
 
     def test_astype_bool(self):
         a = SparseArray([1, 0, 0, 1], dtype=SparseDtype(int, 0))
-        with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"):
-            result = a.astype(bool)
-        expected = SparseArray(
-            [True, False, False, True], dtype=SparseDtype(bool, False)
-        )
-        tm.assert_sp_array_equal(result, expected)
+        result = a.astype(bool)
+        expected = np.array([1, 0, 0, 1], dtype=bool)
+        tm.assert_numpy_array_equal(result, expected)
 
         # update fill value
         result = a.astype(SparseDtype(bool, False))
@@ -57,12 +54,8 @@ def test_astype_all(self, any_real_numpy_dtype):
         vals = np.array([1, 2, 3])
         arr = SparseArray(vals, fill_value=1)
         typ = np.dtype(any_real_numpy_dtype)
-        with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"):
-            res = arr.astype(typ)
-        assert res.dtype == SparseDtype(typ, 1)
-        assert res.sp_values.dtype == typ
-
-        tm.assert_numpy_array_equal(np.asarray(res.to_dense()), vals.astype(typ))
+        res = arr.astype(typ)
+        tm.assert_numpy_array_equal(res, vals.astype(any_real_numpy_dtype))
 
     @pytest.mark.parametrize(
         "arr, dtype, expected",
@@ -100,22 +93,13 @@ def test_astype_all(self, any_real_numpy_dtype):
         ],
     )
     def test_astype_more(self, arr, dtype, expected):
-
-        if isinstance(dtype, SparseDtype):
-            warn = None
-        else:
-            warn = FutureWarning
-
-        with tm.assert_produces_warning(warn, match="astype from SparseDtype"):
-            result = arr.astype(dtype)
+        result = arr.astype(arr.dtype.update_dtype(dtype))
         tm.assert_sp_array_equal(result, expected)
 
     def test_astype_nan_raises(self):
         arr = SparseArray([1.0, np.nan])
         with pytest.raises(ValueError, match="Cannot convert non-finite"):
-            msg = "astype from SparseDtype"
-            with tm.assert_produces_warning(FutureWarning, match=msg):
-                arr.astype(int)
+            arr.astype(int)
 
     def test_astype_copy_false(self):
         # GH#34456 bug caused by using .view instead of .astype in astype_nansafe

diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py
@@ -4,7 +4,6 @@
 import pytest
 
 import pandas as pd
-import pandas._testing as tm
 from pandas.api.extensions import ExtensionArray
 from pandas.core.internals.blocks import EABackedBlock
 from pandas.tests.extension.base.base import BaseExtensionTests
@@ -319,23 +318,13 @@ def test_unstack(self, data, index, obj):
                 alt = df.unstack(level=level).droplevel(0, axis=1)
                 self.assert_frame_equal(result, alt)
 
-            if obj == "series":
-                is_sparse = isinstance(ser.dtype, pd.SparseDtype)
-            else:
-                is_sparse = isinstance(ser.dtypes.iat[0], pd.SparseDtype)
-            warn = None if not is_sparse else FutureWarning
-            with tm.assert_produces_warning(warn, match="astype from Sparse"):
-                obj_ser = ser.astype(object)
+            obj_ser = ser.astype(object)
 
             expected = obj_ser.unstack(level=level, fill_value=data.dtype.na_value)
-            if obj == "series" and not is_sparse:
-                # GH#34457 SparseArray.astype(object) gives Sparse[object]
-                #  instead of np.dtype(object)
+            if obj == "series":
                 assert (expected.dtypes == object).all()
 
-            with tm.assert_produces_warning(warn, match="astype from Sparse"):
-                result = result.astype(object)
-
+            result = result.astype(object)
             self.assert_frame_equal(result, expected)
 
     def test_ravel(self, data):

diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py
@@ -19,8 +19,6 @@
 
 from pandas.errors import PerformanceWarning
 
-from pandas.core.dtypes.common import is_object_dtype
-
 import pandas as pd
 from pandas import SparseDtype
 import pandas._testing as tm
@@ -159,10 +157,7 @@ def test_concat_mixed_dtypes(self, data):
         ],
     )
     def test_stack(self, data, columns):
-        with tm.assert_produces_warning(
-            FutureWarning, check_stacklevel=False, match="astype from Sparse"
-        ):
-            super().test_stack(data, columns)
+        super().test_stack(data, columns)
 
     def test_concat_columns(self, data, na_value):
         self._check_unsupported(data)
@@ -385,33 +380,11 @@ def test_equals(self, data, na_value, as_series, box):
 
 
 class TestCasting(BaseSparseTests, base.BaseCastingTests):
-    def test_astype_object_series(self, all_data):
-        # Unlike the base class, we do not expect the resulting Block
-        #  to be ObjectBlock / resulting array to be np.dtype("object")
-        ser = pd.Series(all_data, name="A")
-        with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"):
-            result = ser.astype(object)
-        assert is_object_dtype(result.dtype)
-        assert is_object_dtype(result._mgr.array.dtype)
-
-    def test_astype_object_frame(self, all_data):
-        # Unlike the base class, we do not expect the resulting Block
-        #  to be ObjectBlock / resulting array to be np.dtype("object")
-        df = pd.DataFrame({"A": all_data})
-
-        with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"):
-            result = df.astype(object)
-        assert is_object_dtype(result._mgr.arrays[0].dtype)
-
-        # check that we can compare the dtypes
-        comp = result.dtypes == df.dtypes
-        assert not comp.any()
-
     def test_astype_str(self, data):
-        with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"):
-            result = pd.Series(data[:5]).astype(str)
-        expected_dtype = SparseDtype(str, str(data.fill_value))
-        expected = pd.Series([str(x) for x in data[:5]], dtype=expected_dtype)
+        # pre-2.0 this would give a SparseDtype even if the user asked
+        #  for a non-sparse dtype.
+        result = pd.Series(data[:5]).astype(str)
+        expected = pd.Series([str(x) for x in data[:5]], dtype=object)
         self.assert_series_equal(result, expected)
 
     @pytest.mark.xfail(raises=TypeError, reason="no sparse StringDtype")

diff --git a/pandas/tests/frame/methods/test_append.py b/pandas/tests/frame/methods/test_append.py
@@ -241,12 +241,7 @@ def test_append_timestamps_aware_or_naive(self, tz_naive_fixture, timestamp):
     def test_other_dtypes(self, data, dtype, using_array_manager):
         df = DataFrame(data, dtype=dtype)
 
-        warn = None
-        if using_array_manager and isinstance(dtype, pd.SparseDtype):
-            warn = FutureWarning
-
-        with tm.assert_produces_warning(warn, match="astype from SparseDtype"):
-            result = df._append(df.iloc[0]).iloc[-1]
+        result = df._append(df.iloc[0]).iloc[-1]
 
         expected = Series(data, name=0, dtype=dtype)
         tm.assert_series_equal(result, expected)

diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
@@ -1300,10 +1300,6 @@ def test_loc_getitem_time_object(self, frame_or_series):
     @pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"])
     @pytest.mark.parametrize("dtype", [np.int64, np.float64, complex])
     @td.skip_if_no_scipy
-    @pytest.mark.filterwarnings(
-        # TODO(2.0): remove filtering; note only needed for using_array_manager
-        "ignore:The behavior of .astype from SparseDtype.*FutureWarning"
-    )
     def test_loc_getitem_range_from_spmatrix(self, spmatrix_t, dtype):
         import scipy.sparse
 

diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py
@@ -85,9 +85,7 @@ def test_binary_ufunc_with_index(flip, sparse, ufunc, arrays_for_binary_ufunc):
     name = "name"  # op(pd.Series, array) preserves the name.
     series = pd.Series(a1, name=name)
 
-    warn = None if not sparse else FutureWarning
-    with tm.assert_produces_warning(warn):
-        other = pd.Index(a2, name=name).astype("int64")
+    other = pd.Index(a2, name=name).astype("int64")
 
     array_args = (a1, a2)
     series_args = (series, other)  # ufunc(series, array)