pandas-dev · jreback · Jan 5, 2020 · Jan 3, 2020 · Jan 3, 2020 · Jan 3, 2020
diff --git a/doc/source/development/contributing_docstring.rst b/doc/source/development/contributing_docstring.rst
@@ -399,7 +399,7 @@ DataFrame:
 * DataFrame
 * pandas.Index
 * pandas.Categorical
-* pandas.SparseArray
+* pandas.arrays.SparseArray
 
 If the exact type is not relevant, but must be compatible with a numpy
 array, array-like can be specified. If Any type that can be iterated is

diff --git a/doc/source/getting_started/basics.rst b/doc/source/getting_started/basics.rst
@@ -1951,7 +1951,7 @@ documentation sections for more on each type.
 | period            | :class:`PeriodDtype`      | :class:`Period`    | :class:`arrays.PeriodArray`   | ``'period[<freq>]'``,                   | :ref:`timeseries.periods`     |
 | (time spans)      |                           |                    |                               | ``'Period[<freq>]'``                    |                               |
 +-------------------+---------------------------+--------------------+-------------------------------+-----------------------------------------+-------------------------------+
-| sparse            | :class:`SparseDtype`      | (none)             | :class:`SparseArray`          | ``'Sparse'``, ``'Sparse[int]'``,        | :ref:`sparse`                 |
+| sparse            | :class:`SparseDtype`      | (none)             | :class:`arrays.SparseArray`   | ``'Sparse'``, ``'Sparse[int]'``,        | :ref:`sparse`                 |
 |                   |                           |                    |                               | ``'Sparse[float]'``                     |                               |
 +-------------------+---------------------------+--------------------+-------------------------------+-----------------------------------------+-------------------------------+
 | intervals         | :class:`IntervalDtype`    | :class:`Interval`  | :class:`arrays.IntervalArray` | ``'interval'``, ``'Interval'``,         | :ref:`advanced.intervalindex` |

diff --git a/doc/source/getting_started/dsintro.rst b/doc/source/getting_started/dsintro.rst
@@ -741,7 +741,7 @@ implementation takes precedence and a Series is returned.
    np.maximum(ser, idx)
 
 NumPy ufuncs are safe to apply to :class:`Series` backed by non-ndarray arrays,
-for example :class:`SparseArray` (see :ref:`sparse.calculation`). If possible,
+for example :class:`arrays.SparseArray` (see :ref:`sparse.calculation`). If possible,
 the ufunc is applied without converting the underlying data to an ndarray.
 
 Console display

diff --git a/doc/source/reference/arrays.rst b/doc/source/reference/arrays.rst
@@ -444,13 +444,13 @@ Sparse data
 -----------
 
 Data where a single value is repeated many times (e.g. ``0`` or ``NaN``) may
-be stored efficiently as a :class:`SparseArray`.
+be stored efficiently as an :class:`arrays.SparseArray`.
 
 .. autosummary::
    :toctree: api/
    :template: autosummary/class_without_autosummary.rst
 
-   SparseArray
+   arrays.SparseArray
 
 .. autosummary::
    :toctree: api/

diff --git a/doc/source/user_guide/sparse.rst b/doc/source/user_guide/sparse.rst
@@ -15,7 +15,7 @@ can be chosen, including 0) is omitted. The compressed values are not actually s
 
    arr = np.random.randn(10)
    arr[2:-2] = np.nan
-   ts = pd.Series(pd.SparseArray(arr))
+   ts = pd.Series(pd.arrays.SparseArray(arr))
    ts
 
 Notice the dtype, ``Sparse[float64, nan]``. The ``nan`` means that elements in the
@@ -51,7 +51,7 @@ identical to their dense counterparts.
 SparseArray
 -----------
 
-:class:`SparseArray` is a :class:`~pandas.api.extensions.ExtensionArray`
+:class:`arrays.SparseArray` is a :class:`~pandas.api.extensions.ExtensionArray`
 for storing an array of sparse values (see :ref:`basics.dtypes` for more
 on extension arrays). It is a 1-dimensional ndarray-like object storing
 only values distinct from the ``fill_value``:
@@ -61,7 +61,7 @@ only values distinct from the ``fill_value``:
    arr = np.random.randn(10)
    arr[2:5] = np.nan
    arr[7:8] = np.nan
-   sparr = pd.SparseArray(arr)
+   sparr = pd.arrays.SparseArray(arr)
    sparr
 
 A sparse array can be converted to a regular (dense) ndarray with :meth:`numpy.asarray`
@@ -144,7 +144,7 @@ to ``SparseArray`` and get a ``SparseArray`` as a result.
 
 .. ipython:: python
 
-   arr = pd.SparseArray([1., np.nan, np.nan, -2., np.nan])
+   arr = pd.arrays.SparseArray([1., np.nan, np.nan, -2., np.nan])
    np.abs(arr)
 
 
@@ -153,7 +153,7 @@ the correct dense result.
 
 .. ipython:: python
 
-   arr = pd.SparseArray([1., -1, -1, -2., -1], fill_value=-1)
+   arr = pd.arrays.SparseArray([1., -1, -1, -2., -1], fill_value=-1)
    np.abs(arr)
    np.abs(arr).to_dense()
 
@@ -194,7 +194,7 @@ From an array-like, use the regular :class:`Series` or
 .. ipython:: python
 
    # New way
-   pd.DataFrame({"A": pd.SparseArray([0, 1])})
+   pd.DataFrame({"A": pd.arrays.SparseArray([0, 1])})
 
 From a SciPy sparse matrix, use :meth:`DataFrame.sparse.from_spmatrix`,
 
@@ -256,10 +256,10 @@ Instead, you'll need to ensure that the values being assigned are sparse
 
 .. ipython:: python
 
-   df = pd.DataFrame({"A": pd.SparseArray([0, 1])})
+   df = pd.DataFrame({"A": pd.arrays.SparseArray([0, 1])})
    df['B'] = [0, 0]  # remains dense
    df['B'].dtype
-   df['B'] = pd.SparseArray([0, 0])
+   df['B'] = pd.arrays.SparseArray([0, 0])
    df['B'].dtype
 
 The ``SparseDataFrame.default_kind`` and ``SparseDataFrame.default_fill_value`` attributes

diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst
@@ -1225,6 +1225,7 @@ Previously, sparse data were ``float64`` dtype by default, even if all inputs we
 As of v0.19.0, sparse data keeps the input dtype, and uses more appropriate ``fill_value`` defaults (``0`` for ``int64`` dtype, ``False`` for ``bool`` dtype).
 
 .. ipython:: python
+   :okwarning:
 
    pd.SparseArray([1, 2, 0, 0], dtype=np.int64)
    pd.SparseArray([True, False, False, False])

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -354,6 +354,7 @@ When passed DataFrames whose values are sparse, :func:`concat` will now return a
 :class:`Series` or :class:`DataFrame` with sparse values, rather than a :class:`SparseDataFrame` (:issue:`25702`).
 
 .. ipython:: python
+   :okwarning:
 
    df = pd.DataFrame({"A": pd.SparseArray([0, 1])})
 
@@ -910,6 +911,7 @@ by a ``Series`` or ``DataFrame`` with sparse values.
 **New way**
 
 .. ipython:: python
+   :okwarning:
 
    df = pd.DataFrame({"A": pd.SparseArray([0, 0, 1, 2])})
    df.dtypes

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -578,6 +578,7 @@ Deprecations
 - :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_feather`, and :meth:`DataFrame.to_parquet` argument "fname" is deprecated, use "path" instead (:issue:`23574`)
 - The deprecated internal attributes ``_start``, ``_stop`` and ``_step`` of :class:`RangeIndex` now raise a ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`26581`)
 - The ``pandas.util.testing`` module has been deprecated. Use the public API in ``pandas.testing`` documented at :ref:`api.general.testing` (:issue:`16232`).
+- ``pandas.SparseArray`` has been deprecated. Use ``pandas.arrays.SparseArray`` (:class:`arrays.SparseArray`) instead (:issue:`30642`)
 
 **Selecting Columns from a Grouped DataFrame**
 

diff --git a/pandas/__init__.py b/pandas/__init__.py
@@ -115,7 +115,7 @@
     DataFrame,
 )
 
-from pandas.core.arrays.sparse import SparseArray, SparseDtype
+from pandas.core.arrays.sparse import SparseDtype
 
 from pandas.tseries.api import infer_freq
 from pandas.tseries import offsets
@@ -246,6 +246,19 @@ class Panel:
 
             return type(name, (), {})
 
+        elif name == "SparseArray":
+
+            warnings.warn(
+                "The pandas.SparseArray class is deprecated "
+                "and will be removed from pandas in a future version. "
+                "Use pandas.arrays.SparseArray instead.",
+                FutureWarning,
+                stacklevel=2,
+            )
+            from pandas.core.arrays.sparse import SparseArray as _SparseArray
+
+            return _SparseArray
+
         raise AttributeError(f"module 'pandas' has no attribute '{name}'")
 
 
@@ -308,6 +321,9 @@ def __getattr__(self, item):
 
     datetime = __Datetime().datetime
 
+    class SparseArray:
+        pass
+
 
 # module level doc-string
 __doc__ = """

diff --git a/pandas/_testing.py b/pandas/_testing.py
@@ -1492,7 +1492,7 @@ def assert_sp_array_equal(
         block indices.
     """
 
-    _check_isinstance(left, right, pd.SparseArray)
+    _check_isinstance(left, right, pd.arrays.SparseArray)
 
     assert_numpy_array_equal(left.sp_values, right.sp_values, check_dtype=check_dtype)
 

diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py
@@ -163,7 +163,7 @@ def to_dense(self):
 
         Examples
         --------
-        >>> series = pd.Series(pd.SparseArray([0, 1, 0]))
+        >>> series = pd.Series(pd.arrays.SparseArray([0, 1, 0]))
         >>> series
         0    0
         1    1
@@ -216,7 +216,7 @@ def from_spmatrix(cls, data, index=None, columns=None):
         -------
         DataFrame
             Each column of the DataFrame is stored as a
-            :class:`SparseArray`.
+            :class:`arrays.SparseArray`.
 
         Examples
         --------
@@ -251,7 +251,7 @@ def to_dense(self):
 
         Examples
         --------
-        >>> df = pd.DataFrame({"A": pd.SparseArray([0, 1, 0])})
+        >>> df = pd.DataFrame({"A": pd.arrays.SparseArray([0, 1, 0])})
         >>> df.sparse.to_dense()
            A
         0  0

diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
@@ -403,7 +403,7 @@ def from_spmatrix(cls, data):
         --------
         >>> import scipy.sparse
         >>> mat = scipy.sparse.coo_matrix((4, 1))
-        >>> pd.SparseArray.from_spmatrix(mat)
+        >>> pd.arrays.SparseArray.from_spmatrix(mat)
         [0.0, 0.0, 0.0, 0.0]
         Fill: 0.0
         IntIndex
@@ -1079,7 +1079,7 @@ def map(self, mapper):
 
         Examples
         --------
-        >>> arr = pd.SparseArray([0, 1, 2])
+        >>> arr = pd.arrays.SparseArray([0, 1, 2])
         >>> arr.apply(lambda x: x + 10)
         [10, 11, 12]
         Fill: 10

diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
@@ -269,9 +269,9 @@ def is_sparse(arr) -> bool:
     --------
     Returns `True` if the parameter is a 1-D pandas sparse array.
 
-    >>> is_sparse(pd.SparseArray([0, 0, 1, 0]))
+    >>> is_sparse(pd.arrays.SparseArray([0, 0, 1, 0]))
     True
-    >>> is_sparse(pd.Series(pd.SparseArray([0, 0, 1, 0])))
+    >>> is_sparse(pd.Series(pd.arrays.SparseArray([0, 0, 1, 0])))
     True
 
     Returns `False` if the parameter is not sparse.
@@ -318,7 +318,7 @@ def is_scipy_sparse(arr) -> bool:
     >>> from scipy.sparse import bsr_matrix
     >>> is_scipy_sparse(bsr_matrix([1, 2, 3]))
     True
-    >>> is_scipy_sparse(pd.SparseArray([1, 2, 3]))
+    >>> is_scipy_sparse(pd.arrays.SparseArray([1, 2, 3]))
     False
     """
 
@@ -1467,7 +1467,7 @@ def is_bool_dtype(arr_or_dtype) -> bool:
     True
     >>> is_bool_dtype(pd.Categorical([True, False]))
     True
-    >>> is_bool_dtype(pd.SparseArray([True, False]))
+    >>> is_bool_dtype(pd.arrays.SparseArray([True, False]))
     True
     """
     if arr_or_dtype is None:
@@ -1529,7 +1529,7 @@ def is_extension_type(arr) -> bool:
     True
     >>> is_extension_type(pd.Series(cat))
     True
-    >>> is_extension_type(pd.SparseArray([1, 2, 3]))
+    >>> is_extension_type(pd.arrays.SparseArray([1, 2, 3]))
     True
     >>> from scipy.sparse import bsr_matrix
     >>> is_extension_type(bsr_matrix([1, 2, 3]))

diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py
@@ -67,7 +67,6 @@ class TestPDApi(Base):
         "RangeIndex",
         "UInt64Index",
         "Series",
-        "SparseArray",
         "SparseDtype",
         "StringDtype",
         "Timedelta",
@@ -91,7 +90,7 @@ class TestPDApi(Base):
         "NamedAgg",
     ]
     if not compat.PY37:
-        classes.extend(["Panel", "SparseSeries", "SparseDataFrame"])
+        classes.extend(["Panel", "SparseSeries", "SparseDataFrame", "SparseArray"])
         deprecated_modules.extend(["np", "datetime"])
 
     # these are already deprecated; awaiting removal

diff --git a/pandas/tests/arrays/sparse/test_accessor.py b/pandas/tests/arrays/sparse/test_accessor.py
@@ -67,9 +67,11 @@ def test_to_coo(self):
     def test_to_dense(self):
         df = pd.DataFrame(
             {
-                "A": pd.SparseArray([1, 0], dtype=pd.SparseDtype("int64", 0)),
-                "B": pd.SparseArray([1, 0], dtype=pd.SparseDtype("int64", 1)),
-                "C": pd.SparseArray([1.0, 0.0], dtype=pd.SparseDtype("float64", 0.0)),
+                "A": pd.arrays.SparseArray([1, 0], dtype=pd.SparseDtype("int64", 0)),
+                "B": pd.arrays.SparseArray([1, 0], dtype=pd.SparseDtype("int64", 1)),
+                "C": pd.arrays.SparseArray(
+                    [1.0, 0.0], dtype=pd.SparseDtype("float64", 0.0)
+                ),
             },
             index=["b", "a"],
         )
@@ -82,8 +84,8 @@ def test_to_dense(self):
     def test_density(self):
         df = pd.DataFrame(
             {
-                "A": pd.SparseArray([1, 0, 2, 1], fill_value=0),
-                "B": pd.SparseArray([0, 1, 1, 1], fill_value=0),
+                "A": pd.arrays.SparseArray([1, 0, 2, 1], fill_value=0),
+                "B": pd.arrays.SparseArray([0, 1, 1, 1], fill_value=0),
             }
         )
         res = df.sparse.density
@@ -100,7 +102,7 @@ def test_series_from_coo(self, dtype, dense_index):
         result = pd.Series.sparse.from_coo(A, dense_index=dense_index)
         index = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)])
         expected = pd.Series(
-            pd.SparseArray(np.array([1, 1, 1], dtype=dtype)), index=index
+            pd.arrays.SparseArray(np.array([1, 1, 1], dtype=dtype)), index=index
         )
         if dense_index:
             expected = expected.reindex(pd.MultiIndex.from_product(index.levels))