DEPR: deprecate SparseArray.values (#26421)
jorisvandenbossche authored May 21, 2019
1 parent f5cc078 commit d3a1912
Showing 11 changed files with 93 additions and 70 deletions.
4 changes: 3 additions & 1 deletion doc/source/whatsnew/v0.25.0.rst
@@ -260,8 +260,10 @@ Deprecations

- The deprecated ``.ix[]`` indexer now raises a more visible FutureWarning instead of DeprecationWarning (:issue:`26438`).
- Deprecated the ``units=M`` (months) and ``units=Y`` (year) parameters for ``units`` of :func:`pandas.to_timedelta`, :func:`pandas.Timedelta` and :func:`pandas.TimedeltaIndex` (:issue:`16344`)
- The :attr:`SparseArray.values` attribute is deprecated. You can use ``np.asarray(...)`` or
the :meth:`SparseArray.to_dense` method instead (:issue:`26421`).
- The functions :func:`pandas.to_datetime` and :func:`pandas.to_timedelta` have deprecated the ``box`` keyword. Instead, use :meth:`to_numpy` or :meth:`Timestamp.to_datetime64` or :meth:`Timedelta.to_timedelta64`. (:issue:`24416`)
- The :meth:`DataFrame.compound` and :meth:`Series.compound` methods are deprecated and will be removed in a future version.
- The :meth:`DataFrame.compound` and :meth:`Series.compound` methods are deprecated and will be removed in a future version (:issue:`26405`).


.. _whatsnew_0250.prior_deprecations:
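As a quick illustration of the migration described in the whatsnew entry above (a minimal sketch; the array contents are made up for the example), both recommended replacements yield the same dense ndarray:

import numpy as np
import pandas as pd

arr = pd.SparseArray([0, 0, 1, 2])  # illustrative data only

# Deprecated as of 0.25.0: accessing .values now emits a FutureWarning
# dense = arr.values

# Recommended replacements, both returning a dense numpy ndarray
dense_from_asarray = np.asarray(arr)
dense_from_method = arr.to_dense()

np.testing.assert_array_equal(dense_from_asarray, dense_from_method)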
19 changes: 14 additions & 5 deletions pandas/_libs/reduction.pyx
@@ -15,7 +15,7 @@ from numpy cimport (ndarray,
cnp.import_array()

cimport pandas._libs.util as util
from pandas._libs.lib import maybe_convert_objects
from pandas._libs.lib import maybe_convert_objects, values_from_object


cdef _get_result_array(object obj, Py_ssize_t size, Py_ssize_t cnt):
@@ -28,6 +28,14 @@ cdef _get_result_array(object obj, Py_ssize_t size, Py_ssize_t cnt):
return np.empty(size, dtype='O')


cdef bint _is_sparse_array(object obj):
# TODO can be removed once SparseArray.values is removed (GH26421)
if hasattr(obj, '_subtyp'):
if obj._subtyp == 'sparse_array':
return True
return False


cdef class Reducer:
"""
Performs generic reduction operation on a C or Fortran-contiguous ndarray
@@ -146,7 +154,8 @@ cdef class Reducer:
else:
res = self.f(chunk)

if hasattr(res, 'values') and util.is_array(res.values):
if (not _is_sparse_array(res) and hasattr(res, 'values')
and util.is_array(res.values)):
res = res.values
if i == 0:
result = _get_result_array(res,
@@ -432,7 +441,8 @@ cdef class SeriesGrouper:
cdef inline _extract_result(object res):
""" extract the result object, it might be a 0-dim ndarray
or a len-1 0-dim, or a scalar """
if hasattr(res, 'values') and util.is_array(res.values):
if (not _is_sparse_array(res) and hasattr(res, 'values')
and util.is_array(res.values)):
res = res.values
if not np.isscalar(res):
if util.is_array(res):
@@ -635,8 +645,7 @@ def reduce(arr, f, axis=0, dummy=None, labels=None):
raise Exception('Cannot use shortcut')

# pass as an ndarray
if hasattr(labels, 'values'):
labels = labels.values
labels = values_from_object(labels)

reducer = Reducer(arr, f, axis=axis, dummy=dummy, labels=labels)
return reducer.get_result()
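The guards added above exist because Reducer and SeriesGrouper duck-type on a ``.values`` ndarray attribute, and touching that attribute on a SparseArray would now emit the deprecation warning from inside library code. A minimal pure-Python sketch of the guarded unwrapping (an approximation of the Cython logic, not the code that ships in reduction.pyx):

import numpy as np

def unwrap_result(res):
    # Only unwrap .values when the object is not a SparseArray; sparse arrays
    # are detected through their _subtyp marker, mirroring _is_sparse_array above.
    is_sparse = getattr(res, '_subtyp', None) == 'sparse_array'
    if not is_sparse and hasattr(res, 'values') and isinstance(res.values, np.ndarray):
        return res.values
    return res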
26 changes: 23 additions & 3 deletions pandas/_libs/src/ujson/python/objToJSON.c
@@ -210,17 +210,37 @@ static TypeContext *createTypeContext(void) {
return pc;
}


static int is_sparse_array(PyObject *obj) {
// TODO can be removed again once SparseArray.values is removed (GH26421)
if (PyObject_HasAttrString(obj, "_subtyp")) {
PyObject *_subtype = PyObject_GetAttrString(obj, "_subtyp");
PyObject *sparse_array = PyUnicode_FromString("sparse_array");
int ret = PyUnicode_Compare(_subtype, sparse_array);

if (ret == 0) {
return 1;
}
}
return 0;
}


static PyObject *get_values(PyObject *obj) {
PyObject *values = PyObject_GetAttrString(obj, "values");
PRINTMARK();
PyObject *values = NULL;

if (!is_sparse_array(obj)) {
values = PyObject_GetAttrString(obj, "values");
PRINTMARK();
}

if (values && !PyArray_CheckExact(values)) {

if (PyObject_HasAttrString(values, "to_numpy")) {
values = PyObject_CallMethod(values, "to_numpy", NULL);
}

if (PyObject_HasAttrString(values, "values")) {
if (!is_sparse_array(values) && PyObject_HasAttrString(values, "values")) {
PyObject *subvals = get_values(values);
PyErr_Clear();
PRINTMARK();
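For readers not following the C, here is a rough Python rendering of the fallback chain that get_values now implements (an assumption-level sketch; reference counting, error handling and PRINTMARK tracing are omitted):

import numpy as np

def get_values_sketch(obj):
    # Skip the deprecated .values attribute for SparseArray objects.
    values = None
    if getattr(obj, '_subtyp', None) != 'sparse_array':
        values = getattr(obj, 'values', None)

    if values is not None and not isinstance(values, np.ndarray):
        # Prefer the public to_numpy() conversion when it is available.
        if hasattr(values, 'to_numpy'):
            values = values.to_numpy()
        # Otherwise keep unwrapping nested .values, again skipping sparse arrays.
        if getattr(values, '_subtyp', None) != 'sparse_array' and hasattr(values, 'values'):
            values = get_values_sketch(values)
    return values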
49 changes: 15 additions & 34 deletions pandas/core/arrays/sparse.py
@@ -21,11 +21,10 @@
from pandas.core.dtypes.base import ExtensionDtype
from pandas.core.dtypes.cast import (
astype_nansafe, construct_1d_arraylike_from_scalar, find_common_type,
infer_dtype_from_scalar, maybe_convert_platform)
infer_dtype_from_scalar)
from pandas.core.dtypes.common import (
is_array_like, is_bool_dtype, is_datetime64_any_dtype, is_dtype_equal,
is_integer, is_list_like, is_object_dtype, is_scalar, is_string_dtype,
pandas_dtype)
is_integer, is_object_dtype, is_scalar, is_string_dtype, pandas_dtype)
from pandas.core.dtypes.dtypes import register_extension_dtype
from pandas.core.dtypes.generic import (
ABCIndexClass, ABCSeries, ABCSparseArray, ABCSparseSeries)
@@ -890,7 +889,16 @@ def npoints(self):
def values(self):
"""
Dense values

.. deprecated:: 0.25.0
   Use ``np.asarray(...)`` or the ``.to_dense()`` method instead.
"""
msg = (
"The SparseArray.values attribute is deprecated and will be "
"removed in a future version. You can use `np.asarray(...)` or "
"the `.to_dense()` method instead.")
warnings.warn(msg, FutureWarning, stacklevel=2)
return self.to_dense()

def isna(self):
@@ -1076,7 +1084,7 @@ def __getitem__(self, key):
if is_integer(key):
return self._get_val_at(key)
elif isinstance(key, tuple):
data_slice = self.values[key]
data_slice = self.to_dense()[key]
elif isinstance(key, slice):
# special case to preserve dtypes
if key == slice(None):
@@ -1635,7 +1643,7 @@ def __array_wrap__(self, array, context=None):
from pandas.core.dtypes.generic import ABCSparseSeries

ufunc, inputs, _ = context
inputs = tuple(x.values if isinstance(x, ABCSparseSeries) else x
inputs = tuple(x.to_dense() if isinstance(x, ABCSparseSeries) else x
for x in inputs)
return self.__array_ufunc__(ufunc, '__call__', *inputs)

@@ -1854,37 +1862,10 @@ def _maybe_to_sparse(array):
array must be SparseSeries or SparseArray
"""
if isinstance(array, ABCSparseSeries):
array = array.values.copy()
array = array.array.copy()
return array


def _sanitize_values(arr):
"""
return an ndarray for our input,
in a platform independent manner
"""

if hasattr(arr, 'values'):
arr = arr.values
else:

# scalar
if is_scalar(arr):
arr = [arr]

# ndarray
if isinstance(arr, np.ndarray):
pass

elif is_list_like(arr) and len(arr) > 0:
arr = maybe_convert_platform(arr)

else:
arr = np.asarray(arr)

return arr


def make_sparse(arr, kind='block', fill_value=None, dtype=None, copy=False):
"""
Convert ndarray to sparse format
@@ -1902,7 +1883,7 @@ def make_sparse(arr, kind='block', fill_value=None, dtype=None, copy=False):
(sparse_values, index, fill_value) : (ndarray, SparseIndex, Scalar)
"""

arr = _sanitize_values(arr)
arr = com.values_from_object(arr)

if arr.ndim > 1:
raise TypeError("expected dimension <= 1 data")
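make_sparse now routes its input through pandas.core.common.values_from_object instead of the removed _sanitize_values helper. Roughly, and this is an assumption about the helper's behaviour rather than a verbatim copy of it, values_from_object unwraps pandas objects to their underlying values and passes anything else through unchanged:

def values_from_object_sketch(obj):
    # Approximate behaviour of pandas.core.common.values_from_object
    # (assumption: the real helper is implemented in pandas._libs.lib).
    getter = getattr(obj, 'get_values', None)
    if getter is not None:
        return getter()
    return obj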
4 changes: 2 additions & 2 deletions pandas/core/internals/managers.py
@@ -375,8 +375,8 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False,
# with a .values attribute.
aligned_args = {k: kwargs[k]
for k in align_keys
if hasattr(kwargs[k], 'values') and
not isinstance(kwargs[k], ABCExtensionArray)}
if not isinstance(kwargs[k], ABCExtensionArray) and
hasattr(kwargs[k], 'values')}

for b in self.blocks:
if filter is not None:
4 changes: 2 additions & 2 deletions pandas/core/ops.py
@@ -2272,10 +2272,10 @@ def _cast_sparse_series_op(left, right, opname):
# TODO: This should be moved to the array?
if is_integer_dtype(left) and is_integer_dtype(right):
# series coerces to float64 if result should have NaN/inf
if opname in ('floordiv', 'mod') and (right.values == 0).any():
if opname in ('floordiv', 'mod') and (right.to_dense() == 0).any():
left = left.astype(SparseDtype(np.float64, left.fill_value))
right = right.astype(SparseDtype(np.float64, right.fill_value))
elif opname in ('rfloordiv', 'rmod') and (left.values == 0).any():
elif opname in ('rfloordiv', 'rmod') and (left.to_dense() == 0).any():
left = left.astype(SparseDtype(np.float64, left.fill_value))
right = right.astype(SparseDtype(np.float64, right.fill_value))

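The zero checks above decide whether integer sparse operands must be upcast to float64: floor division and modulo by zero need to produce NaN/inf, which integer dtypes cannot represent. The check itself is rewritten from ``right.values == 0`` to ``right.to_dense() == 0`` so that pandas does not trigger its own deprecation warning internally. A small hedged illustration (data made up for the example):

import numpy as np
import pandas as pd

right = pd.SparseArray([0, 1, 0])

# Warning-free ways to test an operand for zeros
has_zero = (right.to_dense() == 0).any()
assert has_zero == (np.asarray(right) == 0).any()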
2 changes: 1 addition & 1 deletion pandas/core/sparse/frame.py
@@ -627,7 +627,7 @@ def _reindex_index(self, index, method, copy, level, fill_value=np.nan,
# .take returns SparseArray
new = values.take(indexer)
if need_mask:
new = new.values
new = new.to_dense()
# convert integer to float if necessary. need to do a lot
# more than that, handle boolean etc also
new, fill_value = maybe_upcast(new, fill_value=fill_value)
44 changes: 26 additions & 18 deletions pandas/tests/arrays/sparse/test_array.py
@@ -433,9 +433,9 @@ def test_constructor_bool(self):
tm.assert_numpy_array_equal(arr.sp_index.indices,
np.array([2, 3], np.int32))

for dense in [arr.to_dense(), arr.values]:
assert dense.dtype == bool
tm.assert_numpy_array_equal(dense, data)
dense = arr.to_dense()
assert dense.dtype == bool
tm.assert_numpy_array_equal(dense, data)

def test_constructor_bool_fill_value(self):
arr = SparseArray([True, False, True], dtype=None)
@@ -463,9 +463,9 @@ def test_constructor_float32(self):
tm.assert_numpy_array_equal(arr.sp_index.indices,
np.array([0, 2], dtype=np.int32))

for dense in [arr.to_dense(), arr.values]:
assert dense.dtype == np.float32
tm.assert_numpy_array_equal(dense, data)
dense = arr.to_dense()
assert dense.dtype == np.float32
tm.assert_numpy_array_equal(dense, data)

def test_astype(self):
# float -> float
@@ -514,7 +514,7 @@ def test_astype_all(self, any_real_dtype):
assert res.dtype == SparseDtype(typ, 1)
assert res.sp_values.dtype == typ

tm.assert_numpy_array_equal(np.asarray(res.values),
tm.assert_numpy_array_equal(np.asarray(res.to_dense()),
vals.astype(typ))

@pytest.mark.parametrize('array, dtype, expected', [
@@ -596,7 +596,6 @@ def test_copy_shallow(self):
assert arr2.sp_index is self.arr.sp_index

def test_values_asarray(self):
assert_almost_equal(self.arr.values, self.arr_data)
assert_almost_equal(self.arr.to_dense(), self.arr_data)

@pytest.mark.parametrize('data,shape,dtype', [
@@ -627,7 +626,7 @@ def test_dense_repr(self, vals, fill_value, method):

def test_getitem(self):
def _checkit(i):
assert_almost_equal(self.arr[i], self.arr.values[i])
assert_almost_equal(self.arr[i], self.arr.to_dense()[i])

for i in range(len(self.arr)):
_checkit(i)
@@ -641,11 +640,11 @@ def test_getitem_arraylike_mask(self):

def test_getslice(self):
result = self.arr[:-3]
exp = SparseArray(self.arr.values[:-3])
exp = SparseArray(self.arr.to_dense()[:-3])
tm.assert_sp_array_equal(result, exp)

result = self.arr[-4:]
exp = SparseArray(self.arr.values[-4:])
exp = SparseArray(self.arr.to_dense()[-4:])
tm.assert_sp_array_equal(result, exp)

# two corner cases from Series
@@ -654,7 +653,7 @@ def test_getslice(self):
tm.assert_sp_array_equal(result, exp)

result = self.arr[:-12]
exp = SparseArray(self.arr.values[:0])
exp = SparseArray(self.arr.to_dense()[:0])
tm.assert_sp_array_equal(result, exp)

def test_getslice_tuple(self):
Expand Down Expand Up @@ -702,16 +701,16 @@ def test_binary_operators(self, op):

def _check_op(op, first, second):
res = op(first, second)
exp = SparseArray(op(first.values, second.values),
exp = SparseArray(op(first.to_dense(), second.to_dense()),
fill_value=first.fill_value)
assert isinstance(res, SparseArray)
assert_almost_equal(res.values, exp.values)
assert_almost_equal(res.to_dense(), exp.to_dense())

res2 = op(first, second.values)
res2 = op(first, second.to_dense())
assert isinstance(res2, SparseArray)
tm.assert_sp_array_equal(res, res2)

res3 = op(first.values, second)
res3 = op(first.to_dense(), second)
assert isinstance(res3, SparseArray)
tm.assert_sp_array_equal(res, res3)

@@ -720,13 +719,13 @@ def _check_op(op, first, second):

# Ignore this if the actual op raises (e.g. pow).
try:
exp = op(first.values, 4)
exp = op(first.to_dense(), 4)
exp_fv = op(first.fill_value, 4)
except ValueError:
pass
else:
assert_almost_equal(res4.fill_value, exp_fv)
assert_almost_equal(res4.values, exp)
assert_almost_equal(res4.to_dense(), exp)

with np.errstate(all="ignore"):
for first_arr, second_arr in [(arr1, arr2), (farr1, farr2)]:
@@ -1230,3 +1229,12 @@ def test_map_missing():

result = arr.map({0: 10, 1: 11})
tm.assert_sp_array_equal(result, expected)


def test_deprecated_values():
arr = SparseArray([0, 1, 2])

with tm.assert_produces_warning(FutureWarning):
result = arr.values

tm.assert_numpy_array_equal(result, arr.to_dense())
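The new test above pins the deprecation with tm.assert_produces_warning. Downstream code that still needs the old attribute during the deprecation window can silence the warning locally; a generic sketch using the standard warnings module (nothing pandas-specific is assumed):

import warnings
import pandas as pd

arr = pd.SparseArray([0, 1, 2])

with warnings.catch_warnings():
    warnings.simplefilter('ignore', FutureWarning)
    dense = arr.values  # no FutureWarning escapes this block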
6 changes: 3 additions & 3 deletions pandas/tests/sparse/series/test_series.py
@@ -255,7 +255,7 @@ def test_constructor(self):
assert isinstance(self.iseries.sp_index, IntIndex)

assert self.zbseries.fill_value == 0
tm.assert_numpy_array_equal(self.zbseries.values.values,
tm.assert_numpy_array_equal(self.zbseries.values.to_dense(),
self.bseries.to_dense().fillna(0).values)

# pass SparseSeries
@@ -322,7 +322,7 @@ def test_constructor_ndarray(self):
def test_constructor_nonnan(self):
arr = [0, 0, 0, nan, nan]
sp_series = SparseSeries(arr, fill_value=0)
tm.assert_numpy_array_equal(sp_series.values.values, np.array(arr))
tm.assert_numpy_array_equal(sp_series.values.to_dense(), np.array(arr))
assert len(sp_series) == 5
assert sp_series.shape == (5, )

@@ -514,7 +514,7 @@ def _compare(idx):
sparse_result = sp.take(idx)
assert isinstance(sparse_result, SparseSeries)
tm.assert_almost_equal(dense_result,
sparse_result.values.values)
sparse_result.values.to_dense())

_compare([1., 2., 3., 4., 5., 0.])
_compare([7, 2, 9, 0, 4])
2 changes: 1 addition & 1 deletion pandas/util/testing.py
@@ -1403,7 +1403,7 @@ def assert_sp_array_equal(left, right, check_dtype=True, check_kind=True,
assert_attr_equal('fill_value', left, right)
if check_dtype:
assert_attr_equal('dtype', left, right)
assert_numpy_array_equal(left.values, right.values,
assert_numpy_array_equal(left.to_dense(), right.to_dense(),
check_dtype=check_dtype)

