DEPR: deprecate SparseArray.values (#26421)
jorisvandenbossche authored May 21, 2019
1 parent f5cc078 commit d3a1912
Showing 11 changed files with 93 additions and 70 deletions.
4 changes: 3 additions & 1 deletion doc/source/whatsnew/v0.25.0.rst
@@ -260,8 +260,10 @@ Deprecations

- The deprecated ``.ix[]`` indexer now raises a more visible FutureWarning instead of DeprecationWarning (:issue:`26438`).
- Deprecated the ``units=M`` (months) and ``units=Y`` (year) parameters for ``units`` of :func:`pandas.to_timedelta`, :func:`pandas.Timedelta` and :func:`pandas.TimedeltaIndex` (:issue:`16344`)
- The :attr:`SparseArray.values` attribute is deprecated. You can use ``np.asarray(...)`` or
the :meth:`SparseArray.to_dense` method instead (:issue:`26421`).
- The functions :func:`pandas.to_datetime` and :func:`pandas.to_timedelta` have deprecated the ``box`` keyword. Instead, use :meth:`to_numpy` or :meth:`Timestamp.to_datetime64` or :meth:`Timedelta.to_timedelta64`. (:issue:`24416`)
- The :meth:`DataFrame.compound` and :meth:`Series.compound` methods are deprecated and will be removed in a future version.
- The :meth:`DataFrame.compound` and :meth:`Series.compound` methods are deprecated and will be removed in a future version (:issue:`26405`).


.. _whatsnew_0250.prior_deprecations:
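As a quick illustration of the migration described in the whatsnew entry above (a minimal sketch; the array contents are made up for the example), both recommended replacements yield the same dense ndarray:

import numpy as np
import pandas as pd

arr = pd.SparseArray([0, 0, 1, 2])  # illustrative data only

# Deprecated as of 0.25.0: accessing .values now emits a FutureWarning
# dense = arr.values

# Recommended replacements, both returning a dense numpy ndarray
dense_from_asarray = np.asarray(arr)
dense_from_method = arr.to_dense()

np.testing.assert_array_equal(dense_from_asarray, dense_from_method)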
19 changes: 14 additions & 5 deletions pandas/_libs/reduction.pyx
@@ -15,7 +15,7 @@ from numpy cimport (ndarray,
cnp.import_array()

cimport pandas._libs.util as util
from pandas._libs.lib import maybe_convert_objects
from pandas._libs.lib import maybe_convert_objects, values_from_object


cdef _get_result_array(object obj, Py_ssize_t size, Py_ssize_t cnt):
@@ -28,6 +28,14 @@ cdef _get_result_array(object obj, Py_ssize_t size, Py_ssize_t cnt):
return np.empty(size, dtype='O')


cdef bint _is_sparse_array(object obj):
# TODO can be removed once SparseArray.values is removed (GH26421)
if hasattr(obj, '_subtyp'):
if obj._subtyp == 'sparse_array':
return True
return False


cdef class Reducer:
"""
Performs generic reduction operation on a C or Fortran-contiguous ndarray
@@ -146,7 +154,8 @@ cdef class Reducer:
else:
res = self.f(chunk)

if hasattr(res, 'values') and util.is_array(res.values):
if (not _is_sparse_array(res) and hasattr(res, 'values')
and util.is_array(res.values)):
res = res.values
if i == 0:
result = _get_result_array(res,
@@ -432,7 +441,8 @@ cdef class SeriesGrouper:
cdef inline _extract_result(object res):
""" extract the result object, it might be a 0-dim ndarray
or a len-1 0-dim, or a scalar """
if hasattr(res, 'values') and util.is_array(res.values):
if (not _is_sparse_array(res) and hasattr(res, 'values')
and util.is_array(res.values)):
res = res.values
if not np.isscalar(res):
if util.is_array(res):
@@ -635,8 +645,7 @@ def reduce(arr, f, axis=0, dummy=None, labels=None):
raise Exception('Cannot use shortcut')

# pass as an ndarray
if hasattr(labels, 'values'):
labels = labels.values
labels = values_from_object(labels)

reducer = Reducer(arr, f, axis=axis, dummy=dummy, labels=labels)
return reducer.get_result()
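The guards added above exist because Reducer and SeriesGrouper duck-type on a ``.values`` ndarray attribute, and touching that attribute on a SparseArray would now emit the deprecation warning from inside library code. A minimal pure-Python sketch of the guarded unwrapping (an approximation of the Cython logic, not the code that ships in reduction.pyx):

import numpy as np

def unwrap_result(res):
    # Only unwrap .values when the object is not a SparseArray; sparse arrays
    # are detected through their _subtyp marker, mirroring _is_sparse_array above.
    is_sparse = getattr(res, '_subtyp', None) == 'sparse_array'
    if not is_sparse and hasattr(res, 'values') and isinstance(res.values, np.ndarray):
        return res.values
    return res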
26 changes: 23 additions & 3 deletions pandas/_libs/src/ujson/python/objToJSON.c
@@ -210,17 +210,37 @@ static TypeContext *createTypeContext(void) {
return pc;
}


static int is_sparse_array(PyObject *obj) {
// TODO can be removed again once SparseArray.values is removed (GH26421)
if (PyObject_HasAttrString(obj, "_subtyp")) {
PyObject *_subtype = PyObject_GetAttrString(obj, "_subtyp");
PyObject *sparse_array = PyUnicode_FromString("sparse_array");
int ret = PyUnicode_Compare(_subtype, sparse_array);

if (ret == 0) {
return 1;
}
}
return 0;
}


static PyObject *get_values(PyObject *obj) {
PyObject *values = PyObject_GetAttrString(obj, "values");
PRINTMARK();
PyObject *values = NULL;

if (!is_sparse_array(obj)) {
values = PyObject_GetAttrString(obj, "values");
PRINTMARK();
}

if (values && !PyArray_CheckExact(values)) {

if (PyObject_HasAttrString(values, "to_numpy")) {
values = PyObject_CallMethod(values, "to_numpy", NULL);
}

if (PyObject_HasAttrString(values, "values")) {
if (!is_sparse_array(values) && PyObject_HasAttrString(values, "values")) {
PyObject *subvals = get_values(values);
PyErr_Clear();
PRINTMARK();
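For readers not following the C, here is a rough Python rendering of the fallback chain that get_values now implements (an assumption-level sketch; reference counting, error handling and PRINTMARK tracing are omitted):

import numpy as np

def get_values_sketch(obj):
    # Skip the deprecated .values attribute for SparseArray objects.
    values = None
    if getattr(obj, '_subtyp', None) != 'sparse_array':
        values = getattr(obj, 'values', None)

    if values is not None and not isinstance(values, np.ndarray):
        # Prefer the public to_numpy() conversion when it is available.
        if hasattr(values, 'to_numpy'):
            values = values.to_numpy()
        # Otherwise keep unwrapping nested .values, again skipping sparse arrays.
        if getattr(values, '_subtyp', None) != 'sparse_array' and hasattr(values, 'values'):
            values = get_values_sketch(values)
    return values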
49 changes: 15 additions & 34 deletions pandas/core/arrays/sparse.py
@@ -21,11 +21,10 @@
from pandas.core.dtypes.base import ExtensionDtype
from pandas.core.dtypes.cast import (
astype_nansafe, construct_1d_arraylike_from_scalar, find_common_type,
infer_dtype_from_scalar, maybe_convert_platform)
infer_dtype_from_scalar)
from pandas.core.dtypes.common import (
is_array_like, is_bool_dtype, is_datetime64_any_dtype, is_dtype_equal,
is_integer, is_list_like, is_object_dtype, is_scalar, is_string_dtype,
pandas_dtype)
is_integer, is_object_dtype, is_scalar, is_string_dtype, pandas_dtype)
from pandas.core.dtypes.dtypes import register_extension_dtype
from pandas.core.dtypes.generic import (
ABCIndexClass, ABCSeries, ABCSparseArray, ABCSparseSeries)
@@ -890,7 +889,16 @@ def npoints(self):
def values(self):
"""
Dense values

.. deprecated:: 0.25.0
   Use ``np.asarray(...)`` or the ``.to_dense()`` method instead.
"""
msg = (
"The SparseArray.values attribute is deprecated and will be "
"removed in a future version. You can use `np.asarray(...)` or "
"the `.to_dense()` method instead.")
warnings.warn(msg, FutureWarning, stacklevel=2)
return self.to_dense()

def isna(self):
@@ -1076,7 +1084,7 @@ def __getitem__(self, key):
if is_integer(key):
return self._get_val_at(key)
elif isinstance(key, tuple):
data_slice = self.values[key]
data_slice = self.to_dense()[key]
elif isinstance(key, slice):
# special case to preserve dtypes
if key == slice(None):
@@ -1635,7 +1643,7 @@ def __array_wrap__(self, array, context=None):
from pandas.core.dtypes.generic import ABCSparseSeries

ufunc, inputs, _ = context
inputs = tuple(x.values if isinstance(x, ABCSparseSeries) else x
inputs = tuple(x.to_dense() if isinstance(x, ABCSparseSeries) else x
for x in inputs)
return self.__array_ufunc__(ufunc, '__call__', *inputs)

@@ -1854,37 +1862,10 @@ def _maybe_to_sparse(array):
array must be SparseSeries or SparseArray
"""
if isinstance(array, ABCSparseSeries):
array = array.values.copy()
array = array.array.copy()
return array


def _sanitize_values(arr):
"""
return an ndarray for our input,
in a platform independent manner
"""

if hasattr(arr, 'values'):
arr = arr.values
else:

# scalar
if is_scalar(arr):
arr = [arr]

# ndarray
if isinstance(arr, np.ndarray):
pass

elif is_list_like(arr) and len(arr) > 0:
arr = maybe_convert_platform(arr)

else:
arr = np.asarray(arr)

return arr


def make_sparse(arr, kind='block', fill_value=None, dtype=None, copy=False):
"""
Convert ndarray to sparse format
@@ -1902,7 +1883,7 @@ def make_sparse(arr, kind='block', fill_value=None, dtype=None, copy=False):
(sparse_values, index, fill_value) : (ndarray, SparseIndex, Scalar)
"""

arr = _sanitize_values(arr)
arr = com.values_from_object(arr)

if arr.ndim > 1:
raise TypeError("expected dimension <= 1 data")
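make_sparse now routes its input through pandas.core.common.values_from_object instead of the removed _sanitize_values helper. Roughly, and this is an assumption about the helper's behaviour rather than a verbatim copy of it, values_from_object unwraps pandas objects to their underlying values and passes anything else through unchanged:

def values_from_object_sketch(obj):
    # Approximate behaviour of pandas.core.common.values_from_object
    # (assumption: the real helper is implemented in pandas._libs.lib).
    getter = getattr(obj, 'get_values', None)
    if getter is not None:
        return getter()
    return obj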
4 changes: 2 additions & 2 deletions pandas/core/internals/managers.py
@@ -375,8 +375,8 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False,
# with a .values attribute.
aligned_args = {k: kwargs[k]
for k in align_keys
if hasattr(kwargs[k], 'values') and
not isinstance(kwargs[k], ABCExtensionArray)}
if not isinstance(kwargs[k], ABCExtensionArray) and
hasattr(kwargs[k], 'values')}

for b in self.blocks:
if filter is not None:
4 changes: 2 additions & 2 deletions pandas/core/ops.py
@@ -2272,10 +2272,10 @@ def _cast_sparse_series_op(left, right, opname):
# TODO: This should be moved to the array?
if is_integer_dtype(left) and is_integer_dtype(right):
# series coerces to float64 if result should have NaN/inf
if opname in ('floordiv', 'mod') and (right.values == 0).any():
if opname in ('floordiv', 'mod') and (right.to_dense() == 0).any():
left = left.astype(SparseDtype(np.float64, left.fill_value))
right = right.astype(SparseDtype(np.float64, right.fill_value))
elif opname in ('rfloordiv', 'rmod') and (left.values == 0).any():
elif opname in ('rfloordiv', 'rmod') and (left.to_dense() == 0).any():
left = left.astype(SparseDtype(np.float64, left.fill_value))
right = right.astype(SparseDtype(np.float64, right.fill_value))

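The zero checks above decide whether integer sparse operands must be upcast to float64: floor division and modulo by zero need to produce NaN/inf, which integer dtypes cannot represent. The check itself is rewritten from ``right.values == 0`` to ``right.to_dense() == 0`` so that pandas does not trigger its own deprecation warning internally. A small hedged illustration (data made up for the example):

import numpy as np
import pandas as pd

right = pd.SparseArray([0, 1, 0])

# Warning-free ways to test an operand for zeros
has_zero = (right.to_dense() == 0).any()
assert has_zero == (np.asarray(right) == 0).any()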
2 changes: 1 addition & 1 deletion pandas/core/sparse/frame.py
@@ -627,7 +627,7 @@ def _reindex_index(self, index, method, copy, level, fill_value=np.nan,
# .take returns SparseArray
new = values.take(indexer)
if need_mask:
new = new.values
new = new.to_dense()
# convert integer to float if necessary. need to do a lot
# more than that, handle boolean etc also
new, fill_value = maybe_upcast(new, fill_value=fill_value)
44 changes: 26 additions & 18 deletions pandas/tests/arrays/sparse/test_array.py
@@ -433,9 +433,9 @@ def test_constructor_bool(self):
tm.assert_numpy_array_equal(arr.sp_index.indices,
np.array([2, 3], np.int32))

for dense in [arr.to_dense(), arr.values]:
assert dense.dtype == bool
tm.assert_numpy_array_equal(dense, data)
dense = arr.to_dense()
assert dense.dtype == bool
tm.assert_numpy_array_equal(dense, data)

def test_constructor_bool_fill_value(self):
arr = SparseArray([True, False, True], dtype=None)
@@ -463,9 +463,9 @@ def test_constructor_float32(self):
tm.assert_numpy_array_equal(arr.sp_index.indices,
np.array([0, 2], dtype=np.int32))

for dense in [arr.to_dense(), arr.values]:
assert dense.dtype == np.float32
tm.assert_numpy_array_equal(dense, data)
dense = arr.to_dense()
assert dense.dtype == np.float32
tm.assert_numpy_array_equal(dense, data)

def test_astype(self):
# float -> float
@@ -514,7 +514,7 @@ def test_astype_all(self, any_real_dtype):
assert res.dtype == SparseDtype(typ, 1)
assert res.sp_values.dtype == typ

tm.assert_numpy_array_equal(np.asarray(res.values),
tm.assert_numpy_array_equal(np.asarray(res.to_dense()),
vals.astype(typ))

@pytest.mark.parametrize('array, dtype, expected', [
@@ -596,7 +596,6 @@ def test_copy_shallow(self):
assert arr2.sp_index is self.arr.sp_index

def test_values_asarray(self):
assert_almost_equal(self.arr.values, self.arr_data)
assert_almost_equal(self.arr.to_dense(), self.arr_data)

@pytest.mark.parametrize('data,shape,dtype', [
@@ -627,7 +626,7 @@ def test_dense_repr(self, vals, fill_value, method):

def test_getitem(self):
def _checkit(i):
assert_almost_equal(self.arr[i], self.arr.values[i])
assert_almost_equal(self.arr[i], self.arr.to_dense()[i])

for i in range(len(self.arr)):
_checkit(i)
@@ -641,11 +640,11 @@ def test_getitem_arraylike_mask(self):

def test_getslice(self):
result = self.arr[:-3]
exp = SparseArray(self.arr.values[:-3])
exp = SparseArray(self.arr.to_dense()[:-3])
tm.assert_sp_array_equal(result, exp)

result = self.arr[-4:]
exp = SparseArray(self.arr.values[-4:])
exp = SparseArray(self.arr.to_dense()[-4:])
tm.assert_sp_array_equal(result, exp)

# two corner cases from Series
@@ -654,7 +653,7 @@ def test_getslice(self):
tm.assert_sp_array_equal(result, exp)

result = self.arr[:-12]
exp = SparseArray(self.arr.values[:0])
exp = SparseArray(self.arr.to_dense()[:0])
tm.assert_sp_array_equal(result, exp)

def test_getslice_tuple(self):
Expand Down Expand Up @@ -702,16 +701,16 @@ def test_binary_operators(self, op):

def _check_op(op, first, second):
res = op(first, second)
exp = SparseArray(op(first.values, second.values),
exp = SparseArray(op(first.to_dense(), second.to_dense()),
fill_value=first.fill_value)
assert isinstance(res, SparseArray)
assert_almost_equal(res.values, exp.values)
assert_almost_equal(res.to_dense(), exp.to_dense())

res2 = op(first, second.values)
res2 = op(first, second.to_dense())
assert isinstance(res2, SparseArray)
tm.assert_sp_array_equal(res, res2)

res3 = op(first.values, second)
res3 = op(first.to_dense(), second)
assert isinstance(res3, SparseArray)
tm.assert_sp_array_equal(res, res3)

@@ -720,13 +719,13 @@ def _check_op(op, first, second):

# Ignore this if the actual op raises (e.g. pow).
try:
exp = op(first.values, 4)
exp = op(first.to_dense(), 4)
exp_fv = op(first.fill_value, 4)
except ValueError:
pass
else:
assert_almost_equal(res4.fill_value, exp_fv)
assert_almost_equal(res4.values, exp)
assert_almost_equal(res4.to_dense(), exp)

with np.errstate(all="ignore"):
for first_arr, second_arr in [(arr1, arr2), (farr1, farr2)]:
@@ -1230,3 +1229,12 @@ def test_map_missing():

result = arr.map({0: 10, 1: 11})
tm.assert_sp_array_equal(result, expected)


def test_deprecated_values():
arr = SparseArray([0, 1, 2])

with tm.assert_produces_warning(FutureWarning):
result = arr.values

tm.assert_numpy_array_equal(result, arr.to_dense())
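The new test above pins the deprecation with tm.assert_produces_warning. Downstream code that still needs the old attribute during the deprecation window can silence the warning locally; a generic sketch using the standard warnings module (nothing pandas-specific is assumed):

import warnings
import pandas as pd

arr = pd.SparseArray([0, 1, 2])

with warnings.catch_warnings():
    warnings.simplefilter('ignore', FutureWarning)
    dense = arr.values  # no FutureWarning escapes this block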
6 changes: 3 additions & 3 deletions pandas/tests/sparse/series/test_series.py
@@ -255,7 +255,7 @@ def test_constructor(self):
assert isinstance(self.iseries.sp_index, IntIndex)

assert self.zbseries.fill_value == 0
tm.assert_numpy_array_equal(self.zbseries.values.values,
tm.assert_numpy_array_equal(self.zbseries.values.to_dense(),
self.bseries.to_dense().fillna(0).values)

# pass SparseSeries
@@ -322,7 +322,7 @@ def test_constructor_ndarray(self):
def test_constructor_nonnan(self):
arr = [0, 0, 0, nan, nan]
sp_series = SparseSeries(arr, fill_value=0)
tm.assert_numpy_array_equal(sp_series.values.values, np.array(arr))
tm.assert_numpy_array_equal(sp_series.values.to_dense(), np.array(arr))
assert len(sp_series) == 5
assert sp_series.shape == (5, )

@@ -514,7 +514,7 @@ def _compare(idx):
sparse_result = sp.take(idx)
assert isinstance(sparse_result, SparseSeries)
tm.assert_almost_equal(dense_result,
sparse_result.values.values)
sparse_result.values.to_dense())

_compare([1., 2., 3., 4., 5., 0.])
_compare([7, 2, 9, 0, 4])
2 changes: 1 addition & 1 deletion pandas/util/testing.py
@@ -1403,7 +1403,7 @@ def assert_sp_array_equal(left, right, check_dtype=True, check_kind=True,
assert_attr_equal('fill_value', left, right)
if check_dtype:
assert_attr_equal('dtype', left, right)
assert_numpy_array_equal(left.values, right.values,
assert_numpy_array_equal(left.to_dense(), right.to_dense(),
check_dtype=check_dtype)

