diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 4a3122a78b234..fabe86f6a86d6 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -490,6 +490,7 @@ Deprecations ~~~~~~~~~~~~ - :func:`read_excel()` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with ``.to_excel()`` (:issue:`10559`). +- The ``convert`` parameter has been deprecated in the ``.take()`` method, as it was not being respected (:issue:`16948`) - ``pd.options.html.border`` has been deprecated in favor of ``pd.options.display.html.border`` (:issue:`15793`). - :func:`SeriesGroupBy.nth` has deprecated ``True`` in favor of ``'all'`` for its kwarg ``dropna`` (:issue:`11038`). - :func:`DataFrame.as_blocks` is deprecated, as this is exposing the internal implementation (:issue:`17302`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a12e611f6618a..5d439f88bca15 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2034,7 +2034,7 @@ def _ixs(self, i, axis=0): return self.loc[:, lab_slice] else: if isinstance(label, Index): - return self.take(i, axis=1, convert=True) + return self._take(i, axis=1, convert=True) index_len = len(self.index) @@ -2116,10 +2116,10 @@ def _getitem_array(self, key): # be reindexed to match DataFrame rows key = check_bool_indexer(self.index, key) indexer = key.nonzero()[0] - return self.take(indexer, axis=0, convert=False) + return self._take(indexer, axis=0, convert=False) else: indexer = self.loc._convert_to_indexer(key, axis=1) - return self.take(indexer, axis=1, convert=True) + return self._take(indexer, axis=1, convert=True) def _getitem_multilevel(self, key): loc = self.columns.get_loc(key) @@ -3355,7 +3355,7 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None, else: raise TypeError('must specify how or thresh') - result = self.take(mask.nonzero()[0], axis=axis, convert=False) + result = self._take(mask.nonzero()[0], axis=axis, convert=False) if inplace: self._update_inplace(result) @@ -3486,7 +3486,7 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False, new_data = self._data.take(indexer, axis=self._get_block_manager_axis(axis), - convert=False, verify=False) + verify=False) if inplace: return self._update_inplace(new_data) @@ -3547,7 +3547,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, baxis = self._get_block_manager_axis(axis) new_data = self._data.take(indexer, axis=baxis, - convert=False, verify=False) + verify=False) # reconstruct axis if needed new_data.axes[baxis] = new_data.axes[baxis]._sort_levels_monotonic() diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2fb0e348c01c0..99c58b246deb7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -38,6 +38,7 @@ from pandas.core.index import (Index, MultiIndex, _ensure_index, InvalidIndexError) import pandas.core.indexing as indexing +from pandas.core.indexing import maybe_convert_indices from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.period import PeriodIndex, Period from pandas.core.internals import BlockManager @@ -1822,7 +1823,8 @@ def _iget_item_cache(self, item): if ax.is_unique: lower = self._get_item_cache(ax[item]) else: - lower = self.take(item, axis=self._info_axis_number, convert=True) + lower = self._take(item, axis=self._info_axis_number, + convert=True) return lower def _box_item_values(self, key, values): @@ -2057,8 +2059,63 @@ def __delitem__(self, key): except KeyError: pass - def take(self, indices, axis=0, convert=True, is_copy=True, **kwargs): + _shared_docs['_take'] = """ + Return the elements in the given *positional* indices along an axis. + + This means that we are not indexing according to actual values in + the index attribute of the object. We are indexing according to the + actual position of the element in the object. + + This is the internal version of ``.take()`` and will contain a wider + selection of parameters useful for internal use but not as suitable + for public usage. + + Parameters + ---------- + indices : array-like + An array of ints indicating which positions to take. + axis : int, default 0 + The axis on which to select elements. "0" means that we are + selecting rows, "1" means that we are selecting columns, etc. + convert : bool, default True + Whether to convert negative indices into positive ones. + For example, ``-1`` would map to the ``len(axis) - 1``. + The conversions are similar to the behavior of indexing a + regular Python list. + is_copy : bool, default True + Whether to return a copy of the original object or not. + + Returns + ------- + taken : type of caller + An array-like containing the elements taken from the object. + + See Also + -------- + numpy.ndarray.take + numpy.take """ + + @Appender(_shared_docs['_take']) + def _take(self, indices, axis=0, convert=True, is_copy=True): + self._consolidate_inplace() + + if convert: + indices = maybe_convert_indices(indices, len(self._get_axis(axis))) + + new_data = self._data.take(indices, + axis=self._get_block_manager_axis(axis), + verify=True) + result = self._constructor(new_data).__finalize__(self) + + # Maybe set copy if we didn't actually change the index. + if is_copy: + if not result._get_axis(axis).equals(self._get_axis(axis)): + result._set_is_copy(self) + + return result + + _shared_docs['take'] = """ Return the elements in the given *positional* indices along an axis. This means that we are not indexing according to actual values in @@ -2073,9 +2130,12 @@ def take(self, indices, axis=0, convert=True, is_copy=True, **kwargs): The axis on which to select elements. "0" means that we are selecting rows, "1" means that we are selecting columns, etc. convert : bool, default True - Whether to convert negative indices to positive ones, just as with - indexing into Python lists. For example, if `-1` was passed in, - this index would be converted ``n - 1``. + .. deprecated:: 0.21.0 + + Whether to convert negative indices into positive ones. + For example, ``-1`` would map to the ``len(axis) - 1``. + The conversions are similar to the behavior of indexing a + regular Python list. is_copy : bool, default True Whether to return a copy of the original object or not. @@ -2131,19 +2191,17 @@ class max_speed numpy.ndarray.take numpy.take """ + + @Appender(_shared_docs['take']) + def take(self, indices, axis=0, convert=True, is_copy=True, **kwargs): nv.validate_take(tuple(), kwargs) - self._consolidate_inplace() - new_data = self._data.take(indices, - axis=self._get_block_manager_axis(axis), - convert=True, verify=True) - result = self._constructor(new_data).__finalize__(self) - # maybe set copy if we didn't actually change the index - if is_copy: - if not result._get_axis(axis).equals(self._get_axis(axis)): - result._set_is_copy(self) + if not convert: + msg = ("The 'convert' parameter is deprecated " + "and will be removed in a future version.") + warnings.warn(msg, FutureWarning, stacklevel=2) - return result + return self._take(indices, axis=axis, convert=convert, is_copy=is_copy) def xs(self, key, axis=0, level=None, drop_level=True): """ @@ -2244,9 +2302,9 @@ def xs(self, key, axis=0, level=None, drop_level=True): if isinstance(loc, np.ndarray): if loc.dtype == np.bool_: inds, = loc.nonzero() - return self.take(inds, axis=axis, convert=False) + return self._take(inds, axis=axis, convert=False) else: - return self.take(loc, axis=axis, convert=True) + return self._take(loc, axis=axis, convert=True) if not is_scalar(loc): new_index = self.index[loc] @@ -5112,7 +5170,7 @@ def at_time(self, time, asof=False): """ try: indexer = self.index.indexer_at_time(time, asof=asof) - return self.take(indexer, convert=False) + return self._take(indexer, convert=False) except AttributeError: raise TypeError('Index must be DatetimeIndex') @@ -5136,7 +5194,7 @@ def between_time(self, start_time, end_time, include_start=True, indexer = self.index.indexer_between_time( start_time, end_time, include_start=include_start, include_end=include_end) - return self.take(indexer, convert=False) + return self._take(indexer, convert=False) except AttributeError: raise TypeError('Index must be DatetimeIndex') diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index a62ae40a85941..c9edf52d992e7 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -319,8 +319,8 @@ def _set_grouper(self, obj, sort=False): # use stable sort to support first, last, nth indexer = self.indexer = ax.argsort(kind='mergesort') ax = ax.take(indexer) - obj = obj.take(indexer, axis=self.axis, - convert=False, is_copy=False) + obj = obj._take(indexer, axis=self.axis, + convert=False, is_copy=False) self.obj = obj self.grouper = ax @@ -643,7 +643,7 @@ def get_group(self, name, obj=None): if not len(inds): raise KeyError(name) - return obj.take(inds, axis=self.axis, convert=False) + return obj._take(inds, axis=self.axis, convert=False) def __iter__(self): """ @@ -2202,7 +2202,7 @@ def _aggregate_series_fast(self, obj, func): # avoids object / Series creation overhead dummy = obj._get_values(slice(None, 0)).to_dense() indexer = get_group_index_sorter(group_index, ngroups) - obj = obj.take(indexer, convert=False).to_dense() + obj = obj._take(indexer, convert=False).to_dense() group_index = algorithms.take_nd( group_index, indexer, allow_fill=False) grouper = lib.SeriesGrouper(obj, func, group_index, ngroups, @@ -4435,7 +4435,7 @@ def __iter__(self): yield i, self._chop(sdata, slice(start, end)) def _get_sorted_data(self): - return self.data.take(self.sort_idx, axis=self.axis, convert=False) + return self.data._take(self.sort_idx, axis=self.axis, convert=False) def _chop(self, sdata, slice_obj): return sdata.iloc[slice_obj] diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b7a51afcedabf..6ea1a4d40d9eb 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1093,7 +1093,7 @@ def _getitem_iterable(self, key, axis=0): if is_bool_indexer(key): key = check_bool_indexer(labels, key) inds, = key.nonzero() - return self.obj.take(inds, axis=axis, convert=False) + return self.obj._take(inds, axis=axis, convert=False) else: # Have the index compute an indexer or return None # if it cannot handle; we only act on all found values @@ -1126,15 +1126,15 @@ def _getitem_iterable(self, key, axis=0): keyarr) if new_indexer is not None: - result = self.obj.take(indexer[indexer != -1], axis=axis, - convert=False) + result = self.obj._take(indexer[indexer != -1], axis=axis, + convert=False) result = result._reindex_with_indexers( {axis: [new_target, new_indexer]}, copy=True, allow_dups=True) else: - result = self.obj.take(indexer, axis=axis, convert=False) + result = self.obj.take(indexer, axis=axis) return result @@ -1265,7 +1265,7 @@ def _get_slice_axis(self, slice_obj, axis=0): if isinstance(indexer, slice): return self._slice(indexer, axis=axis, kind='iloc') else: - return self.obj.take(indexer, axis=axis, convert=False) + return self.obj.take(indexer, axis=axis) class _IXIndexer(_NDFrameIndexer): @@ -1350,7 +1350,7 @@ def _getbool_axis(self, key, axis=0): key = check_bool_indexer(labels, key) inds, = key.nonzero() try: - return self.obj.take(inds, axis=axis, convert=False) + return self.obj.take(inds, axis=axis) except Exception as detail: raise self._exception(detail) @@ -1367,7 +1367,7 @@ def _get_slice_axis(self, slice_obj, axis=0): if isinstance(indexer, slice): return self._slice(indexer, axis=axis, kind='iloc') else: - return self.obj.take(indexer, axis=axis, convert=False) + return self.obj.take(indexer, axis=axis) class _LocIndexer(_LocationIndexer): @@ -1707,7 +1707,7 @@ def _get_slice_axis(self, slice_obj, axis=0): if isinstance(slice_obj, slice): return self._slice(slice_obj, axis=axis, kind='iloc') else: - return self.obj.take(slice_obj, axis=axis, convert=False) + return self.obj.take(slice_obj, axis=axis) def _get_list_axis(self, key, axis=0): """ @@ -1723,7 +1723,7 @@ def _get_list_axis(self, key, axis=0): Series object """ try: - return self.obj.take(key, axis=axis, convert=False) + return self.obj.take(key, axis=axis) except IndexError: # re-raise with different error message raise IndexError("positional indexers are out-of-bounds") diff --git a/pandas/core/series.py b/pandas/core/series.py index a05324142b223..97f39a680c8c9 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2563,35 +2563,24 @@ def memory_usage(self, index=True, deep=False): v += self.index.memory_usage(deep=deep) return v - def take(self, indices, axis=0, convert=True, is_copy=False, **kwargs): - """ - return Series corresponding to requested indices - - Parameters - ---------- - indices : list / array of ints - convert : translate negative to positive indices (default) - - Returns - ------- - taken : Series - - See also - -------- - numpy.ndarray.take - """ - if kwargs: - nv.validate_take(tuple(), kwargs) - - # check/convert indicies here + @Appender(generic._shared_docs['_take']) + def _take(self, indices, axis=0, convert=True, is_copy=False): if convert: indices = maybe_convert_indices(indices, len(self._get_axis(axis))) indices = _ensure_platform_int(indices) new_index = self.index.take(indices) new_values = self._values.take(indices) - return (self._constructor(new_values, index=new_index, fastpath=True) - .__finalize__(self)) + + result = (self._constructor(new_values, index=new_index, + fastpath=True).__finalize__(self)) + + # Maybe set copy if we didn't actually change the index. + if is_copy: + if not result._get_axis(axis).equals(self._get_axis(axis)): + result._set_is_copy(self) + + return result def isin(self, values): """ diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 2aecb9d7c4ffb..5166dc927989e 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -602,16 +602,15 @@ def sparse_reindex(self, new_index): sparse_index=new_index, fill_value=self.fill_value).__finalize__(self) + @Appender(generic._shared_docs['take']) def take(self, indices, axis=0, convert=True, *args, **kwargs): - """ - Sparse-compatible version of ndarray.take + convert = nv.validate_take_with_convert(convert, args, kwargs) - Returns - ------- - taken : ndarray - """ + if not convert: + msg = ("The 'convert' parameter is deprecated " + "and will be removed in a future version.") + warnings.warn(msg, FutureWarning, stacklevel=2) - convert = nv.validate_take_with_convert(convert, args, kwargs) new_values = SparseArray.take(self.values, indices) new_index = self.index.take(indices) return self._constructor(new_values, diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index fb9b8c2ed7aff..219c1df301c4b 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -822,7 +822,7 @@ def test_take(self): expected = df.loc[:, ['D', 'B', 'C', 'A']] assert_frame_equal(result, expected, check_names=False) - # neg indicies + # negative indices order = [2, 1, -1] for df in [self.frame]: @@ -830,6 +830,10 @@ def test_take(self): expected = df.reindex(df.index.take(order)) assert_frame_equal(result, expected) + with tm.assert_produces_warning(FutureWarning): + result = df.take(order, convert=False, axis=0) + assert_frame_equal(result, expected) + # axis = 1 result = df.take(order, axis=1) expected = df.loc[:, ['C', 'B', 'D']] @@ -854,7 +858,7 @@ def test_take(self): expected = df.loc[:, ['foo', 'B', 'C', 'A', 'D']] assert_frame_equal(result, expected) - # neg indicies + # negative indices order = [4, 1, -2] for df in [self.mixed_frame]: diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 3e863a59df67e..17316a714e260 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -581,11 +581,11 @@ def gen_test(l, l2): def gen_expected(df, mask): l = len(mask) - return pd.concat([df.take([0], convert=False), + return pd.concat([df.take([0]), DataFrame(np.ones((l, len(columns))), index=[0] * l, columns=columns), - df.take(mask[1:], convert=False)]) + df.take(mask[1:])]) df = gen_test(900, 100) assert not df.index.is_unique diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 2182e3fbfc212..2a49a747893eb 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -1066,6 +1066,23 @@ def test_setitem_with_tz_dst(self): s.iloc[[1, 2]] = vals tm.assert_series_equal(s, exp) + def test_take(self): + s = Series([-1, 5, 6, 2, 4]) + + actual = s.take([1, 3, 4]) + expected = Series([5, 2, 4], index=[1, 3, 4]) + tm.assert_series_equal(actual, expected) + + actual = s.take([-1, 3, 4]) + expected = Series([4, 2, 4], index=[4, 3, 4]) + tm.assert_series_equal(actual, expected) + + pytest.raises(IndexError, s.take, [1, 10]) + pytest.raises(IndexError, s.take, [2, 5]) + + with tm.assert_produces_warning(FutureWarning): + s.take([-1, 3, 4], convert=False) + def test_where(self): s = Series(np.random.randn(5)) cond = s > 0 diff --git a/pandas/tests/sparse/test_series.py b/pandas/tests/sparse/test_series.py index 451f369593347..8c0ed322028e8 100644 --- a/pandas/tests/sparse/test_series.py +++ b/pandas/tests/sparse/test_series.py @@ -520,6 +520,9 @@ def _compare(idx): exp = pd.Series(np.repeat(nan, 5)) tm.assert_series_equal(sp.take([0, 1, 2, 3, 4]), exp) + with tm.assert_produces_warning(FutureWarning): + sp.take([1, 5], convert=False) + def test_numpy_take(self): sp = SparseSeries([1.0, 2.0, 3.0]) indices = [1, 2]