Skip to content

Commit

Permalink
CLN: enforce the deprecation of the Series.argsort NA behavior (#58232)
Browse files Browse the repository at this point in the history

* enforce deprecation of the Series.argsort NA behavior

* remove comments

* add a note to v3.0.0

* correct def argsort and tests

* correct def argsort/tests

* fix pre-commit error

* Restore numpy test

---------

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
  • Loading branch information
natmokval and mroeschke authored Jun 26, 2024
1 parent 2d6e61e commit bef88ef
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 39 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,7 @@ Other Removals
- Enforced deprecation of strings ``T``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`57627`)
- Enforced deprecation of the behavior of :func:`concat` when ``len(keys) != len(objs)`` would truncate to the shorter of the two. Now this raises a ``ValueError`` (:issue:`43485`)
- Enforced deprecation of the behavior of :meth:`DataFrame.replace` and :meth:`Series.replace` with :class:`CategoricalDtype` that would introduce new categories. (:issue:`58270`)
- Enforced deprecation of the behavior of :meth:`Series.argsort` in the presence of NA values. Now NA values are ordered last instead of being set to ``-1`` (:issue:`58232`)
- Enforced deprecation of values "pad", "ffill", "bfill", and "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` (:issue:`57869`)
- Enforced deprecation removing :meth:`Categorical.to_list`, use ``obj.tolist()`` instead (:issue:`51254`)
- Enforced silent-downcasting deprecation for :ref:`all relevant methods <whatsnew_220.silent_downcasting>` (:issue:`54710`)
Expand Down
21 changes: 1 addition & 20 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@
deprecate_nonkeyword_arguments,
doc,
)
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import (
validate_ascending,
validate_bool_kwarg,
Expand Down Expand Up @@ -3722,25 +3721,7 @@ def argsort(
# GH#54257 We allow -1 here so that np.argsort(series) works
self._get_axis_number(axis)

values = self._values
mask = isna(values)

if mask.any():
# TODO(3.0): once this deprecation is enforced we can call
# self.array.argsort directly, which will close GH#43840 and
# GH#12694
warnings.warn(
"The behavior of Series.argsort in the presence of NA values is "
"deprecated. In a future version, NA values will be ordered "
"last instead of set to -1.",
FutureWarning,
stacklevel=find_stack_level(),
)
result = np.full(len(self), -1, dtype=np.intp)
notmask = ~mask
result[notmask] = np.argsort(values[notmask], kind=kind)
else:
result = np.argsort(values, kind=kind)
result = self.array.argsort(kind=kind)

res = self._constructor(
result, index=self.index, name=self.name, dtype=np.intp, copy=False
Expand Down
6 changes: 2 additions & 4 deletions pandas/tests/extension/base/methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,8 @@ def test_argsort_missing_array(self, data_missing_for_sorting):
tm.assert_numpy_array_equal(result, expected)

def test_argsort_missing(self, data_missing_for_sorting):
msg = "The behavior of Series.argsort in the presence of NA values"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = pd.Series(data_missing_for_sorting).argsort()
expected = pd.Series(np.array([1, -1, 0], dtype=np.intp))
result = pd.Series(data_missing_for_sorting).argsort()
expected = pd.Series(np.array([2, 0, 1], dtype=np.intp))
tm.assert_series_equal(result, expected)

def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting, na_value):
Expand Down
22 changes: 7 additions & 15 deletions pandas/tests/series/methods/test_argsort.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,15 @@ def test_argsort_axis(self):

def test_argsort_numpy(self, datetime_series):
ser = datetime_series

res = np.argsort(ser).values
expected = np.argsort(np.array(ser))
tm.assert_numpy_array_equal(res, expected)

# with missing values
ts = ser.copy()
ts[::2] = np.nan

msg = "The behavior of Series.argsort in the presence of NA values"
with tm.assert_produces_warning(
FutureWarning, match=msg, check_stacklevel=False
):
result = np.argsort(ts)[1::2]
expected = np.argsort(np.array(ts.dropna()))
def test_argsort_numpy_missing(self):
data = [0.1, np.nan, 0.2, np.nan, 0.3]
ser = Series(data)
result = np.argsort(ser)
expected = np.argsort(np.array(data))

tm.assert_numpy_array_equal(result.values, expected)

Expand All @@ -56,10 +50,8 @@ def test_argsort_dt64(self, unit):
expected = Series(range(5), dtype=np.intp)
tm.assert_series_equal(result, expected)

msg = "The behavior of Series.argsort in the presence of NA values"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = shifted.argsort()
expected = Series(list(range(4)) + [-1], dtype=np.intp)
result = shifted.argsort()
expected = Series(list(range(4)) + [4], dtype=np.intp)
tm.assert_series_equal(result, expected)

def test_argsort_stable(self):
Expand Down

0 comments on commit bef88ef

Please sign in to comment.