From 5f628c2db0c9db82b0f4c5108913cd15b9efebc8 Mon Sep 17 00:00:00 2001 From: Lucas Kushner Date: Sat, 15 Jul 2017 19:14:56 +0000 Subject: [PATCH] Adding argmax and argmin with proper behavior (#16830) --- doc/source/whatsnew/v0.21.0.txt | 2 - pandas/core/frame.py | 60 +++++++++++++++++++++++- pandas/core/series.py | 66 +++++++++++++++++++++++---- pandas/tests/frame/test_analytics.py | 28 ++++++++++++ pandas/tests/series/test_analytics.py | 22 +++------ 5 files changed, 149 insertions(+), 29 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 993c35dce7bac..6ddf6029b99bb 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -116,8 +116,6 @@ Other API Changes Deprecations ~~~~~~~~~~~~ - :func:`read_excel()` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with ``.to_excel()`` (:issue:`10559`). -- :method:`Series.argmax` has been deprecated in favor of :method:`Series.idxmax` (:issue:`16830`) -- :method:`Series.argmin` has been deprecated in favor of :method:`Series.idxmin` (:issue:`16830`) .. _whatsnew_0210.prior_deprecations: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9920ddf854850..2966527fa017e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5329,7 +5329,7 @@ def idxmin(self, axis=0, skipna=True): def idxmax(self, axis=0, skipna=True): """ - Return index of first occurrence of maximum over requested axis. + Return label of first occurrence of maximum over requested axis. NA/null values are excluded. Parameters @@ -5358,6 +5358,64 @@ def idxmax(self, axis=0, skipna=True): result = [index[i] if i >= 0 else NA for i in indices] return Series(result, index=self._get_agg_axis(axis)) + def argmin(self, axis=0, skipna=True): + """ + Return index of first occurrence of minimum over requested axis. + NA/null values are excluded. 
+ + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns'}, default 0 + 0 or 'index' for row-wise, 1 or 'columns' for column-wise + skipna : boolean, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be -1. + + Returns + ------- + argmin : Series + + Notes + ----- + This method is the DataFrame version of ``ndarray.argmin``. + + See Also + -------- + Series.argmin + """ + axis = self._get_axis_number(axis) + indices = nanops.nanargmin(self.values, axis=axis, skipna=skipna) + return Series(indices, index=self._get_agg_axis(axis)) + + def argmax(self, axis=0, skipna=True): + """ + Return index of first occurrence of maximum over requested axis. + NA/null values are excluded. + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns'}, default 0 + 0 or 'index' for row-wise, 1 or 'columns' for column-wise + skipna : boolean, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be -1. + + Returns + ------- + argmax : Series + + Notes + ----- + This method is the DataFrame version of ``ndarray.argmax``. 
+ + See Also + -------- + Series.argmax + """ + axis = self._get_axis_number(axis) + indices = nanops.nanargmax(self.values, axis=axis, skipna=skipna) + return Series(indices, index=self._get_agg_axis(axis)) + def _get_agg_axis(self, axis_num): """ let's be explict about this """ if axis_num == 0: diff --git a/pandas/core/series.py b/pandas/core/series.py index 5294031be0ff8..cab4a4205ab95 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -72,7 +72,7 @@ import pandas.core.nanops as nanops import pandas.io.formats.format as fmt from pandas.util._decorators import ( - Appender, deprecate, deprecate_kwarg, Substitution) + Appender, deprecate_kwarg, Substitution) from pandas.util._validators import validate_bool_kwarg from pandas._libs import index as libindex, tslib as libts, lib, iNaT @@ -1239,7 +1239,7 @@ def duplicated(self, keep='first'): def idxmin(self, axis=None, skipna=True, *args, **kwargs): """ - Index of first occurrence of minimum of values. + Label of first occurrence of minimum of values. Parameters ---------- @@ -1259,15 +1259,14 @@ def idxmin(self, axis=None, skipna=True, *args, **kwargs): DataFrame.idxmin numpy.ndarray.argmin """ - skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs) - i = nanops.nanargmin(_values_from_object(self), skipna=skipna) + i = self.argmin(axis, skipna, *args, **kwargs) if i == -1: return np.nan return self.index[i] def idxmax(self, axis=None, skipna=True, *args, **kwargs): """ - Index of first occurrence of maximum of values. + Label of first occurrence of maximum of values. 
Parameters ---------- @@ -1287,15 +1286,62 @@ def idxmax(self, axis=None, skipna=True, *args, **kwargs): DataFrame.idxmax numpy.ndarray.argmax """ - skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs) - i = nanops.nanargmax(_values_from_object(self), skipna=skipna) + i = self.argmax(axis, skipna, *args, **kwargs) if i == -1: return np.nan return self.index[i] - # ndarray compat - argmin = deprecate('argmin', idxmin) - argmax = deprecate('argmax', idxmax) + def argmin(self, axis=None, skipna=True, *args, **kwargs): + """ + Index of first occurrence of minimum of values. + + Parameters + ---------- + skipna : boolean, default True + Exclude NA/null values + + Returns + ------- + argmin : Index of minimum of values + + Notes + ----- + This method is the Series version of ``ndarray.argmin``. + + See Also + -------- + DataFrame.argmin + numpy.ndarray.argmin + """ + skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs) + i = nanops.nanargmin(_values_from_object(self), skipna=skipna) + return i + + def argmax(self, axis=None, skipna=True, *args, **kwargs): + """ + Index of first occurrence of maximum of values. + + Parameters + ---------- + skipna : boolean, default True + Exclude NA/null values + + Returns + ------- + argmax : Index of maximum of values + + Notes + ----- + This method is the Series version of ``ndarray.argmax``. 
+ + See Also + -------- + DataFrame.argmax + numpy.ndarray.argmax + """ + skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs) + i = nanops.nanargmax(_values_from_object(self), skipna=skipna) + return i def round(self, decimals=0, *args, **kwargs): """ diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index b09325bfa2ddc..9b82c99c96fb7 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1031,6 +1031,34 @@ def test_idxmax(self): pytest.raises(ValueError, frame.idxmax, axis=2) + def test_argmin(self): + frame = self.frame + frame.loc[5:10] = np.nan + frame.loc[15:20, -2:] = np.nan + for skipna in [True, False]: + for axis in [0, 1]: + for df in [frame, self.intframe]: + result = df.argmin(axis=axis, skipna=skipna) + expected = df.apply(Series.argmin, axis=axis, + skipna=skipna) + tm.assert_series_equal(result, expected) + + pytest.raises(ValueError, frame.argmin, axis=2) + + def test_argmax(self): + frame = self.frame + frame.loc[5:10] = np.nan + frame.loc[15:20, -2:] = np.nan + for skipna in [True, False]: + for axis in [0, 1]: + for df in [frame, self.intframe]: + result = df.argmax(axis=axis, skipna=skipna) + expected = df.apply(Series.argmax, axis=axis, + skipna=skipna) + tm.assert_series_equal(result, expected) + + pytest.raises(ValueError, frame.argmax, axis=2) + # ---------------------------------------------------------------------- # Logical reductions diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index fa95e0027ef3a..78f73a7dd6664 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1212,14 +1212,9 @@ def test_idxmin(self): def test_numpy_argmin(self): data = np.random.randint(0, 11, size=10) - - with pytest.warns(FutureWarning): - result = np.argmin(Series(data)) - assert result == np.argmin(data) - - with tm.assert_produces_warning(FutureWarning): - # argmin is aliased to 
idxmin - Series(data).argmin() + result = np.argmin(Series(data)) + assert result == np.argmin(data) + assert result == Series(data).argmin() if not _np_version_under1p10: msg = "the 'out' parameter is not supported" @@ -1272,14 +1267,9 @@ def test_idxmax(self): def test_numpy_argmax(self): data = np.random.randint(0, 11, size=10) - - with pytest.warns(FutureWarning): - result = np.argmax(Series(data)) - assert result == np.argmax(data) - - with tm.assert_produces_warning(FutureWarning): - # argmax is aliased to idxmax - Series(data).argmax() + result = np.argmax(Series(data)) + assert result == np.argmax(data) + assert result == Series(data).argmax() if not _np_version_under1p10: msg = "the 'out' parameter is not supported"