From 767ee03ed58e1509e835a6b833b2a7babbb90cda Mon Sep 17 00:00:00 2001 From: Lucas Kushner Date: Wed, 27 Sep 2017 10:07:47 -0500 Subject: [PATCH] Deprecating Series.argmin and Series.argmax (#16830) (#16955) * Deprecating Series.argmin and Series.argmax (#16830) Added statements about correcting behavior in future commit Add reference to github ticket Fixing placement of github comment Made test code more explicit Fixing unrelated tests that are also throwing warnings Updating whatsnew to give more detail about deprecation Fixing whatsnew and breaking out tests to catch warnings Additional comments and more concise whatsnew Updating deprecate decorator to support custom message DOC: Update docstrings, depr message, and whatsnew * Added debug prints * Try splitting the filters * Reword whatsnew * Change sparse series test * Skip on py2 * Change to idxmin * Remove py2 skips * Catch more warnings * Final switch to idxmax * Consistent tests, refactor to_string * Fixed tests --- doc/source/whatsnew/v0.21.0.txt | 22 ++++++++++ pandas/core/series.py | 29 +++++++++---- pandas/io/formats/format.py | 4 +- pandas/tests/series/test_analytics.py | 60 ++++++++++++++++++++------- pandas/tests/series/test_api.py | 2 +- pandas/tests/series/test_operators.py | 28 ++++++------- pandas/tests/sparse/test_series.py | 16 ++++++- pandas/util/_decorators.py | 8 ++-- 8 files changed, 124 insertions(+), 45 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 06f19782682b03..ae55b4a0aa4691 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -487,11 +487,33 @@ Other API Changes Deprecations ~~~~~~~~~~~~ + - :func:`read_excel()` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with ``.to_excel()`` (:issue:`10559`). - ``pd.options.html.border`` has been deprecated in favor of ``pd.options.display.html.border`` (:issue:`15793`). - :func:`SeriesGroupBy.nth` has deprecated ``True`` in favor of ``'all'`` for its kwarg ``dropna`` (:issue:`11038`). - :func:`DataFrame.as_blocks` is deprecated, as this is exposing the internal implementation (:issue:`17302`) +.. _whatsnew_0210.deprecations.argmin_min + +Series.argmax and Series.argmin +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- The behavior of :func:`Series.argmax` has been deprecated in favor of :func:`Series.idxmax` (:issue:`16830`) +- The behavior of :func:`Series.argmin` has been deprecated in favor of :func:`Series.idxmin` (:issue:`16830`) + +For compatibility with NumPy arrays, ``pd.Series`` implements ``argmax`` and +``argmin``. Since pandas 0.13.0, ``argmax`` has been an alias for +:meth:`pandas.Series.idxmax`, and ``argmin`` has been an alias for +:meth:`pandas.Series.idxmin`. They return the *label* of the maximum or minimum, +rather than the *position*. + +We've deprecated the current behavior of ``Series.argmax`` and +``Series.argmin``. Using either of these will emit a ``FutureWarning``. Use +:meth:`Series.idxmax` if you want the label of the maximum. Use +``Series.values.argmax()`` if you want the position of the maximum. Likewise for +the minimum. In a future release ``Series.argmax`` and ``Series.argmin`` will +return the position of the maximum or minimum. + .. _whatsnew_0210.prior_deprecations: Removal of prior version deprecations/changes diff --git a/pandas/core/series.py b/pandas/core/series.py index 89add1ef4c5907..a05324142b223a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -69,7 +69,8 @@ import pandas.core.common as com import pandas.core.nanops as nanops import pandas.io.formats.format as fmt -from pandas.util._decorators import Appender, deprecate_kwarg, Substitution +from pandas.util._decorators import ( + Appender, deprecate, deprecate_kwarg, Substitution) from pandas.util._validators import validate_bool_kwarg from pandas._libs import index as libindex, tslib as libts, lib, iNaT @@ -1274,7 +1275,7 @@ def duplicated(self, keep='first'): def idxmin(self, axis=None, skipna=True, *args, **kwargs): """ - Index of first occurrence of minimum of values. + Index *label* of the first occurrence of minimum of values. Parameters ---------- @@ -1287,7 +1288,9 @@ def idxmin(self, axis=None, skipna=True, *args, **kwargs): Notes ----- - This method is the Series version of ``ndarray.argmin``. + This method is the Series version of ``ndarray.argmin``. This method + returns the label of the minimum, while ``ndarray.argmin`` returns + the position. To get the position, use ``series.values.argmin()``. See Also -------- @@ -1302,7 +1305,7 @@ def idxmin(self, axis=None, skipna=True, *args, **kwargs): def idxmax(self, axis=None, skipna=True, *args, **kwargs): """ - Index of first occurrence of maximum of values. + Index *label* of the first occurrence of maximum of values. Parameters ---------- @@ -1315,7 +1318,9 @@ def idxmax(self, axis=None, skipna=True, *args, **kwargs): Notes ----- - This method is the Series version of ``ndarray.argmax``. + This method is the Series version of ``ndarray.argmax``. This method + returns the label of the maximum, while ``ndarray.argmax`` returns + the position. To get the position, use ``series.values.argmax()``. See Also -------- @@ -1329,8 +1334,18 @@ def idxmax(self, axis=None, skipna=True, *args, **kwargs): return self.index[i] # ndarray compat - argmin = idxmin - argmax = idxmax + argmin = deprecate('argmin', idxmin, + msg="'argmin' is deprecated. Use 'idxmin' instead. " + "The behavior of 'argmin' will be corrected to " + "return the positional minimum in the future. " + "Use 'series.values.argmin' to get the position of " + "the minimum now.") + argmax = deprecate('argmax', idxmax, + msg="'argmax' is deprecated. Use 'idxmax' instead. " + "The behavior of 'argmax' will be corrected to " + "return the positional maximum in the future. " + "Use 'series.values.argmax' to get the position of " + "the maximum now.") def round(self, decimals=0, *args, **kwargs): """ diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 547b9676717c99..386d9c3ffe30df 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -598,9 +598,7 @@ def to_string(self): text = self._join_multiline(*strcols) else: # max_cols == 0. Try to fit frame to terminal text = self.adj.adjoin(1, *strcols).split('\n') - row_lens = Series(text).apply(len) - max_len_col_ix = np.argmax(row_lens) - max_len = row_lens[max_len_col_ix] + max_len = Series(text).str.len().max() headers = [ele[0] for ele in strcols] # Size of last col determines dot col size. See # `self._to_str_columns diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 914181dc941549..9f5e4f2ac4b6e6 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1242,16 +1242,31 @@ def test_idxmin(self): result = s.idxmin() assert result == 1 - def test_numpy_argmin(self): - # argmin is aliased to idxmin - data = np.random.randint(0, 11, size=10) - result = np.argmin(Series(data)) - assert result == np.argmin(data) + def test_numpy_argmin_deprecated(self): + # See gh-16830 + data = np.arange(1, 11) + + s = Series(data, index=data) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + # The deprecation of Series.argmin also causes a deprecation + # warning when calling np.argmin. This behavior is temporary + # until the implemention of Series.argmin is corrected. + result = np.argmin(s) + + assert result == 1 + + with tm.assert_produces_warning(FutureWarning): + # argmin is aliased to idxmin + result = s.argmin() + + assert result == 1 if not _np_version_under1p10: - msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.argmin, - Series(data), out=data) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + msg = "the 'out' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, np.argmin, + s, out=data) def test_idxmax(self): # test idxmax @@ -1297,17 +1312,30 @@ def test_idxmax(self): result = s.idxmin() assert result == 1.1 - def test_numpy_argmax(self): + def test_numpy_argmax_deprecated(self): + # See gh-16830 + data = np.arange(1, 11) + + s = Series(data, index=data) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + # The deprecation of Series.argmax also causes a deprecation + # warning when calling np.argmax. This behavior is temporary + # until the implemention of Series.argmax is corrected. + result = np.argmax(s) + assert result == 10 + + with tm.assert_produces_warning(FutureWarning): + # argmax is aliased to idxmax + result = s.argmax() - # argmax is aliased to idxmax - data = np.random.randint(0, 11, size=10) - result = np.argmax(Series(data)) - assert result == np.argmax(data) + assert result == 10 if not _np_version_under1p10: - msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.argmax, - Series(data), out=data) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + msg = "the 'out' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, np.argmax, + s, out=data) def test_ptp(self): N = 1000 diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 56b8a90ec0c9f1..6b950be15ca465 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -345,7 +345,7 @@ def test_ndarray_compat(self): index=date_range('1/1/2000', periods=1000)) def f(x): - return x[x.argmax()] + return x[x.idxmax()] result = tsdf.apply(f) expected = tsdf.max() diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 114a055de81953..c8cc80b1cf4b1b 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -1872,33 +1872,33 @@ def test_op_duplicate_index(self): ), ] ) - def test_assert_argminmax_raises(self, test_input, error_type): + def test_assert_idxminmax_raises(self, test_input, error_type): """ Cases where ``Series.argmax`` and related should raise an exception """ with pytest.raises(error_type): - test_input.argmin() + test_input.idxmin() with pytest.raises(error_type): - test_input.argmin(skipna=False) + test_input.idxmin(skipna=False) with pytest.raises(error_type): - test_input.argmax() + test_input.idxmax() with pytest.raises(error_type): - test_input.argmax(skipna=False) + test_input.idxmax(skipna=False) - def test_argminmax_with_inf(self): + def test_idxminmax_with_inf(self): # For numeric data with NA and Inf (GH #13595) s = pd.Series([0, -np.inf, np.inf, np.nan]) - assert s.argmin() == 1 - assert np.isnan(s.argmin(skipna=False)) + assert s.idxmin() == 1 + assert np.isnan(s.idxmin(skipna=False)) - assert s.argmax() == 2 - assert np.isnan(s.argmax(skipna=False)) + assert s.idxmax() == 2 + assert np.isnan(s.idxmax(skipna=False)) # Using old-style behavior that treats floating point nan, -inf, and # +inf as missing with pd.option_context('mode.use_inf_as_na', True): - assert s.argmin() == 0 - assert np.isnan(s.argmin(skipna=False)) - assert s.argmax() == 0 - np.isnan(s.argmax(skipna=False)) + assert s.idxmin() == 0 + assert np.isnan(s.idxmin(skipna=False)) + assert s.idxmax() == 0 + np.isnan(s.idxmax(skipna=False)) diff --git a/pandas/tests/sparse/test_series.py b/pandas/tests/sparse/test_series.py index b44314d4e733be..451f3695933470 100644 --- a/pandas/tests/sparse/test_series.py +++ b/pandas/tests/sparse/test_series.py @@ -1379,11 +1379,25 @@ def test_numpy_func_call(self): # numpy passes in 'axis=None' or `axis=-1' funcs = ['sum', 'cumsum', 'var', 'mean', 'prod', 'cumprod', 'std', 'argsort', - 'argmin', 'argmax', 'min', 'max'] + 'min', 'max'] for func in funcs: for series in ('bseries', 'zbseries'): getattr(np, func)(getattr(self, series)) + def test_deprecated_numpy_func_call(self): + # NOTE: These should be add to the 'test_numpy_func_call' test above + # once the behavior of argmin/argmax is corrected. + funcs = ['argmin', 'argmax'] + for func in funcs: + for series in ('bseries', 'zbseries'): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + getattr(np, func)(getattr(self, series)) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + getattr(getattr(self, series), func)() + @pytest.mark.parametrize( 'datetime_type', (np.datetime64, diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index 3733e4311aa732..9e4e5515a292bc 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -7,7 +7,7 @@ def deprecate(name, alternative, alt_name=None, klass=None, - stacklevel=2): + stacklevel=2, msg=None): """ Return a new function that emits a deprecation warning on use. @@ -21,14 +21,16 @@ def deprecate(name, alternative, alt_name=None, klass=None, Name to use in preference of alternative.__name__ klass : Warning, default FutureWarning stacklevel : int, default 2 + msg : str + The message to display in the warning. + Default is '{name} is deprecated. Use {alt_name} instead.' """ alt_name = alt_name or alternative.__name__ klass = klass or FutureWarning + msg = msg or "{} is deprecated. Use {} instead".format(name, alt_name) def wrapper(*args, **kwargs): - msg = "{name} is deprecated. Use {alt_name} instead".format( - name=name, alt_name=alt_name) warnings.warn(msg, klass, stacklevel=stacklevel) return alternative(*args, **kwargs) return wrapper