From d409035e133c663687fe08584b510011f0800d84 Mon Sep 17 00:00:00 2001
From: Daniel Grady <daniel.grady@idanalytics.com>
Date: Mon, 22 May 2017 16:00:17 -0700
Subject: [PATCH] BUG: Fix behavior of argmax and argmin with inf (#16449)

Closes #13595

The implementations of `nanargmin` and `nanargmax` in `nanops` were
forcing the `_get_values` utility function to always mask out infinite
values. For example, in `nanargmax`,

    >>> nanops._get_values(np.array([1, np.nan, np.inf]), True,
    isfinite=True, fill_value_typ='-inf')

    (array([  1., -inf, -inf]),
     array([False,  True,  True], dtype=bool),
     dtype('float64'),
     numpy.float64)

The first element of the result tuple (the masked version of the
values array) is used for actually finding the max or min argument. As
a result, infinite values could never be correctly recognized as the
maximum or minimum values in an array.

This also affects the behavior of `DataFrame.groupby.idxmax`: since
`Series.idxmax` previously raised a `ValueError` with string data, the
group by would silently drop columns that contained strings.
---
 doc/source/whatsnew/v0.21.0.txt       |  2 +-
 pandas/core/nanops.py                 |  6 +--
 pandas/tests/groupby/test_groupby.py  |  3 +-
 pandas/tests/series/test_operators.py | 68 ++++++++++++++++++++++++++-
 4 files changed, 72 insertions(+), 7 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 9d330cf3fdf2d..08f58086aa8a5 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -127,7 +127,7 @@ Sparse
 Reshaping
 ^^^^^^^^^
 
-
+- `argmin`, `argmax`, `idxmin`, and `idxmax` on Series, DataFrame, and GroupBy objects work correctly with floating point data that contains infinite values (:issue:`13595`). These functions now also work with string data, as long as there are no missing values.
 
 Numeric
 ^^^^^^^
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index 1d64f87b15761..9630dba7917cf 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -474,8 +474,7 @@ def nanargmax(values, axis=None, skipna=True):
     """
     Returns -1 in the NA case
     """
-    values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='-inf',
-                                         isfinite=True)
+    values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='-inf')
     result = values.argmax(axis)
     result = _maybe_arg_null_out(result, axis, mask, skipna)
     return result
@@ -485,8 +484,7 @@ def nanargmin(values, axis=None, skipna=True):
     """
     Returns -1 in the NA case
     """
-    values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='+inf',
-                                         isfinite=True)
+    values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='+inf')
     result = values.argmin(axis)
     result = _maybe_arg_null_out(result, axis, mask, skipna)
     return result
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 19124a33bdbcb..1dfb07f2d729f 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2339,7 +2339,8 @@ def test_non_cython_api(self):
         assert_frame_equal(result, expected)
 
         # idxmax
-        expected = DataFrame([[0], [nan]], columns=['B'], index=[1, 3])
+        expected = DataFrame([[0.0, 0.0], [nan, 2.0]], columns=['B', 'C'],
+                             index=[1, 3])
         expected.index.name = 'A'
         result = g.idxmax()
         assert_frame_equal(result, expected)
diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py
index 2e400812e0331..dec8ca1dc3f4b 100644
--- a/pandas/tests/series/test_operators.py
+++ b/pandas/tests/series/test_operators.py
@@ -20,7 +20,7 @@
 from pandas.core.indexes.timedeltas import Timedelta
 import pandas.core.nanops as nanops
 
-from pandas.compat import range, zip
+from pandas.compat import range, zip, PY3
 from pandas import compat
 from pandas.util.testing import (assert_series_equal, assert_almost_equal,
                                  assert_frame_equal, assert_index_equal)
@@ -1857,3 +1857,69 @@ def test_op_duplicate_index(self):
         result = s1 + s2
         expected = pd.Series([11, 12, np.nan], index=[1, 1, 2])
         assert_series_equal(result, expected)
+
+    def test_argminmax(self):
+        # Series.argmin, Series.argmax are aliased to Series.idxmin,
+        # Series.idxmax
+
+        # Expected behavior for empty Series
+        s = pd.Series([])
+
+        with pytest.raises(ValueError):
+            s.argmin()
+        with pytest.raises(ValueError):
+            s.argmin(skipna=False)
+        with pytest.raises(ValueError):
+            s.argmax()
+        with pytest.raises(ValueError):
+            s.argmax(skipna=False)
+
+        # For numeric data with NA and Inf (GH #13595)
+        s = pd.Series([0, -np.inf, np.inf, np.nan])
+
+        assert s.argmin() == 1
+        assert np.isnan(s.argmin(skipna=False))
+
+        assert s.argmax() == 2
+        assert np.isnan(s.argmax(skipna=False))
+
+        # Using old-style behavior that treats floating point nan, -inf, and
+        # +inf as missing
+        s = pd.Series([0, -np.inf, np.inf, np.nan])
+
+        with pd.option_context('mode.use_inf_as_null', True):
+            assert s.argmin() == 0
+            assert np.isnan(s.argmin(skipna=False))
+            assert s.argmax() == 0
+            np.isnan(s.argmax(skipna=False))
+
+        # For non-NA strings
+        s = pd.Series(['foo', 'foo', 'bar', 'bar', 'baz'])
+
+        assert s.argmin() == 2
+        assert s.argmin(skipna=False) == 2
+
+        assert s.argmax() == 0
+        assert s.argmax(skipna=False) == 0
+
+        # For mixed string and NA
+        # This works differently under Python 2 and 3: under Python 2,
+        # comparing strings and None, for example, is valid, and we can
+        # compute an argmax. Under Python 3, such comparisons are not valid
+        # and raise a TypeError.
+        s = pd.Series(['foo', 'foo', 'bar', 'bar', None, np.nan, 'baz'])
+
+        if PY3:
+            with pytest.raises(TypeError):
+                s.argmin()
+            with pytest.raises(TypeError):
+                s.argmin(skipna=False)
+            with pytest.raises(TypeError):
+                s.argmax()
+            with pytest.raises(TypeError):
+                s.argmax(skipna=False)
+        else:
+            assert s.argmin() == 4
+            assert np.isnan(s.argmin(skipna=False))
+            assert s.argmax() == 0
+            assert np.isnan(s.argmax(skipna=False))