From b247e840ae4eb4ae19e9a7298e950ee75384378f Mon Sep 17 00:00:00 2001
From: jschendel <jschendel@users.noreply.github.com>
Date: Mon, 11 Jun 2018 17:28:30 -0600
Subject: [PATCH 1/4] BUG: Fix Series.nlargest for integer boundary values

---
 pandas/core/algorithms.py             |  5 ++++-
 pandas/tests/series/test_analytics.py | 29 +++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index b33c10da7813e..9e34b8eb55ccb 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -1133,9 +1133,12 @@ def compute(self, method):
             return dropped[slc].sort_values(ascending=ascending).head(n)
 
         # fast method
-        arr, _, _ = _ensure_data(dropped.values)
+        arr, pandas_dtype, _ = _ensure_data(dropped.values)
         if method == 'nlargest':
             arr = -arr
+            if is_integer_dtype(pandas_dtype):
+                # GH 21426: ensure reverse ordering at boundaries
+                arr -= 1
 
         if self.keep == 'last':
             arr = arr[::-1]
diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
index aba472f2ce8f9..1d855271fd56b 100644
--- a/pandas/tests/series/test_analytics.py
+++ b/pandas/tests/series/test_analytics.py
@@ -13,6 +13,8 @@
 from pandas import (Series, Categorical, DataFrame, isna, notna,
                     bdate_range, date_range, _np_version_under1p10,
                     CategoricalIndex)
+from pandas.core.dtypes.common import (
+    is_float_dtype, is_integer_dtype, is_datetimelike)
 from pandas.core.index import MultiIndex
 from pandas.core.indexes.datetimes import Timestamp
 from pandas.core.indexes.timedeltas import Timedelta
@@ -2028,6 +2030,33 @@ def test_n(self, n):
         expected = s.sort_values().head(n)
         assert_series_equal(result, expected)
 
+    @pytest.mark.parametrize('dtype', [
+        'int8', 'int16', 'int32', 'int64',
+        'uint8', 'uint16', 'uint32', 'uint64',
+        'float16', 'float32', 'float64',
+        'datetime64[ns]', 'timedelta64[ns]'])
+    @pytest.mark.parametrize('method', ['nsmallest', 'nlargest'])
+    def test_boundary(self, method, dtype):
+        # GH 21426
+        if is_float_dtype(dtype):
+            min_val, max_val = np.finfo(dtype).min, np.finfo(dtype).max
+            min_2nd, max_2nd = np.nextafter([min_val, max_val], 0, dtype=dtype)
+            vals = [min_val, min_2nd, max_2nd, max_val]
+        elif is_integer_dtype(dtype):
+            min_val, max_val = np.iinfo(dtype).min, np.iinfo(dtype).max
+            vals = [min_val, min_val + 1, max_val - 1, max_val]
+        elif is_datetimelike(dtype):
+            # use int64 bounds and +1 to min_val since true minimum is NaT
+            # (include min_val/NaT at end to maintain same expected_idxr)
+            min_val, max_val = np.iinfo('int64').min, np.iinfo('int64').max
+            vals = [min_val + 1, min_val + 2, max_val - 1, max_val, min_val]
+
+        s = Series(vals, dtype=dtype)
+        result = getattr(s, method)(3)
+        expected_idxr = [0, 1, 2] if method == 'nsmallest' else [3, 2, 1]
+        expected = s.loc[expected_idxr]
+        tm.assert_series_equal(result, expected)
+
 
 class TestCategoricalSeriesAnalytics(object):
 

From b336dfbd05282a072abf613b20b5a1d49ec9e7f6 Mon Sep 17 00:00:00 2001
From: jschendel <jschendel@users.noreply.github.com>
Date: Mon, 11 Jun 2018 18:32:43 -0600
Subject: [PATCH 2/4] add whatsnew

---
 doc/source/whatsnew/v0.23.2.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt
index 79a4c3da2ffa4..b8d865195cddd 100644
--- a/doc/source/whatsnew/v0.23.2.txt
+++ b/doc/source/whatsnew/v0.23.2.txt
@@ -82,4 +82,5 @@ Bug Fixes
 
 **Other**
 
+- Bug in :meth:`Series.nlargest` for signed and unsigned integer dtypes when the minimum value is present (:issue:`21426`)
 -

From b5e803cf34801d62ab1d37923107fef73ed31b55 Mon Sep 17 00:00:00 2001
From: jschendel <jschendel@users.noreply.github.com>
Date: Mon, 11 Jun 2018 18:49:36 -0600
Subject: [PATCH 3/4] split tests

---
 pandas/tests/series/test_analytics.py | 55 ++++++++++++++++-----------
 1 file changed, 32 insertions(+), 23 deletions(-)

diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
index 1d855271fd56b..980805b340bf7 100644
--- a/pandas/tests/series/test_analytics.py
+++ b/pandas/tests/series/test_analytics.py
@@ -13,8 +13,6 @@
 from pandas import (Series, Categorical, DataFrame, isna, notna,
                     bdate_range, date_range, _np_version_under1p10,
                     CategoricalIndex)
-from pandas.core.dtypes.common import (
-    is_float_dtype, is_integer_dtype, is_datetimelike)
 from pandas.core.index import MultiIndex
 from pandas.core.indexes.datetimes import Timestamp
 from pandas.core.indexes.timedeltas import Timedelta
@@ -1948,6 +1946,10 @@ def test_mode_sortwarning(self):
 
 class TestNLargestNSmallest(object):
 
+    @pytest.fixture(params=['nlargest', 'nsmallest'])
+    def method(self, request):
+        return request.param
+
     @pytest.mark.parametrize(
         "r", [Series([3., 2, 1, 2, '5'], dtype='object'),
               Series([3., 2, 1, 2, 5], dtype='object'),
@@ -2030,33 +2032,40 @@ def test_n(self, n):
         expected = s.sort_values().head(n)
         assert_series_equal(result, expected)
 
-    @pytest.mark.parametrize('dtype', [
-        'int8', 'int16', 'int32', 'int64',
-        'uint8', 'uint16', 'uint32', 'uint64',
-        'float16', 'float32', 'float64',
-        'datetime64[ns]', 'timedelta64[ns]'])
-    @pytest.mark.parametrize('method', ['nsmallest', 'nlargest'])
-    def test_boundary(self, method, dtype):
-        # GH 21426
-        if is_float_dtype(dtype):
-            min_val, max_val = np.finfo(dtype).min, np.finfo(dtype).max
-            min_2nd, max_2nd = np.nextafter([min_val, max_val], 0, dtype=dtype)
-            vals = [min_val, min_2nd, max_2nd, max_val]
-        elif is_integer_dtype(dtype):
-            min_val, max_val = np.iinfo(dtype).min, np.iinfo(dtype).max
-            vals = [min_val, min_val + 1, max_val - 1, max_val]
-        elif is_datetimelike(dtype):
-            # use int64 bounds and +1 to min_val since true minimum is NaT
-            # (include min_val/NaT at end to maintain same expected_idxr)
-            min_val, max_val = np.iinfo('int64').min, np.iinfo('int64').max
-            vals = [min_val + 1, min_val + 2, max_val - 1, max_val, min_val]
-
+    def _check_nselect_boundary(self, vals, dtype, method):
+        # helper function for 'test_boundary_dtype' tests
         s = Series(vals, dtype=dtype)
         result = getattr(s, method)(3)
         expected_idxr = [0, 1, 2] if method == 'nsmallest' else [3, 2, 1]
         expected = s.loc[expected_idxr]
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.parametrize('dtype', [
+        'int8', 'int16', 'int32', 'int64',
+        'uint8', 'uint16', 'uint32', 'uint64'])
+    def test_boundary_integer(self, method, dtype):
+        # GH 21426
+        min_val, max_val = np.iinfo(dtype).min, np.iinfo(dtype).max
+        vals = [min_val, min_val + 1, max_val - 1, max_val]
+        self._check_nselect_boundary(vals, dtype, method)
+
+    @pytest.mark.parametrize('dtype', ['float16', 'float32', 'float64'])
+    def test_boundary_float(self, method, dtype):
+        # GH 21426
+        min_val, max_val = np.finfo(dtype).min, np.finfo(dtype).max
+        min_2nd, max_2nd = np.nextafter([min_val, max_val], 0, dtype=dtype)
+        vals = [min_val, min_2nd, max_2nd, max_val]
+        self._check_nselect_boundary(vals, dtype, method)
+
+    @pytest.mark.parametrize('dtype', ['datetime64[ns]', 'timedelta64[ns]'])
+    def test_boundary_datetimelike(self, method, dtype):
+        # GH 21426
+        # use int64 bounds and +1 to min_val since true minimum is NaT
+        # (include min_val/NaT at end to maintain same expected_idxr)
+        min_val, max_val = np.iinfo('int64').min, np.iinfo('int64').max
+        vals = [min_val + 1, min_val + 2, max_val - 1, max_val, min_val]
+        self._check_nselect_boundary(vals, dtype, method)
+
 
 class TestCategoricalSeriesAnalytics(object):
 

From fb9d532cfb1ddcfc2a6957b5f25b93d8a04df5b1 Mon Sep 17 00:00:00 2001
From: jschendel <jschendel@users.noreply.github.com>
Date: Wed, 13 Jun 2018 17:39:29 -0600
Subject: [PATCH 4/4] review edits

---
 pandas/conftest.py                    | 71 ++++++++++++++++++++++++
 pandas/tests/frame/test_analytics.py  | 78 +++++++++++++--------------
 pandas/tests/series/test_analytics.py | 49 ++++++++---------
 3 files changed, 130 insertions(+), 68 deletions(-)

diff --git a/pandas/conftest.py b/pandas/conftest.py
index d5f399c7cd63d..9d806a91f37f7 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -129,6 +129,14 @@ def join_type(request):
     return request.param
 
 
+@pytest.fixture(params=['nlargest', 'nsmallest'])
+def nselect_method(request):
+    """
+    Fixture for trying all nselect methods
+    """
+    return request.param
+
+
 @pytest.fixture(params=[None, np.nan, pd.NaT, float('nan'), np.float('NaN')])
 def nulls_fixture(request):
     """
@@ -170,3 +178,66 @@ def string_dtype(request):
     * 'U'
     """
     return request.param
+
+
+@pytest.fixture(params=["float32", "float64"])
+def float_dtype(request):
+    """
+    Parameterized fixture for float dtypes.
+
+    * float32
+    * float64
+    """
+
+    return request.param
+
+
+UNSIGNED_INT_DTYPES = ["uint8", "uint16", "uint32", "uint64"]
+SIGNED_INT_DTYPES = ["int8", "int16", "int32", "int64"]
+ALL_INT_DTYPES = UNSIGNED_INT_DTYPES + SIGNED_INT_DTYPES
+
+
+@pytest.fixture(params=SIGNED_INT_DTYPES)
+def sint_dtype(request):
+    """
+    Parameterized fixture for signed integer dtypes.
+
+    * int8
+    * int16
+    * int32
+    * int64
+    """
+
+    return request.param
+
+
+@pytest.fixture(params=UNSIGNED_INT_DTYPES)
+def uint_dtype(request):
+    """
+    Parameterized fixture for unsigned integer dtypes.
+
+    * uint8
+    * uint16
+    * uint32
+    * uint64
+    """
+
+    return request.param
+
+
+@pytest.fixture(params=ALL_INT_DTYPES)
+def any_int_dtype(request):
+    """
+    Parameterized fixture for any integer dtypes.
+
+    * int8
+    * uint8
+    * int16
+    * uint16
+    * int32
+    * uint32
+    * int64
+    * uint64
+    """
+
+    return request.param
diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
index b8f1acc2aa679..6dc24ed856017 100644
--- a/pandas/tests/frame/test_analytics.py
+++ b/pandas/tests/frame/test_analytics.py
@@ -12,7 +12,7 @@
 from numpy.random import randn
 import numpy as np
 
-from pandas.compat import lrange, product, PY35
+from pandas.compat import lrange, PY35
 from pandas import (compat, isna, notna, DataFrame, Series,
                     MultiIndex, date_range, Timestamp, Categorical,
                     _np_version_under1p12, _np_version_under1p15,
@@ -2260,54 +2260,49 @@ class TestNLargestNSmallest(object):
 
     # ----------------------------------------------------------------------
     # Top / bottom
-    @pytest.mark.parametrize(
-        'method, n, order',
-        product(['nsmallest', 'nlargest'], range(1, 11),
-                [['a'],
-                 ['c'],
-                 ['a', 'b'],
-                 ['a', 'c'],
-                 ['b', 'a'],
-                 ['b', 'c'],
-                 ['a', 'b', 'c'],
-                 ['c', 'a', 'b'],
-                 ['c', 'b', 'a'],
-                 ['b', 'c', 'a'],
-                 ['b', 'a', 'c'],
-
-                 # dups!
-                 ['b', 'c', 'c'],
-
-                 ]))
-    def test_n(self, df_strings, method, n, order):
+    @pytest.mark.parametrize('order', [
+        ['a'],
+        ['c'],
+        ['a', 'b'],
+        ['a', 'c'],
+        ['b', 'a'],
+        ['b', 'c'],
+        ['a', 'b', 'c'],
+        ['c', 'a', 'b'],
+        ['c', 'b', 'a'],
+        ['b', 'c', 'a'],
+        ['b', 'a', 'c'],
+
+        # dups!
+        ['b', 'c', 'c']])
+    @pytest.mark.parametrize('n', range(1, 11))
+    def test_n(self, df_strings, nselect_method, n, order):
         # GH10393
         df = df_strings
         if 'b' in order:
 
             error_msg = self.dtype_error_msg_template.format(
-                column='b', method=method, dtype='object')
+                column='b', method=nselect_method, dtype='object')
             with tm.assert_raises_regex(TypeError, error_msg):
-                getattr(df, method)(n, order)
+                getattr(df, nselect_method)(n, order)
         else:
-            ascending = method == 'nsmallest'
-            result = getattr(df, method)(n, order)
+            ascending = nselect_method == 'nsmallest'
+            result = getattr(df, nselect_method)(n, order)
             expected = df.sort_values(order, ascending=ascending).head(n)
             tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.parametrize(
-        'method, columns',
-        product(['nsmallest', 'nlargest'],
-                product(['group'], ['category_string', 'string'])
-                ))
-    def test_n_error(self, df_main_dtypes, method, columns):
+    @pytest.mark.parametrize('columns', [
+        ('group', 'category_string'), ('group', 'string')])
+    def test_n_error(self, df_main_dtypes, nselect_method, columns):
         df = df_main_dtypes
+        col = columns[1]
         error_msg = self.dtype_error_msg_template.format(
-            column=columns[1], method=method, dtype=df[columns[1]].dtype)
+            column=col, method=nselect_method, dtype=df[col].dtype)
         # escape some characters that may be in the repr
         error_msg = (error_msg.replace('(', '\\(').replace(")", "\\)")
                               .replace("[", "\\[").replace("]", "\\]"))
         with tm.assert_raises_regex(TypeError, error_msg):
-            getattr(df, method)(2, columns)
+            getattr(df, nselect_method)(2, columns)
 
     def test_n_all_dtypes(self, df_main_dtypes):
         df = df_main_dtypes
@@ -2328,15 +2323,14 @@ def test_n_identical_values(self):
         expected = pd.DataFrame({'a': [1] * 3, 'b': [1, 2, 3]})
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.parametrize(
-        'n, order',
-        product([1, 2, 3, 4, 5],
-                [['a', 'b', 'c'],
-                 ['c', 'b', 'a'],
-                 ['a'],
-                 ['b'],
-                 ['a', 'b'],
-                 ['c', 'b']]))
+    @pytest.mark.parametrize('order', [
+        ['a', 'b', 'c'],
+        ['c', 'b', 'a'],
+        ['a'],
+        ['b'],
+        ['a', 'b'],
+        ['c', 'b']])
+    @pytest.mark.parametrize('n', range(1, 6))
     def test_n_duplicate_index(self, df_duplicates, n, order):
         # GH 13412
 
diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
index 980805b340bf7..b9c7b837b8b81 100644
--- a/pandas/tests/series/test_analytics.py
+++ b/pandas/tests/series/test_analytics.py
@@ -1944,11 +1944,16 @@ def test_mode_sortwarning(self):
         tm.assert_series_equal(result, expected)
 
 
-class TestNLargestNSmallest(object):
+def assert_check_nselect_boundary(vals, dtype, method):
+    # helper function for 'test_boundary_{dtype}' tests
+    s = Series(vals, dtype=dtype)
+    result = getattr(s, method)(3)
+    expected_idxr = [0, 1, 2] if method == 'nsmallest' else [3, 2, 1]
+    expected = s.loc[expected_idxr]
+    tm.assert_series_equal(result, expected)
+
 
-    @pytest.fixture(params=['nlargest', 'nsmallest'])
-    def method(self, request):
-        return request.param
+class TestNLargestNSmallest(object):
 
     @pytest.mark.parametrize(
         "r", [Series([3., 2, 1, 2, '5'], dtype='object'),
@@ -2032,39 +2037,31 @@ def test_n(self, n):
         expected = s.sort_values().head(n)
         assert_series_equal(result, expected)
 
-    def _check_nselect_boundary(self, vals, dtype, method):
-        # helper function for 'test_boundary_dtype' tests
-        s = Series(vals, dtype=dtype)
-        result = getattr(s, method)(3)
-        expected_idxr = [0, 1, 2] if method == 'nsmallest' else [3, 2, 1]
-        expected = s.loc[expected_idxr]
-        tm.assert_series_equal(result, expected)
-
-    @pytest.mark.parametrize('dtype', [
-        'int8', 'int16', 'int32', 'int64',
-        'uint8', 'uint16', 'uint32', 'uint64'])
-    def test_boundary_integer(self, method, dtype):
+    def test_boundary_integer(self, nselect_method, any_int_dtype):
         # GH 21426
-        min_val, max_val = np.iinfo(dtype).min, np.iinfo(dtype).max
+        dtype_info = np.iinfo(any_int_dtype)
+        min_val, max_val = dtype_info.min, dtype_info.max
         vals = [min_val, min_val + 1, max_val - 1, max_val]
-        self._check_nselect_boundary(vals, dtype, method)
+        assert_check_nselect_boundary(vals, any_int_dtype, nselect_method)
 
-    @pytest.mark.parametrize('dtype', ['float16', 'float32', 'float64'])
-    def test_boundary_float(self, method, dtype):
+    def test_boundary_float(self, nselect_method, float_dtype):
         # GH 21426
-        min_val, max_val = np.finfo(dtype).min, np.finfo(dtype).max
-        min_2nd, max_2nd = np.nextafter([min_val, max_val], 0, dtype=dtype)
+        dtype_info = np.finfo(float_dtype)
+        min_val, max_val = dtype_info.min, dtype_info.max
+        min_2nd, max_2nd = np.nextafter(
+            [min_val, max_val], 0, dtype=float_dtype)
         vals = [min_val, min_2nd, max_2nd, max_val]
-        self._check_nselect_boundary(vals, dtype, method)
+        assert_check_nselect_boundary(vals, float_dtype, nselect_method)
 
     @pytest.mark.parametrize('dtype', ['datetime64[ns]', 'timedelta64[ns]'])
-    def test_boundary_datetimelike(self, method, dtype):
+    def test_boundary_datetimelike(self, nselect_method, dtype):
         # GH 21426
         # use int64 bounds and +1 to min_val since true minimum is NaT
         # (include min_val/NaT at end to maintain same expected_idxr)
-        min_val, max_val = np.iinfo('int64').min, np.iinfo('int64').max
+        dtype_info = np.iinfo('int64')
+        min_val, max_val = dtype_info.min, dtype_info.max
         vals = [min_val + 1, min_val + 2, max_val - 1, max_val, min_val]
-        self._check_nselect_boundary(vals, dtype, method)
+        assert_check_nselect_boundary(vals, dtype, nselect_method)
 
 
 class TestCategoricalSeriesAnalytics(object):