diff --git a/doc/source/whatsnew/v1.4.3.rst b/doc/source/whatsnew/v1.4.3.rst index 8572c136c28a9..0c326e15d90ed 100644 --- a/doc/source/whatsnew/v1.4.3.rst +++ b/doc/source/whatsnew/v1.4.3.rst @@ -14,6 +14,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ +- Fixed regression in :meth:`DataFrame.nsmallest` led to wrong results when ``np.nan`` in the sorting column (:issue:`46589`) - Fixed regression in :func:`read_fwf` raising ``ValueError`` when ``widths`` was specified with ``usecols`` (:issue:`46580`) - diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 6c1dfc4c0da72..0c0b93f41c657 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1181,7 +1181,6 @@ def compute(self, method: str) -> Series: arr = arr[::-1] nbase = n - findex = len(self.obj) narr = len(arr) n = min(n, narr) @@ -1194,6 +1193,11 @@ def compute(self, method: str) -> Series: if self.keep != "all": inds = inds[:n] findex = nbase + else: + if len(inds) < nbase and len(nan_index) + len(inds) >= nbase: + findex = len(nan_index) + len(inds) + else: + findex = len(inds) if self.keep == "last": # reverse indices diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py index 1b2db80d782ce..a317dae562ae0 100644 --- a/pandas/tests/frame/methods/test_nlargest.py +++ b/pandas/tests/frame/methods/test_nlargest.py @@ -216,3 +216,24 @@ def test_nlargest_nan(self): result = df.nlargest(5, 0) expected = df.sort_values(0, ascending=False).head(5) tm.assert_frame_equal(result, expected) + + def test_nsmallest_nan_after_n_element(self): + # GH#46589 + df = pd.DataFrame( + { + "a": [1, 2, 3, 4, 5, None, 7], + "b": [7, 6, 5, 4, 3, 2, 1], + "c": [1, 1, 2, 2, 3, 3, 3], + }, + index=range(7), + ) + result = df.nsmallest(5, columns=["a", "b"]) + expected = pd.DataFrame( + { + "a": [1, 2, 3, 4, 5], + "b": [7, 6, 5, 4, 3], + "c": [1, 1, 2, 2, 3], + }, + index=range(5), + ).astype({"a": "float"}) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/methods/test_nlargest.py b/pandas/tests/series/methods/test_nlargest.py index ee96ab08ad66c..4f07257038bc9 100644 --- a/pandas/tests/series/methods/test_nlargest.py +++ b/pandas/tests/series/methods/test_nlargest.py @@ -231,3 +231,15 @@ def test_nlargest_nullable(self, any_numeric_ea_dtype): .astype(dtype) ) tm.assert_series_equal(result, expected) + + def test_nsmallest_nan_when_keep_is_all(self): + # GH#46589 + s = Series([1, 2, 3, 3, 3, None]) + result = s.nsmallest(3, keep="all") + expected = Series([1.0, 2.0, 3.0, 3.0, 3.0]) + tm.assert_series_equal(result, expected) + + s = Series([1, 2, None, None, None]) + result = s.nsmallest(3, keep="all") + expected = Series([1, 2, None, None, None]) + tm.assert_series_equal(result, expected)