From e77ba11945c8922b21ac6c82eeb39a0817df27e5 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 18 Oct 2020 16:01:33 +0200 Subject: [PATCH 1/5] Fix regression for is_monotonic_increasing with nan in MultiIndex --- doc/source/whatsnew/v1.1.4.rst | 1 + pandas/_libs/join.pyx | 7 ++++++- pandas/core/indexes/multi.py | 5 ++++- pandas/tests/indexes/multi/test_monotonic.py | 6 ++++++ 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.1.4.rst b/doc/source/whatsnew/v1.1.4.rst index 6892fb62028c9..9d0825c50916c 100644 --- a/doc/source/whatsnew/v1.1.4.rst +++ b/doc/source/whatsnew/v1.1.4.rst @@ -20,6 +20,7 @@ Fixed regressions - Fixed regression in :class:`RollingGroupby` with ``sort=False`` not being respected (:issue:`36889`) - Fixed regression in :meth:`Series.astype` converting ``None`` to ``"nan"`` when casting to string (:issue:`36904`) - Fixed regression in :class:`RollingGroupby` causing a segmentation fault with Index of dtype object (:issue:`36727`) +- Fixed regression in :attr:`MultiIndex.is_monotonic_increasing` returning wrong results with ``"nan"`` in at least one of the levels (:issue:`37220`) .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index 13c7187923473..22cb2a58a6853 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -278,6 +278,7 @@ def left_join_indexer_unique(join_t[:] left, join_t[:] right): j = 0 nleft = len(left) nright = len(right) + print(nleft) indexer = np.empty(nleft, dtype=np.int64) while True: @@ -294,7 +295,9 @@ def left_join_indexer_unique(join_t[:] left, join_t[:] right): while i < nleft - 1 and left[i] == rval: indexer[i] = j i += 1 - + print(left[i]) + print(rval) + print(left[i] > rval) if left[i] == right[j]: indexer[i] = j i += 1 @@ -303,9 +306,11 @@ def left_join_indexer_unique(join_t[:] left, join_t[:] right): i += 1 j += 1 elif left[i] > rval: + print("test") indexer[i] = -1 j += 1 else: + print(left[i]) indexer[i] = -1 i += 1 return indexer diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index d012d5704f716..334b96bcfdc70 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1539,7 +1539,10 @@ def is_monotonic_increasing(self) -> bool: return if the index is monotonic increasing (only equal or increasing) values. """ - if all(x.is_monotonic for x in self.levels): + if all( + level.is_monotonic and -1 not in code + for (level, code) in zip(self.levels, self.codes) + ): # If each level is sorted, we can operate on the codes directly. GH27495 return libalgos.is_lexsorted( [x.astype("int64", copy=False) for x in self.codes] diff --git a/pandas/tests/indexes/multi/test_monotonic.py b/pandas/tests/indexes/multi/test_monotonic.py index ca1cb0932f63d..edd8eb33f3075 100644 --- a/pandas/tests/indexes/multi/test_monotonic.py +++ b/pandas/tests/indexes/multi/test_monotonic.py @@ -174,3 +174,9 @@ def test_is_strictly_monotonic_decreasing(): ) assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is False + + +def test_is_monotonic_with_nans(): + # GH: 37220 + idx = pd.MultiIndex.from_tuples([(np.nan,), (1,)], names=["test"]) + assert idx.is_monotonic_increasing is False From e2cd2eba82befaa259347c087ee3722a19ba0992 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 18 Oct 2020 16:02:25 +0200 Subject: [PATCH 2/5] Fix regression for is_monotonic_increasing with nan in MultiIndex --- pandas/_libs/join.pyx | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index 22cb2a58a6853..13c7187923473 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -278,7 +278,6 @@ def left_join_indexer_unique(join_t[:] left, join_t[:] right): j = 0 nleft = len(left) nright = len(right) - print(nleft) indexer = np.empty(nleft, dtype=np.int64) while True: @@ -295,9 +294,7 @@ def left_join_indexer_unique(join_t[:] left, join_t[:] right): while i < nleft - 1 and left[i] == rval: indexer[i] = j i += 1 - print(left[i]) - print(rval) - print(left[i] > rval) + if left[i] == right[j]: indexer[i] = j i += 1 @@ -306,11 +303,9 @@ def left_join_indexer_unique(join_t[:] left, join_t[:] right): i += 1 j += 1 elif left[i] > rval: - print("test") indexer[i] = -1 j += 1 else: - print(left[i]) indexer[i] = -1 i += 1 return indexer From d6dfe5c72d6a74db794d86489e79f3e54f61a4be Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 18 Oct 2020 20:50:53 +0200 Subject: [PATCH 3/5] Adjust whatsnew --- doc/source/whatsnew/v1.1.4.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.4.rst b/doc/source/whatsnew/v1.1.4.rst index 9d0825c50916c..03da7827a2ddb 100644 --- a/doc/source/whatsnew/v1.1.4.rst +++ b/doc/source/whatsnew/v1.1.4.rst @@ -20,7 +20,7 @@ Fixed regressions - Fixed regression in :class:`RollingGroupby` with ``sort=False`` not being respected (:issue:`36889`) - Fixed regression in :meth:`Series.astype` converting ``None`` to ``"nan"`` when casting to string (:issue:`36904`) - Fixed regression in :class:`RollingGroupby` causing a segmentation fault with Index of dtype object (:issue:`36727`) -- Fixed regression in :attr:`MultiIndex.is_monotonic_increasing` returning wrong results with ``"nan"`` in at least one of the levels (:issue:`37220`) +- Fixed regression in :attr:`MultiIndex.is_monotonic_increasing` returning wrong results with ``NaN`` in at least one of the levels (:issue:`37220`) .. --------------------------------------------------------------------------- From 339ae88403422d05a71ea91ffb9cb3765967951f Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 18 Oct 2020 22:39:08 +0200 Subject: [PATCH 4/5] Return False when nan in MultiIndex --- pandas/core/indexes/multi.py | 8 ++++---- pandas/tests/indexes/multi/test_monotonic.py | 9 +++++++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 334b96bcfdc70..fec578b21c5c5 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1539,10 +1539,10 @@ def is_monotonic_increasing(self) -> bool: return if the index is monotonic increasing (only equal or increasing) values. """ - if all( - level.is_monotonic and -1 not in code - for (level, code) in zip(self.levels, self.codes) - ): + if any(-1 in code for code in self.codes): + return False + + if all(level.is_monotonic for level in self.levels): # If each level is sorted, we can operate on the codes directly. GH27495 return libalgos.is_lexsorted( [x.astype("int64", copy=False) for x in self.codes] diff --git a/pandas/tests/indexes/multi/test_monotonic.py b/pandas/tests/indexes/multi/test_monotonic.py index edd8eb33f3075..bfc913268a576 100644 --- a/pandas/tests/indexes/multi/test_monotonic.py +++ b/pandas/tests/indexes/multi/test_monotonic.py @@ -1,4 +1,5 @@ import numpy as np +import pytest import pandas as pd from pandas import Index, MultiIndex @@ -176,7 +177,11 @@ def test_is_strictly_monotonic_decreasing(): assert idx._is_strictly_monotonic_decreasing is False -def test_is_monotonic_with_nans(): +@pytest.mark.parametrize( + "values", + [[(np.nan,), (1,), (2,)], [(1,), (np.nan,), (2,)], [(1,), (2,), (np.nan,)]], +) +def test_is_monotonic_with_nans(values): # GH: 37220 - idx = pd.MultiIndex.from_tuples([(np.nan,), (1,)], names=["test"]) + idx = pd.MultiIndex.from_tuples(values, names=["test"]) assert idx.is_monotonic_increasing is False From c3d1ab66f5a456917cc520e77731b65a6f5b5014 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 18 Oct 2020 22:50:01 +0200 Subject: [PATCH 5/5] Parametrize test --- pandas/tests/indexes/multi/test_monotonic.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/multi/test_monotonic.py b/pandas/tests/indexes/multi/test_monotonic.py index bfc913268a576..8659573d8123a 100644 --- a/pandas/tests/indexes/multi/test_monotonic.py +++ b/pandas/tests/indexes/multi/test_monotonic.py @@ -177,11 +177,12 @@ def test_is_strictly_monotonic_decreasing(): assert idx._is_strictly_monotonic_decreasing is False +@pytest.mark.parametrize("attr", ["is_monotonic_increasing", "is_monotonic_decreasing"]) @pytest.mark.parametrize( "values", [[(np.nan,), (1,), (2,)], [(1,), (np.nan,), (2,)], [(1,), (2,), (np.nan,)]], ) -def test_is_monotonic_with_nans(values): +def test_is_monotonic_with_nans(values, attr): # GH: 37220 idx = pd.MultiIndex.from_tuples(values, names=["test"]) - assert idx.is_monotonic_increasing is False + assert getattr(idx, attr) is False