From ec8ad010e76dd6d36058c1c1efe7a7716159e044 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Tue, 20 Jun 2017 01:09:34 +0200 Subject: [PATCH] BUG: do not raise UnsortedIndexError if sorting is not required closes #16734 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/indexes/multi.py | 19 ++++++++++--------- pandas/tests/indexes/test_multi.py | 7 ++++++- pandas/tests/indexing/test_multiindex.py | 17 +++++++++++++---- 4 files changed, 30 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index d6b699abdba2d9..87ef5a158e0f68 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -98,6 +98,7 @@ Indexing ^^^^^^^^ - When called with a null slice (e.g. ``df.iloc[:]``), the``iloc`` and ``loc`` indexers return a shallow copy of the original object. Previously they returned the original object. (:issue:`13873`). +- When called on an unsorted ``MultiIndex``, the ``loc`` indexer will now raise ``UnsortedIndexError`` only if proper slicing (i.e. with a non-null slice) is used on non-sorted levels (:issue:`16734`). 
I/O diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index f30da5b05f8ae0..43ad901209b5ca 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1035,11 +1035,11 @@ def is_lexsorted(self): """ return self.lexsort_depth == self.nlevels - def is_lexsorted_for_tuple(self, tup): + def _true_slices_in_tup(self, tup): """ - Return True if we are correctly lexsorted given the passed tuple + Return a list of booleans, one per element of "tup": True where the element is a (non-trivial) slice """ - return len(tup) <= self.lexsort_depth + return [isinstance(k, slice) and not is_null_slice(k) for k in tup] @cache_readonly def lexsort_depth(self): @@ -2262,12 +2262,13 @@ def get_locs(self, tup): """ # must be lexsorted to at least as many levels - if not self.is_lexsorted_for_tuple(tup): - raise UnsortedIndexError('MultiIndex Slicing requires the index ' - 'to be fully lexsorted tuple len ({0}), ' - 'lexsort depth ({1})' - .format(len(tup), self.lexsort_depth)) - + true_slices = [i for (i, s) in enumerate(self._true_slices_in_tup(tup)) + if s] + if true_slices and true_slices[-1] >= self.lexsort_depth: + raise UnsortedIndexError('MultiIndex slicing requires the index ' + 'to be lexsorted: slicing on levels ' + '{0}, lexsort depth {1}' + .format(true_slices, self.lexsort_depth)) # indexer # this is the list of all values that we want to select n = len(self) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 3f6fd8c8aa8279..ef8806246c2c51 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -2826,8 +2826,13 @@ def test_unsortedindex(self): df = pd.DataFrame([[i, 10 * i] for i in lrange(6)], index=mi, columns=['one', 'two']) + # GH 16734: not sorted, but no real slicing + result = df.loc(axis=0)['z', 'a'] + expected = df.iloc[0] + tm.assert_series_equal(result, expected) + with pytest.raises(UnsortedIndexError): - df.loc(axis=0)['z', :] + df.loc(axis=0)['z', slice('a')] df.sort_index(inplace=True) assert 
len(df.loc(axis=0)['z', :]) == 2 diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index fc6c627075c968..c12bb8910ffc9c 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -817,9 +817,13 @@ def f(): assert df.index.lexsort_depth == 0 with tm.assert_raises_regex( UnsortedIndexError, - 'MultiIndex Slicing requires the index to be fully ' - r'lexsorted tuple len \(2\), lexsort depth \(0\)'): - df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :] + 'MultiIndex slicing requires the index to be ' + r'lexsorted: slicing on levels \[1\], lexsort depth 0'): + df.loc[(slice(None), slice('bar')), :] + + # GH 16734: not sorted, but no real slicing + result = df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :] + tm.assert_frame_equal(result, df.iloc[[1, 3], :]) def test_multiindex_slicers_non_unique(self): @@ -1001,9 +1005,14 @@ def test_per_axis_per_level_doc_examples(self): # not sorted def f(): - df.loc['A1', (slice(None), 'foo')] + df.loc['A1', ('a', slice('foo'))] pytest.raises(UnsortedIndexError, f) + + # GH 16734: not sorted, but no real slicing + tm.assert_frame_equal(df.loc['A1', (slice(None), 'foo')], + df.loc['A1'].iloc[:, [0, 2]]) + df = df.sort_index(axis=1) # slicing