diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index d6b699abdba2d..87ef5a158e0f6 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -98,6 +98,7 @@ Indexing ^^^^^^^^ - When called with a null slice (e.g. ``df.iloc[:]``), the``iloc`` and ``loc`` indexers return a shallow copy of the original object. Previously they returned the original object. (:issue:`13873`). +- When called on an unsorted ``MultiIndex``, the ``loc`` indexer will now raise ``UnsortedIndexError`` only if proper slicing is used on non-sorted levels (:issue:`16734`). I/O diff --git a/pandas/core/common.py b/pandas/core/common.py index 0dc6a7a1e9c7b..ed768a5743666 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -411,6 +411,13 @@ def is_null_slice(obj): obj.stop is None and obj.step is None) +def is_true_slices(l): + """ + Find non-trivial slices in "l": return a list of booleans with same length. + """ + return [isinstance(k, slice) and not is_null_slice(k) for k in l] + + def is_full_slice(obj, l): """ we have a full length slice """ return (isinstance(obj, slice) and obj.start == 0 and obj.stop == l and diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index f30da5b05f8ae..1a762732b1213 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -23,7 +23,8 @@ from pandas.errors import PerformanceWarning, UnsortedIndexError from pandas.core.common import (_values_from_object, is_bool_indexer, - is_null_slice) + is_null_slice, + is_true_slices) import pandas.core.base as base from pandas.util._decorators import (Appender, cache_readonly, @@ -1035,12 +1036,6 @@ def is_lexsorted(self): """ return self.lexsort_depth == self.nlevels - def is_lexsorted_for_tuple(self, tup): - """ - Return True if we are correctly lexsorted given the passed tuple - """ - return len(tup) <= self.lexsort_depth - @cache_readonly def lexsort_depth(self): if self.sortorder is not None: @@ -2262,12 
+2257,12 @@ def get_locs(self, tup): """ # must be lexsorted to at least as many levels - if not self.is_lexsorted_for_tuple(tup): - raise UnsortedIndexError('MultiIndex Slicing requires the index ' - 'to be fully lexsorted tuple len ({0}), ' - 'lexsort depth ({1})' - .format(len(tup), self.lexsort_depth)) - + true_slices = [i for (i, s) in enumerate(is_true_slices(tup)) if s] + if true_slices and true_slices[-1] >= self.lexsort_depth: + raise UnsortedIndexError('MultiIndex slicing requires the index ' + 'to be lexsorted: slicing on levels {0}, ' + 'lexsort depth {1}' + .format(true_slices, self.lexsort_depth)) # indexer # this is the list of all values that we want to select n = len(self) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 3f6fd8c8aa827..ef8806246c2c5 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -2826,8 +2826,13 @@ def test_unsortedindex(self): df = pd.DataFrame([[i, 10 * i] for i in lrange(6)], index=mi, columns=['one', 'two']) + # GH 16734: not sorted, but no real slicing + result = df.loc(axis=0)['z', 'a'] + expected = df.iloc[0] + tm.assert_series_equal(result, expected) + with pytest.raises(UnsortedIndexError): - df.loc(axis=0)['z', :] + df.loc(axis=0)['z', slice('a')] df.sort_index(inplace=True) assert len(df.loc(axis=0)['z', :]) == 2 diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index fc6c627075c96..c12bb8910ffc9 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -817,9 +817,13 @@ def f(): assert df.index.lexsort_depth == 0 with tm.assert_raises_regex( UnsortedIndexError, - 'MultiIndex Slicing requires the index to be fully ' - r'lexsorted tuple len \(2\), lexsort depth \(0\)'): - df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :] + 'MultiIndex slicing requires the index to be ' + r'lexsorted: slicing on levels \[1\], lexsort depth 0'): + 
df.loc[(slice(None), slice('bar')), :] + + # GH 16734: not sorted, but no real slicing + result = df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :] + tm.assert_frame_equal(result, df.iloc[[1, 3], :]) def test_multiindex_slicers_non_unique(self): @@ -1001,9 +1005,14 @@ def test_per_axis_per_level_doc_examples(self): # not sorted def f(): - df.loc['A1', (slice(None), 'foo')] + df.loc['A1', ('a', slice('foo'))] pytest.raises(UnsortedIndexError, f) + + # GH 16734: not sorted, but no real slicing + tm.assert_frame_equal(df.loc['A1', (slice(None), 'foo')], + df.loc['A1'].iloc[:, [0, 2]]) + df = df.sort_index(axis=1) # slicing