Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: do not raise UnsortedIndexError if sorting is not required #16736

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ Indexing
^^^^^^^^

- When called with a null slice (e.g. ``df.iloc[:]``), the ``iloc`` and ``loc`` indexers return a shallow copy of the original object. Previously they returned the original object. (:issue:`13873`).
- When called on an unsorted ``MultiIndex``, the ``loc`` indexer will now raise ``UnsortedIndexError`` only if proper slicing is used on non-sorted levels (:issue:`16734`).


I/O
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,13 @@ def is_null_slice(obj):
obj.stop is None and obj.step is None)


def is_true_slices(l):
    """
    Flag the non-trivial slices contained in "l".

    Returns a list with one boolean per element of "l": True when the
    element is a slice for which ``is_null_slice`` is false, and False
    otherwise (including for every element that is not a slice).
    """
    flags = []
    for item in l:
        # non-slice items short-circuit before is_null_slice is consulted
        flags.append(isinstance(item, slice) and not is_null_slice(item))
    return flags


def is_full_slice(obj, l):
""" we have a full length slice """
return (isinstance(obj, slice) and obj.start == 0 and obj.stop == l and
Expand Down
21 changes: 8 additions & 13 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
from pandas.errors import PerformanceWarning, UnsortedIndexError
from pandas.core.common import (_values_from_object,
is_bool_indexer,
is_null_slice)
is_null_slice,
is_true_slices)

import pandas.core.base as base
from pandas.util._decorators import (Appender, cache_readonly,
Expand Down Expand Up @@ -1035,12 +1036,6 @@ def is_lexsorted(self):
"""
return self.lexsort_depth == self.nlevels

def is_lexsorted_for_tuple(self, tup):
    """
    Tell whether the index is lexsorted deeply enough for the passed tuple.

    Returns True when the number of entries in ``tup`` does not exceed
    ``self.lexsort_depth``.
    """
    n_keys = len(tup)
    return n_keys <= self.lexsort_depth

@cache_readonly
def lexsort_depth(self):
if self.sortorder is not None:
Expand Down Expand Up @@ -2262,12 +2257,12 @@ def get_locs(self, tup):
"""

# must be lexsorted to at least as many levels
if not self.is_lexsorted_for_tuple(tup):
raise UnsortedIndexError('MultiIndex Slicing requires the index '
'to be fully lexsorted tuple len ({0}), '
'lexsort depth ({1})'
.format(len(tup), self.lexsort_depth))

true_slices = [i for (i, s) in enumerate(is_true_slices(tup)) if s]
if true_slices and true_slices[-1] >= self.lexsort_depth:
raise UnsortedIndexError('MultiIndex slicing requires the index '
'to be lexsorted: slicing on levels {0}, '
'lexsort depth {1}'
.format(true_slices, self.lexsort_depth))
# indexer
# this is the list of all values that we want to select
n = len(self)
Expand Down
7 changes: 6 additions & 1 deletion pandas/tests/indexes/test_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2826,8 +2826,13 @@ def test_unsortedindex(self):
df = pd.DataFrame([[i, 10 * i] for i in lrange(6)], index=mi,
columns=['one', 'two'])

# GH 16734: not sorted, but no real slicing
result = df.loc(axis=0)['z', 'a']
expected = df.iloc[0]
tm.assert_series_equal(result, expected)

with pytest.raises(UnsortedIndexError):
df.loc(axis=0)['z', :]
df.loc(axis=0)['z', slice('a')]
df.sort_index(inplace=True)
assert len(df.loc(axis=0)['z', :]) == 2

Expand Down
17 changes: 13 additions & 4 deletions pandas/tests/indexing/test_multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -817,9 +817,13 @@ def f():
assert df.index.lexsort_depth == 0
with tm.assert_raises_regex(
UnsortedIndexError,
'MultiIndex Slicing requires the index to be fully '
r'lexsorted tuple len \(2\), lexsort depth \(0\)'):
df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :]
'MultiIndex slicing requires the index to be '
r'lexsorted: slicing on levels \[1\], lexsort depth 0'):
df.loc[(slice(None), slice('bar')), :]

# GH 16734: not sorted, but no real slicing
result = df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :]
tm.assert_frame_equal(result, df.iloc[[1, 3], :])

def test_multiindex_slicers_non_unique(self):

Expand Down Expand Up @@ -1001,9 +1005,14 @@ def test_per_axis_per_level_doc_examples(self):

# not sorted
def f():
df.loc['A1', (slice(None), 'foo')]
df.loc['A1', ('a', slice('foo'))]

pytest.raises(UnsortedIndexError, f)

# GH 16734: not sorted, but no real slicing
tm.assert_frame_equal(df.loc['A1', (slice(None), 'foo')],
df.loc['A1'].iloc[:, [0, 2]])

df = df.sort_index(axis=1)

# slicing
Expand Down