Skip to content

Commit

Permalink
BUG: Respect dups in reindexing CategoricalIndex
Browse files Browse the repository at this point in the history
Even when the indexer is identical to the elements of the index,
we should still return duplicates when the indexer
contains duplicates.

Closes gh-17323.
  • Loading branch information
gfyoung committed Aug 28, 2017
1 parent 473a7f3 commit 83798b4
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 6 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,7 @@ Indexing
- Allow unicode empty strings as placeholders in multilevel columns in Python 2 (:issue:`17099`)
- Bug in ``.iloc`` when used with inplace addition or assignment and an int indexer on a ``MultiIndex`` causing the wrong indexes to be read from and written to (:issue:`17148`)
- Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`)
- Bug in ``CategoricalIndex`` reindexing in which specified indices containing duplicates were not being respected (:issue:`17323`)

I/O
^^^
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
method = missing.clean_reindex_fill_method(method)
target = ibase._ensure_index(target)

if self.equals(target):
if self.is_unique and self.equals(target):
return np.arange(len(self), dtype='intp')

if method == 'pad' or method == 'backfill':
Expand Down
22 changes: 17 additions & 5 deletions pandas/tests/indexes/test_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,18 +365,18 @@ def test_astype(self):
tm.assert_index_equal(result, expected)

def test_reindex_base(self):

# determined by cat ordering
idx = self.create_index()
# Determined by cat ordering.
idx = CategoricalIndex(list("cab"), categories=list("cab"))
expected = np.arange(len(idx), dtype=np.intp)

actual = idx.get_indexer(idx)
tm.assert_numpy_array_equal(expected, actual)

with tm.assert_raises_regex(ValueError, 'Invalid fill method'):
idx.get_indexer(idx, method='invalid')
with tm.assert_raises_regex(ValueError, "Invalid fill method"):
idx.get_indexer(idx, method="invalid")

def test_reindexing(self):
np.random.seed(123456789)

ci = self.create_index()
oidx = Index(np.array(ci))
Expand All @@ -388,6 +388,18 @@ def test_reindexing(self):
actual = ci.get_indexer(finder)
tm.assert_numpy_array_equal(expected, actual)

# see gh-17323
#
# Even when indexer is equal to the
# members in the index, we should
# respect duplicates instead of taking
# the fast-track path.
for finder in [list("aabbca"), list("aababca")]:
expected = oidx.get_indexer_non_unique(finder)[0]

actual = ci.get_indexer(finder)
tm.assert_numpy_array_equal(expected, actual)

def test_reindex_dtype(self):
c = CategoricalIndex(['a', 'b', 'c', 'a'])
res, indexer = c.reindex(['a', 'c'])
Expand Down

0 comments on commit 83798b4

Please sign in to comment.