From 83798b4c227bc28f0e77b5766fa6245affc23728 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 27 Aug 2017 21:52:06 -0700 Subject: [PATCH] BUG: Respect dups in reindexing CategoricalIndex When the indexer is identical to the elements. We should still return duplicates when the indexer contains duplicates. Closes gh-17323. --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/indexes/category.py | 2 +- pandas/tests/indexes/test_category.py | 22 +++++++++++++++++----- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index fcadd26156b1d..942e37a29f8d5 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -358,6 +358,7 @@ Indexing - Allow unicode empty strings as placeholders in multilevel columns in Python 2 (:issue:`17099`) - Bug in ``.iloc`` when used with inplace addition or assignment and an int indexer on a ``MultiIndex`` causing the wrong indexes to be read from and written to (:issue:`17148`) - Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`) +- Bug in ``CategoricalIndex`` reindexing in which specified indices containing duplicates were not being respected (:issue:`17323`) I/O ^^^ diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index f22407308e094..0681202289311 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -487,7 +487,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): method = missing.clean_reindex_fill_method(method) target = ibase._ensure_index(target) - if self.equals(target): + if self.is_unique and self.equals(target): return np.arange(len(self), dtype='intp') if method == 'pad' or method == 'backfill': diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 64bd6df361aeb..05d31af57b36c 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -365,18 +365,18 @@ def test_astype(self): tm.assert_index_equal(result, expected) def test_reindex_base(self): - - # determined by cat ordering - idx = self.create_index() + # Determined by cat ordering. + idx = CategoricalIndex(list("cab"), categories=list("cab")) expected = np.arange(len(idx), dtype=np.intp) actual = idx.get_indexer(idx) tm.assert_numpy_array_equal(expected, actual) - with tm.assert_raises_regex(ValueError, 'Invalid fill method'): - idx.get_indexer(idx, method='invalid') + with tm.assert_raises_regex(ValueError, "Invalid fill method"): + idx.get_indexer(idx, method="invalid") def test_reindexing(self): + np.random.seed(123456789) ci = self.create_index() oidx = Index(np.array(ci)) @@ -388,6 +388,18 @@ def test_reindexing(self): actual = ci.get_indexer(finder) tm.assert_numpy_array_equal(expected, actual) + # see gh-17323 + # + # Even when indexer is equal to the + # members in the index, we should + # respect duplicates instead of taking + # the fast-track path. + for finder in [list("aabbca"), list("aababca")]: + expected = oidx.get_indexer_non_unique(finder)[0] + + actual = ci.get_indexer(finder) + tm.assert_numpy_array_equal(expected, actual) + def test_reindex_dtype(self): c = CategoricalIndex(['a', 'b', 'c', 'a']) res, indexer = c.reindex(['a', 'c'])