From edc9ab2d3d3871efb4e81e9be259ed309e904ae9 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 27 Aug 2017 21:52:06 -0700 Subject: [PATCH] BUG: Respect dups in reindexing CategoricalIndex When the indexer is identical to the elements, we should still return duplicates when the indexer contains duplicates. Closes gh-17323. --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/indexes/category.py | 2 +- pandas/tests/indexes/test_category.py | 19 +++++++++++++------ 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index fcadd26156b1d..942e37a29f8d5 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -358,6 +358,7 @@ Indexing - Allow unicode empty strings as placeholders in multilevel columns in Python 2 (:issue:`17099`) - Bug in ``.iloc`` when used with inplace addition or assignment and an int indexer on a ``MultiIndex`` causing the wrong indexes to be read from and written to (:issue:`17148`) - Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`) +- Bug in ``CategoricalIndex`` reindexing in which specified indices containing duplicates were not being respected (:issue:`17323`) I/O ^^^ diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index f22407308e094..0681202289311 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -487,7 +487,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): method = missing.clean_reindex_fill_method(method) target = ibase._ensure_index(target) - if self.equals(target): + if self.is_unique and self.equals(target): return np.arange(len(self), dtype='intp') if method == 'pad' or method == 'backfill': diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 64bd6df361aeb..3a1efced81a0e 100644 --- a/pandas/tests/indexes/test_category.py +++ 
b/pandas/tests/indexes/test_category.py @@ -365,29 +365,36 @@ def test_astype(self): tm.assert_index_equal(result, expected) def test_reindex_base(self): - - # determined by cat ordering - idx = self.create_index() + # Determined by cat ordering. + idx = CategoricalIndex(list("cab"), categories=list("cab")) expected = np.arange(len(idx), dtype=np.intp) actual = idx.get_indexer(idx) tm.assert_numpy_array_equal(expected, actual) - with tm.assert_raises_regex(ValueError, 'Invalid fill method'): - idx.get_indexer(idx, method='invalid') + with tm.assert_raises_regex(ValueError, "Invalid fill method"): + idx.get_indexer(idx, method="invalid") def test_reindexing(self): + np.random.seed(123456789) ci = self.create_index() oidx = Index(np.array(ci)) - for n in [1, 2, 5, len(ci)]: + for n in [1, 2, 5]: finder = oidx[np.random.randint(0, len(ci), size=n)] expected = oidx.get_indexer_non_unique(finder)[0] actual = ci.get_indexer(finder) tm.assert_numpy_array_equal(expected, actual) + # see gh-17323 + for finder in [list("aabbca"), list("aababca")]: + expected = oidx.get_indexer_non_unique(finder)[0] + + actual = ci.get_indexer(finder) + tm.assert_numpy_array_equal(expected, actual) + def test_reindex_dtype(self): c = CategoricalIndex(['a', 'b', 'c', 'a']) res, indexer = c.reindex(['a', 'c'])