From df5bfcf623a181d8cfae1241f269f172deb8abc8 Mon Sep 17 00:00:00 2001
From: ri938 <r_irv938@hotmail.com>
Date: Mon, 3 Jul 2017 21:10:28 +0100
Subject: [PATCH 1/8] BUG: reindex would throw when a categorical index was
 empty #16770

---
 doc/source/whatsnew/v0.20.3.txt       | 2 +-
 pandas/core/indexes/category.py       | 9 +++++++--
 pandas/tests/indexes/test_category.py | 8 ++++++++
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt
index c730142450ea6..636f36d4fe3cc 100644
--- a/doc/source/whatsnew/v0.20.3.txt
+++ b/doc/source/whatsnew/v0.20.3.txt
@@ -37,7 +37,7 @@ Performance Improvements
 Bug Fixes
 ~~~~~~~~~
 - Fixed issue with dataframe scatter plot for categorical data that reports incorrect column key not found when categorical data is used for plotting (:issue:`16199`)
-
+- Handle reindexing an empty categorical index rather than throwing (:issue:`16770`)
 
 
 
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
index d9e0c218bfafc..da3c25eb473c4 100644
--- a/pandas/core/indexes/category.py
+++ b/pandas/core/indexes/category.py
@@ -419,7 +419,11 @@ def reindex(self, target, method=None, level=None, limit=None,
             raise ValueError("cannot reindex with a non-unique indexer")
 
         indexer, missing = self.get_indexer_non_unique(np.array(target))
-        new_target = self.take(indexer)
+
+        if len(self.codes):
+            new_target = self.take(indexer)
+        else:
+            new_target = target
 
         # filling in missing if needed
         if len(missing):
@@ -430,7 +434,8 @@ def reindex(self, target, method=None, level=None, limit=None,
                 result = Index(np.array(self), name=self.name)
                 new_target, indexer, _ = result._reindex_non_unique(
                     np.array(target))
-
+                # see GH 16819, indexer needs to be converted to correct type
+                indexer = np.array(indexer, dtype=np.int64)
             else:
 
                 codes = new_target.codes.copy()
diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py
index 4e4f9b29f9a4c..139ec49542abe 100644
--- a/pandas/tests/indexes/test_category.py
+++ b/pandas/tests/indexes/test_category.py
@@ -420,6 +420,14 @@ def test_reindex_dtype(self):
         tm.assert_numpy_array_equal(indexer,
                                     np.array([0, 3, 2], dtype=np.int64))
 
+    def test_reindex_empty_index(self):
+        # See GH16770
+        c = CategoricalIndex([])
+        res, indexer = c.reindex(['a', 'b'])
+        tm.assert_index_equal(res, Index(['a', 'b']), exact=True)
+        tm.assert_numpy_array_equal(indexer,
+                                    np.array([-1, -1], dtype=np.int64))
+
     def test_duplicates(self):
 
         idx = CategoricalIndex([0, 0, 0], name='foo')

From 6b5bd715ce26e426e1f6659382b7142ca27291e7 Mon Sep 17 00:00:00 2001
From: ri938 <r_irv938@hotmail.com>
Date: Tue, 4 Jul 2017 17:56:30 +0100
Subject: [PATCH 2/8] BUG: get_indexer_non_unique inconsistent return types vs
 get_indexer #16819

---
 doc/source/whatsnew/v0.21.0.txt       |  1 +
 pandas/core/indexes/base.py           |  2 +-
 pandas/tests/indexes/test_base.py     | 11 +++++++++++
 pandas/tests/indexes/test_category.py |  3 +--
 4 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index de2516d75040b..4e9dbe1b32ee5 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -91,6 +91,7 @@ Performance Improvements
 
 Bug Fixes
 ~~~~~~~~~
+- Bug in get_indexer_non_unique inconsistent return type with get_indexer (:issue:`16819`)
 
 Conversion
 ^^^^^^^^^^
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 695f9f119baa2..91ff308579146 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -2704,7 +2704,7 @@ def get_indexer_non_unique(self, target):
             tgt_values = target._values
 
         indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
-        return Index(indexer), missing
+        return indexer, missing
 
     def get_indexer_for(self, target, **kwargs):
         """
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
index 18dbe6624008a..7a81a125467d5 100644
--- a/pandas/tests/indexes/test_base.py
+++ b/pandas/tests/indexes/test_base.py
@@ -1131,6 +1131,17 @@ def test_get_indexer_strings(self):
         with pytest.raises(TypeError):
             idx.get_indexer(['a', 'b', 'c', 'd'], method='pad', tolerance=2)
 
+    def test_get_indexer_consistency(self):
+        # See GH 16819
+        for name, index in self.indices.items():
+            indexer = index.get_indexer(index[0:2])
+            assert isinstance(indexer, np.ndarray)
+            assert indexer.dtype == np.intp
+
+            indexer, _ = index.get_indexer_non_unique(index[0:2])
+            assert isinstance(indexer, np.ndarray)
+            assert indexer.dtype == np.intp
+
     def test_get_loc(self):
         idx = pd.Index([0, 1, 2])
         all_methods = [None, 'pad', 'backfill', 'nearest']
diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py
index 139ec49542abe..c6bfae2804adc 100644
--- a/pandas/tests/indexes/test_category.py
+++ b/pandas/tests/indexes/test_category.py
@@ -386,8 +386,7 @@ def test_reindexing(self):
             expected = oidx.get_indexer_non_unique(finder)[0]
 
             actual = ci.get_indexer(finder)
-            tm.assert_numpy_array_equal(
-                expected.values, actual, check_dtype=False)
+            tm.assert_numpy_array_equal(expected, actual, check_dtype=True)
 
     def test_reindex_dtype(self):
         c = CategoricalIndex(['a', 'b', 'c', 'a'])

From e32df12c794a2adf6d4d342eb37d6adfba74da93 Mon Sep 17 00:00:00 2001
From: ri938 <r_irv938@hotmail.com>
Date: Tue, 4 Jul 2017 23:45:39 +0100
Subject: [PATCH 3/8] Remove mistaken code which belonged on another branch
 (minor correction)

---
 doc/source/whatsnew/v0.20.3.txt       | 1 -
 pandas/core/indexes/category.py       | 8 +-------
 pandas/tests/indexes/test_category.py | 8 --------
 3 files changed, 1 insertion(+), 16 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt
index 636f36d4fe3cc..e9bd5b04a5596 100644
--- a/doc/source/whatsnew/v0.20.3.txt
+++ b/doc/source/whatsnew/v0.20.3.txt
@@ -37,7 +37,6 @@ Performance Improvements
 Bug Fixes
 ~~~~~~~~~
 - Fixed issue with dataframe scatter plot for categorical data that reports incorrect column key not found when categorical data is used for plotting (:issue:`16199`)
-- Handle reindexing an empty categorical index rather than throwing (:issue:`16770`)
 
 
 
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
index da3c25eb473c4..cb7deba0415d4 100644
--- a/pandas/core/indexes/category.py
+++ b/pandas/core/indexes/category.py
@@ -419,11 +419,7 @@ def reindex(self, target, method=None, level=None, limit=None,
             raise ValueError("cannot reindex with a non-unique indexer")
 
         indexer, missing = self.get_indexer_non_unique(np.array(target))
-
-        if len(self.codes):
-            new_target = self.take(indexer)
-        else:
-            new_target = target
+        new_target = self.take(indexer)
 
         # filling in missing if needed
         if len(missing):
@@ -434,8 +430,6 @@ def reindex(self, target, method=None, level=None, limit=None,
                 result = Index(np.array(self), name=self.name)
                 new_target, indexer, _ = result._reindex_non_unique(
                     np.array(target))
-                # see GH 16819, indexer needs to be converted to correct type
-                indexer = np.array(indexer, dtype=np.int64)
             else:
 
                 codes = new_target.codes.copy()
diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py
index c6bfae2804adc..40af125ed65b3 100644
--- a/pandas/tests/indexes/test_category.py
+++ b/pandas/tests/indexes/test_category.py
@@ -419,14 +419,6 @@ def test_reindex_dtype(self):
         tm.assert_numpy_array_equal(indexer,
                                     np.array([0, 3, 2], dtype=np.int64))
 
-    def test_reindex_empty_index(self):
-        # See GH16770
-        c = CategoricalIndex([])
-        res, indexer = c.reindex(['a', 'b'])
-        tm.assert_index_equal(res, Index(['a', 'b']), exact=True)
-        tm.assert_numpy_array_equal(indexer,
-                                    np.array([-1, -1], dtype=np.int64))
-
     def test_duplicates(self):
 
         idx = CategoricalIndex([0, 0, 0], name='foo')

From 7e650bde2e408fe244c8685344816d551882e423 Mon Sep 17 00:00:00 2001
From: ri938 <r_irv938@hotmail.com>
Date: Tue, 4 Jul 2017 23:48:26 +0100
Subject: [PATCH 4/8] Minor removing blank lines inserted in minor correction

---
 doc/source/whatsnew/v0.20.3.txt | 1 +
 pandas/core/indexes/category.py | 1 +
 2 files changed, 2 insertions(+)

diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt
index e9bd5b04a5596..c730142450ea6 100644
--- a/doc/source/whatsnew/v0.20.3.txt
+++ b/doc/source/whatsnew/v0.20.3.txt
@@ -40,6 +40,7 @@ Bug Fixes
 
 
 
+
 Conversion
 ^^^^^^^^^^
 
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
index cb7deba0415d4..d9e0c218bfafc 100644
--- a/pandas/core/indexes/category.py
+++ b/pandas/core/indexes/category.py
@@ -430,6 +430,7 @@ def reindex(self, target, method=None, level=None, limit=None,
                 result = Index(np.array(self), name=self.name)
                 new_target, indexer, _ = result._reindex_non_unique(
                     np.array(target))
+
             else:
 
                 codes = new_target.codes.copy()

From b7106263d7ef6631051eb5b52549f97da970c240 Mon Sep 17 00:00:00 2001
From: ri938 <r_irv938@hotmail.com>
Date: Wed, 5 Jul 2017 23:24:37 +0100
Subject: [PATCH 5/8] Fix bugs caused by code assuming return type of
 get_indexer_non_unique is Index

---
 doc/source/whatsnew/v0.21.0.txt | 2 +-
 pandas/core/indexes/base.py     | 7 +++----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 4e9dbe1b32ee5..1b9ac11600d91 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -53,6 +53,7 @@ Backwards incompatible API changes
 - :func:`read_csv` now treats ``'n/a'`` strings as missing values by default (:issue:`16078`)
 - :class:`pandas.HDFStore`'s string representation is now faster and less detailed. For the previous behavior, use ``pandas.HDFStore.info()``. (:issue:`16503`).
 - Compression defaults in HDF stores now follow pytable standards. Default is no compression and if ``complib`` is missing and ``complevel`` > 0 ``zlib`` is used (:issue:`15943`)
+- Index.get_indexer_non_unique() now returns a ndarray indexer rather than an Index; this is consistent with Index.get_indexer() (:issue:`16819`)
 
 .. _whatsnew_0210.api:
 
@@ -91,7 +92,6 @@ Performance Improvements
 
 Bug Fixes
 ~~~~~~~~~
-- Bug in get_indexer_non_unique inconsistent return type with get_indexer (:issue:`16819`)
 
 Conversion
 ^^^^^^^^^^
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 91ff308579146..fb83b00aecf7a 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -2256,15 +2256,15 @@ def intersection(self, other):
             indexer = indexer.take((indexer != -1).nonzero()[0])
         except:
             # duplicates
-            indexer = Index(other._values).get_indexer_non_unique(
-                self._values)[0].unique()
+            indexer = algos.unique1d(Index(other._values).get_indexer_non_unique(
+                self._values)[0])
             indexer = indexer[indexer != -1]
 
         taken = other.take(indexer)
         if self.name != other.name:
             taken.name = None
         return taken
-
+ 
     def difference(self, other):
         """
         Return a new Index with elements from the index that are not in
@@ -2942,7 +2942,6 @@ def _reindex_non_unique(self, target):
             else:
 
                 # need to retake to have the same size as the indexer
-                indexer = indexer.values
                 indexer[~check] = 0
 
                 # reset the new indexer to account for the new size

From d3a77be8b21f48d844eee15bf65973416a102141 Mon Sep 17 00:00:00 2001
From: ri938 <r_irv938@hotmail.com>
Date: Thu, 6 Jul 2017 11:31:12 +0100
Subject: [PATCH 6/8] Fix issues caused by code still assuming return type of
 get_indexer_non_unique is an Index

---
 doc/source/whatsnew/v0.21.0.txt       | 2 +-
 pandas/core/groupby.py                | 4 ++--
 pandas/core/indexes/base.py           | 6 +++---
 pandas/tests/indexes/test_category.py | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 1b9ac11600d91..5dad5af3b2794 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -53,7 +53,7 @@ Backwards incompatible API changes
 - :func:`read_csv` now treats ``'n/a'`` strings as missing values by default (:issue:`16078`)
 - :class:`pandas.HDFStore`'s string representation is now faster and less detailed. For the previous behavior, use ``pandas.HDFStore.info()``. (:issue:`16503`).
 - Compression defaults in HDF stores now follow pytable standards. Default is no compression and if ``complib`` is missing and ``complevel`` > 0 ``zlib`` is used (:issue:`15943`)
-- Index.get_indexer_non_unique() now returns a ndarray indexer rather than an Index; this is consistent with Index.get_indexer() (:issue:`16819`)
+- `Index.get_indexer_non_unique()` now returns a ndarray indexer rather than an `Index`; this is consistent with `Index.get_indexer()` (:issue:`16819`)
 
 .. _whatsnew_0210.api:
 
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index c4b3e25acae7e..8ee7d86401083 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -896,8 +896,8 @@ def reset_identity(values):
                 # we can't reindex, so we resort to this
                 # GH 14776
                 if isinstance(ax, MultiIndex) and not ax.is_unique:
-                    result = result.take(result.index.get_indexer_for(
-                        ax.values).unique(), axis=self.axis)
+                    result = result.take(algorithms.unique1d(result.index.get_indexer_for(
+                        ax.values)), axis=self.axis)
                 else:
                     result = result.reindex_axis(ax, axis=self.axis)
 
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index fb83b00aecf7a..8a4878d9cfbcf 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -2256,15 +2256,15 @@ def intersection(self, other):
             indexer = indexer.take((indexer != -1).nonzero()[0])
         except:
             # duplicates
-            indexer = algos.unique1d(Index(other._values).get_indexer_non_unique(
-                self._values)[0])
+            indexer = algos.unique1d(
+                Index(other._values).get_indexer_non_unique(self._values)[0])
             indexer = indexer[indexer != -1]
 
         taken = other.take(indexer)
         if self.name != other.name:
             taken.name = None
         return taken
- 
+
     def difference(self, other):
         """
         Return a new Index with elements from the index that are not in
diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py
index 40af125ed65b3..493274fff43e0 100644
--- a/pandas/tests/indexes/test_category.py
+++ b/pandas/tests/indexes/test_category.py
@@ -386,7 +386,7 @@ def test_reindexing(self):
             expected = oidx.get_indexer_non_unique(finder)[0]
 
             actual = ci.get_indexer(finder)
-            tm.assert_numpy_array_equal(expected, actual, check_dtype=True)
+            tm.assert_numpy_array_equal(expected, actual)
 
     def test_reindex_dtype(self):
         c = CategoricalIndex(['a', 'b', 'c', 'a'])

From 05cb9d3bdc3bcf8991d4984cca0b4599e3375677 Mon Sep 17 00:00:00 2001
From: ri938 <r_irv938@hotmail.com>
Date: Thu, 6 Jul 2017 11:39:04 +0100
Subject: [PATCH 7/8] Minor documentation change

---
 doc/source/whatsnew/v0.21.0.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 5dad5af3b2794..36f3db98a39b5 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -53,7 +53,7 @@ Backwards incompatible API changes
 - :func:`read_csv` now treats ``'n/a'`` strings as missing values by default (:issue:`16078`)
 - :class:`pandas.HDFStore`'s string representation is now faster and less detailed. For the previous behavior, use ``pandas.HDFStore.info()``. (:issue:`16503`).
 - Compression defaults in HDF stores now follow pytable standards. Default is no compression and if ``complib`` is missing and ``complevel`` > 0 ``zlib`` is used (:issue:`15943`)
-- `Index.get_indexer_non_unique()` now returns a ndarray indexer rather than an `Index`; this is consistent with `Index.get_indexer()` (:issue:`16819`)
+- ``Index.get_indexer_non_unique()`` now returns a ndarray indexer rather than an ``Index``; this is consistent with ``Index.get_indexer()`` (:issue:`16819`)
 
 .. _whatsnew_0210.api:
 

From e1b85cb0b35b42c82872096e0127785b04d610e0 Mon Sep 17 00:00:00 2001
From: ri938 <r_irv938@hotmail.com>
Date: Thu, 6 Jul 2017 12:24:16 +0100
Subject: [PATCH 8/8] Minor correction: line too long.

---
 pandas/core/groupby.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 8ee7d86401083..daf3381ae4e89 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -896,8 +896,9 @@ def reset_identity(values):
                 # we can't reindex, so we resort to this
                 # GH 14776
                 if isinstance(ax, MultiIndex) and not ax.is_unique:
-                    result = result.take(algorithms.unique1d(result.index.get_indexer_for(
-                        ax.values)), axis=self.axis)
+                    indexer = algorithms.unique1d(
+                        result.index.get_indexer_for(ax.values))
+                    result = result.take(indexer, axis=self.axis)
                 else:
                     result = result.reindex_axis(ax, axis=self.axis)