From 7818486859d1aba53ce359b93cfc772e688958e5 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <TomAugspurger@users.noreply.github.com>
Date: Sat, 19 Aug 2017 06:27:05 -0500
Subject: [PATCH] BUG: Have object dtype for empty Categorical.categories
 (#17249)

* BUG: Have object dtype for empty Categorical ctor

Previously we had a `Float64Index`, which is inconsistent with, e.g., the
regular Index constructor.

* TST: Update tests in multi for new return

Previously these worked around the return type by wrapping list-likes
in `np.array` and relying on that to cast to float. These workarounds are no
longer necessary.

* TST: Update union_categorical tests

This relied on `NaN` being a float and empty being a float. Not a necessary
test anymore.

* TST: set object dtype
---
 doc/source/whatsnew/v0.21.0.txt                 |  3 +++
 pandas/core/categorical.py                      |  5 ++++-
 pandas/tests/indexes/test_multi.py              |  9 ++++-----
 pandas/tests/reshape/test_concat.py             |  2 +-
 pandas/tests/reshape/test_union_categoricals.py | 12 +++---------
 pandas/tests/test_categorical.py                | 10 ++++++++++
 6 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 4f55c6388c728..6008ea5d4cbcd 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -385,6 +385,9 @@ Numeric
 Categorical
 ^^^^^^^^^^^
 - Bug in :func:`Series.isin` when called with a categorical (:issue`16639`)
+- Bug in the categorical constructor with empty values and categories causing
+  the ``.categories`` to be an empty ``Float64Index`` rather than an empty
+  ``Index`` with object dtype (:issue:`17248`)
 
 
 Other
diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
index 230361931125e..1c2a29333001c 100644
--- a/pandas/core/categorical.py
+++ b/pandas/core/categorical.py
@@ -290,7 +290,10 @@ def __init__(self, values, categories=None, ordered=False, fastpath=False):
                 # On list with NaNs, int values will be converted to float. Use
                 # "object" dtype to prevent this. In the end objects will be
                 # casted to int/... in the category assignment step.
-                dtype = 'object' if isna(values).any() else None
+                if len(values) == 0 or isna(values).any():
+                    dtype = 'object'
+                else:
+                    dtype = None
                 values = _sanitize_array(values, None, dtype=dtype)
 
         if categories is None:
diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py
index da1b309f5a621..c66775f4690cc 100644
--- a/pandas/tests/indexes/test_multi.py
+++ b/pandas/tests/indexes/test_multi.py
@@ -776,7 +776,7 @@ def test_from_arrays_empty(self):
             arrays = [[]] * N
             names = list('ABC')[:N]
             result = MultiIndex.from_arrays(arrays=arrays, names=names)
-            expected = MultiIndex(levels=[np.array([])] * N, labels=[[]] * N,
+            expected = MultiIndex(levels=[[]] * N, labels=[[]] * N,
                                   names=names)
             tm.assert_index_equal(result, expected)
 
@@ -829,7 +829,7 @@ def test_from_product_empty(self):
 
         # 1 level
         result = MultiIndex.from_product([[]], names=['A'])
-        expected = pd.Float64Index([], name='A')
+        expected = pd.Index([], name='A')
         tm.assert_index_equal(result, expected)
 
         # 2 levels
@@ -838,7 +838,7 @@ def test_from_product_empty(self):
         names = ['A', 'B']
         for first, second in zip(l1, l2):
             result = MultiIndex.from_product([first, second], names=names)
-            expected = MultiIndex(levels=[np.array(first), np.array(second)],
+            expected = MultiIndex(levels=[first, second],
                                   labels=[[], []], names=names)
             tm.assert_index_equal(result, expected)
 
@@ -847,8 +847,7 @@ def test_from_product_empty(self):
         for N in range(4):
             lvl2 = lrange(N)
             result = MultiIndex.from_product([[], lvl2, []], names=names)
-            expected = MultiIndex(levels=[np.array(A)
-                                          for A in [[], lvl2, []]],
+            expected = MultiIndex(levels=[[], lvl2, []],
                                   labels=[[], [], []], names=names)
             tm.assert_index_equal(result, expected)
 
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index 46fea86c45925..52cd18126859a 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -680,7 +680,7 @@ def test_concat_categorical_empty(self):
         tm.assert_series_equal(s1.append(s2, ignore_index=True), s2)
 
         s1 = pd.Series([], dtype='category')
-        s2 = pd.Series([])
+        s2 = pd.Series([], dtype='object')
 
         # different dtype => not-category
         tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
diff --git a/pandas/tests/reshape/test_union_categoricals.py b/pandas/tests/reshape/test_union_categoricals.py
index fe8d54005ba9b..eb80fb54b4016 100644
--- a/pandas/tests/reshape/test_union_categoricals.py
+++ b/pandas/tests/reshape/test_union_categoricals.py
@@ -107,17 +107,11 @@ def test_union_categoricals_empty(self):
         exp = Categorical([])
         tm.assert_categorical_equal(res, exp)
 
-        res = union_categoricals([pd.Categorical([]),
-                                  pd.Categorical([1.0])])
-        exp = Categorical([1.0])
+        res = union_categoricals([Categorical([]),
+                                  Categorical(['1'])])
+        exp = Categorical(['1'])
         tm.assert_categorical_equal(res, exp)
 
-        # to make dtype equal
-        nanc = pd.Categorical(np.array([np.nan], dtype=np.float64))
-        res = union_categoricals([nanc,
-                                  pd.Categorical([])])
-        tm.assert_categorical_equal(res, nanc)
-
     def test_union_categorical_same_category(self):
         # check fastpath
         c1 = Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4])
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index a0b585a16ad9a..7bbe220378993 100644
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -112,6 +112,16 @@ def test_setitem_listlike(self):
         result = c.codes[np.array([100000]).astype(np.int64)]
         tm.assert_numpy_array_equal(result, np.array([5], dtype='int8'))
 
+    def test_constructor_empty(self):
+        # GH 17248
+        c = Categorical([])
+        expected = Index([])
+        tm.assert_index_equal(c.categories, expected)
+
+        c = Categorical([], categories=[1, 2, 3])
+        expected = pd.Int64Index([1, 2, 3])
+        tm.assert_index_equal(c.categories, expected)
+
     def test_constructor_unsortable(self):
 
         # it works!