From d765dc33c2326778df30bee1c0046db75a837363 Mon Sep 17 00:00:00 2001
From: JustinZhengBC <justinzhengbc@gmail.com>
Date: Thu, 13 Dec 2018 13:11:19 -0800
Subject: [PATCH 1/5] BUG-24241 make Categorical.map transform nans

---
 doc/source/whatsnew/v0.24.0.rst       |  1 +
 pandas/core/arrays/categorical.py     | 21 +++++++++++++++---
 pandas/tests/indexes/test_category.py | 31 +++++++++++++++++++++++++++
 3 files changed, 50 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index 6095865fde87c..11b61b34dc00d 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -1273,6 +1273,7 @@ Categorical
 - Bug when resampling :meth:`Dataframe.resample()` and aggregating on categorical data, the categorical dtype was getting lost. (:issue:`23227`)
 - Bug in many methods of the ``.str``-accessor, which always failed on calling the ``CategoricalIndex.str`` constructor (:issue:`23555`, :issue:`23556`)
 - Bug in :meth:`Series.where` losing the categorical dtype for categorical data (:issue:`24077`)
+- Bug in :meth:`Categorical.apply` where the given function would not be applied to ``NaN`` values (:issue:`24241`)
 
 Datetimelike
 ^^^^^^^^^^^^
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 6ccb8dc5d2725..59ab02d62b44a 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -1229,11 +1229,26 @@ def map(self, mapper):
         Index(['first', 'second', nan], dtype='object')
         """
         new_categories = self.categories.map(mapper)
+
         try:
-            return self.from_codes(self._codes.copy(),
-                                   categories=new_categories,
-                                   ordered=self.ordered)
+            if isinstance(mapper, (dict, ABCSeries)):
+                new_value = mapper[np.nan]
+            else:
+                new_value = mapper(np.nan)
+        except (AttributeError, KeyError, TypeError, ValueError):
+            new_value = np.nan
+
+        try:
+            ret = self.from_codes(self._codes.copy(),
+                                  categories=new_categories,
+                                  ordered=self.ordered)
+            if new_value not in ret.categories and any(self._codes == -1):
+                ret.add_categories(new_value, inplace=True)
+                ret = ret.fillna(new_value)
+            return ret
         except ValueError:
+            new_categories = new_categories.insert(len(new_categories),
+                                                   new_value)
             return np.take(new_categories, self._codes)
 
     __eq__ = _cat_compare_op('__eq__')
diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py
index bb537f30821e4..477c3de82f004 100644
--- a/pandas/tests/indexes/test_category.py
+++ b/pandas/tests/indexes/test_category.py
@@ -311,6 +311,37 @@ def test_map_with_categorical_series(self):
         exp = pd.Index(["odd", "even", "odd", np.nan])
         tm.assert_index_equal(a.map(c), exp)
 
+    @pytest.mark.parametrize('data, f', [[[1, 1, np.nan], pd.isna],
+                                         [[1, 2, np.nan], pd.isna],
+                                         [[1, 1, np.nan], {1: False,
+                                                           np.nan: True}],
+                                         [[1, 2, np.nan], {1: False,
+                                                           2: False,
+                                                           np.nan: True}]])
+    def test_map_fill_nan(self, data, f):
+        values = pd.Categorical(data)
+        result = values.map(f)
+        if data[1] == 1:
+            expected = pd.Categorical([False, False, True])
+            tm.assert_categorical_equal(result, expected)
+        else:
+            expected = pd.Index([False, False, True])
+            tm.assert_index_equal(result, expected)
+
+    @pytest.mark.parametrize('data, f', [[[1, 1, np.nan], {1: False}],
+                                         [[1, 2, np.nan], {1: False,
+                                                           2: False}],
+                                         [[1, 1, np.nan], pd.Series([False,
+                                                                     False])],
+                                         [[1, 2, np.nan], pd.Series([False,
+                                                                     False,
+                                                                     False])]])
+    def test_map_dont_fill_nan(self, data, f):
+        values = pd.Categorical(data)
+        result = values.map(f)
+        expected = pd.Index([False, False, np.nan])
+        tm.assert_index_equal(result, expected)
+
     @pytest.mark.parametrize('klass', [list, tuple, np.array, pd.Series])
     def test_where(self, klass):
         i = self.create_index()

From 628bfac22b466c4f15a26373e099eb2b05598ad8 Mon Sep 17 00:00:00 2001
From: JustinZhengBC <justinzhengbc@gmail.com>
Date: Fri, 14 Dec 2018 18:43:34 -0800
Subject: [PATCH 2/5] BUG-24241 make requested changes

---
 pandas/core/arrays/categorical.py     |  2 +-
 pandas/tests/indexes/test_category.py | 41 ++++++++++++++++-----------
 2 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 59ab02d62b44a..e13a45cbad231 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -1231,7 +1231,7 @@ def map(self, mapper):
         new_categories = self.categories.map(mapper)
 
         try:
-            if isinstance(mapper, (dict, ABCSeries)):
+            if is_dict_like(mapper):
                 new_value = mapper[np.nan]
             else:
                 new_value = mapper(np.nan)
diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py
index 477c3de82f004..f7c82af475b79 100644
--- a/pandas/tests/indexes/test_category.py
+++ b/pandas/tests/indexes/test_category.py
@@ -311,14 +311,18 @@ def test_map_with_categorical_series(self):
         exp = pd.Index(["odd", "even", "odd", np.nan])
         tm.assert_index_equal(a.map(c), exp)
 
-    @pytest.mark.parametrize('data, f', [[[1, 1, np.nan], pd.isna],
-                                         [[1, 2, np.nan], pd.isna],
-                                         [[1, 1, np.nan], {1: False,
-                                                           np.nan: True}],
-                                         [[1, 2, np.nan], {1: False,
-                                                           2: False,
-                                                           np.nan: True}]])
-    def test_map_fill_nan(self, data, f):
+    @pytest.mark.parametrize(
+        (
+            'data',
+            'f'
+        ),
+        (
+            ([1, 1, np.nan], pd.isna),
+            ([1, 2, np.nan], pd.isna),
+            ([1, 1, np.nan], {1: False, np.nan: True}),
+            ([1, 2, np.nan], {1: False, 2: False, np.nan: True})
+        ))
+    def test_map_fill_nan(self, data, f):  # GH 24241
         values = pd.Categorical(data)
         result = values.map(f)
         if data[1] == 1:
@@ -328,15 +332,18 @@ def test_map_fill_nan(self, data, f):
             expected = pd.Index([False, False, True])
             tm.assert_index_equal(result, expected)
 
-    @pytest.mark.parametrize('data, f', [[[1, 1, np.nan], {1: False}],
-                                         [[1, 2, np.nan], {1: False,
-                                                           2: False}],
-                                         [[1, 1, np.nan], pd.Series([False,
-                                                                     False])],
-                                         [[1, 2, np.nan], pd.Series([False,
-                                                                     False,
-                                                                     False])]])
-    def test_map_dont_fill_nan(self, data, f):
+    @pytest.mark.parametrize(
+        (
+            'data',
+            'f'
+        ),
+        (
+            ([1, 1, np.nan], {1: False}),
+            ([1, 2, np.nan], {1: False, 2: False}),
+            ([1, 1, np.nan], pd.Series([False, False])),
+            ([1, 2, np.nan], pd.Series([False, False, False]))
+        ))
+    def test_map_dont_fill_nan(self, data, f):  # GH 24241
         values = pd.Categorical(data)
         result = values.map(f)
         expected = pd.Index([False, False, np.nan])

From 20119968d880a88a1f59e11a2ad36f8db5e927cf Mon Sep 17 00:00:00 2001
From: JustinZhengBC <justinzhengbc@gmail.com>
Date: Tue, 18 Dec 2018 20:16:28 -0800
Subject: [PATCH 3/5] BUG-24241 update documentation instead

---
 doc/source/categorical.rst            |  3 ++-
 doc/source/whatsnew/v0.24.0.rst       |  2 +-
 pandas/core/arrays/categorical.py     | 26 +++++++-----------------
 pandas/tests/indexes/test_category.py | 29 +++++++--------------------
 4 files changed, 17 insertions(+), 43 deletions(-)

diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst
index 721e032b8bb92..ff37fbbb4aa24 100644
--- a/doc/source/categorical.rst
+++ b/doc/source/categorical.rst
@@ -1145,7 +1145,8 @@ dtype in apply
 
 Pandas currently does not preserve the dtype in apply functions: If you apply along rows you get
 a `Series` of ``object`` `dtype` (same as getting a row -> getting one element will return a
-basic type) and applying along columns will also convert to object.
+basic type) and applying along columns will also convert to object. ``NaN`` values are unaffected.
+You can use ``fillna`` to handle missing values before applying a function.
 
 .. ipython:: python
 
diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index 11b61b34dc00d..933d6a486ad07 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -1273,7 +1273,7 @@ Categorical
 - Bug when resampling :meth:`Dataframe.resample()` and aggregating on categorical data, the categorical dtype was getting lost. (:issue:`23227`)
 - Bug in many methods of the ``.str``-accessor, which always failed on calling the ``CategoricalIndex.str`` constructor (:issue:`23555`, :issue:`23556`)
 - Bug in :meth:`Series.where` losing the categorical dtype for categorical data (:issue:`24077`)
-- Bug in :meth:`Categorical.apply` where the given function would not be applied to ``NaN`` values (:issue:`24241`)
+- Bug in :meth:`Categorical.map` where ``NaN`` values could be handled unpredictably. They now remain unchanged (:issue:`24241`)
 
 Datetimelike
 ^^^^^^^^^^^^
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index e13a45cbad231..65679753368e8 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -1166,7 +1166,7 @@ def map(self, mapper):
         Maps the categories to new categories. If the mapping correspondence is
         one-to-one the result is a :class:`~pandas.Categorical` which has the
         same order property as the original, otherwise a :class:`~pandas.Index`
-        is returned.
+        is returned. NaN values are unaffected.
 
         If a `dict` or :class:`~pandas.Series` is used any unmapped category is
         mapped to `NaN`. Note that if this happens an :class:`~pandas.Index`
@@ -1229,26 +1229,14 @@ def map(self, mapper):
         Index(['first', 'second', nan], dtype='object')
         """
         new_categories = self.categories.map(mapper)
-
         try:
-            if is_dict_like(mapper):
-                new_value = mapper[np.nan]
-            else:
-                new_value = mapper(np.nan)
-        except (AttributeError, KeyError, TypeError, ValueError):
-            new_value = np.nan
-
-        try:
-            ret = self.from_codes(self._codes.copy(),
-                                  categories=new_categories,
-                                  ordered=self.ordered)
-            if new_value not in ret.categories and any(self._codes == -1):
-                ret.add_categories(new_value, inplace=True)
-                ret = ret.fillna(new_value)
-            return ret
+            return self.from_codes(self._codes.copy(),
+                                   categories=new_categories,
+                                   ordered=self.ordered)
         except ValueError:
-            new_categories = new_categories.insert(len(new_categories),
-                                                   new_value)
+            if any(self._codes == -1):
+                new_categories = new_categories.insert(len(new_categories),
+                                                       np.nan)
             return np.take(new_categories, self._codes)
 
     __eq__ = _cat_compare_op('__eq__')
diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py
index f7c82af475b79..d9dfeadd10b84 100644
--- a/pandas/tests/indexes/test_category.py
+++ b/pandas/tests/indexes/test_category.py
@@ -319,35 +319,20 @@ def test_map_with_categorical_series(self):
         (
             ([1, 1, np.nan], pd.isna),
             ([1, 2, np.nan], pd.isna),
-            ([1, 1, np.nan], {1: False, np.nan: True}),
-            ([1, 2, np.nan], {1: False, 2: False, np.nan: True})
-        ))
-    def test_map_fill_nan(self, data, f):  # GH 24241
-        values = pd.Categorical(data)
-        result = values.map(f)
-        if data[1] == 1:
-            expected = pd.Categorical([False, False, True])
-            tm.assert_categorical_equal(result, expected)
-        else:
-            expected = pd.Index([False, False, True])
-            tm.assert_index_equal(result, expected)
-
-    @pytest.mark.parametrize(
-        (
-            'data',
-            'f'
-        ),
-        (
             ([1, 1, np.nan], {1: False}),
             ([1, 2, np.nan], {1: False, 2: False}),
             ([1, 1, np.nan], pd.Series([False, False])),
             ([1, 2, np.nan], pd.Series([False, False, False]))
         ))
-    def test_map_dont_fill_nan(self, data, f):  # GH 24241
+    def test_map_with_nan(self, data, f):  # GH 24241
         values = pd.Categorical(data)
         result = values.map(f)
-        expected = pd.Index([False, False, np.nan])
-        tm.assert_index_equal(result, expected)
+        if data[1] == 1:
+            expected = pd.Categorical([False, False, np.nan])
+            tm.assert_categorical_equal(result, expected)
+        else:
+            expected = pd.Index([False, False, np.nan])
+            tm.assert_index_equal(result, expected)
 
     @pytest.mark.parametrize('klass', [list, tuple, np.array, pd.Series])
     def test_where(self, klass):

From e5b5415a6a6bbe6f415ebfc8037a1bb56af6abaf Mon Sep 17 00:00:00 2001
From: JustinZhengBC <justinzhengbc@gmail.com>
Date: Wed, 19 Dec 2018 15:36:33 -0800
Subject: [PATCH 4/5] BUG-24241 add comment

---
 pandas/core/arrays/categorical.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 65679753368e8..163db1ca43415 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -1234,6 +1234,8 @@ def map(self, mapper):
                                    categories=new_categories,
                                    ordered=self.ordered)
         except ValueError:
+            # NA values are represented in self._codes with -1
+            # np.take causes NA values to take final element in new_categories
             if any(self._codes == -1):
                 new_categories = new_categories.insert(len(new_categories),
                                                        np.nan)

From 82859d9168b6943b8405a0c830c16e8c3967b713 Mon Sep 17 00:00:00 2001
From: JustinZhengBC <justinzhengbc@gmail.com>
Date: Wed, 19 Dec 2018 23:59:11 -0800
Subject: [PATCH 5/5] BUG-24241 use np.any instead of any

---
 pandas/core/arrays/categorical.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 163db1ca43415..9a8b345cea1b3 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -1236,7 +1236,7 @@ def map(self, mapper):
         except ValueError:
             # NA values are represented in self._codes with -1
             # np.take causes NA values to take final element in new_categories
-            if any(self._codes == -1):
+            if np.any(self._codes == -1):
                 new_categories = new_categories.insert(len(new_categories),
                                                        np.nan)
             return np.take(new_categories, self._codes)