pandas-dev · jreback · Apr 25, 2018 · Mar 26, 2018 · Mar 30, 2018 · Mar 30, 2018
diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py
@@ -148,3 +148,22 @@ def time_rank_int_cat(self):
 
     def time_rank_int_cat_ordered(self):
         self.s_int_cat_ordered.rank()
+
+
+class IsIn(object):
+    """Benchmark ``Series.isin`` on a categorical Series of strings."""
+
+    goal_time = 0.2
+
+    def setup(self):
+        # 500,000 string labels drawn from at most 50,000 distinct values,
+        # probed with 100 labels sampled (with replacement) from the data.
+        length = 5 * 10**5
+        n_lookup = 100
+        codes = np.random.randint(0, length // 10, size=length)
+        labels = ['s%04d' % code for code in codes]
+        self.sample = np.random.choice(labels, n_lookup)
+        self.ts = pd.Series(labels).astype('category')
+
+    def time_isin_categorical_strings(self):
+        self.ts.isin(self.sample)
diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
@@ -896,6 +896,7 @@ Performance Improvements
 - Improved performance of :func:`pandas.core.groupby.GroupBy.ffill` and :func:`pandas.core.groupby.GroupBy.bfill` (:issue:`11296`)
 - Improved performance of :func:`pandas.core.groupby.GroupBy.any` and :func:`pandas.core.groupby.GroupBy.all` (:issue:`15435`)
 - Improved performance of :func:`pandas.core.groupby.GroupBy.pct_change` (:issue:`19165`)
+- Improved performance of :func:`Series.isin` in the case of categorical dtypes (:issue:`20003`)
 
 .. _whatsnew_0230.docs:
 

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -39,6 +39,8 @@
 from pandas.util._decorators import (
     Appender, cache_readonly, deprecate_kwarg, Substitution)
 
+import pandas.core.algorithms as algorithms
+
 from pandas.io.formats.terminal import get_terminal_size
 from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs
 from pandas.core.config import get_option
@@ -2216,6 +2218,51 @@ def _concat_same_type(self, to_concat):
     def _formatting_values(self):
         return self
 
+    def isin(self, values):
+        """
+        Check whether each element of the Categorical is in `values`.
+
+        `values` is translated to category codes once, and the codes of
+        this Categorical are then tested against those codes.
+
+        Parameters
+        ----------
+        values : list-like
+            The values to look for. Values that are not among the
+            categories are ignored; missing values (as flagged by
+            ``isna``) are kept, so they match missing entries.
+
+        Returns
+        -------
+        numpy.ndarray of bool
+            One entry per element of the Categorical.
+
+        Raises
+        ------
+        TypeError
+            If `values` is not list-like.
+        """
+        from pandas.core.dtypes.common import is_list_like
+        from pandas.core.series import _sanitize_array
+
+        # Reject scalars up front with an explicit message instead of
+        # passing them on to the array helpers below, which expect
+        # list-likes.
+        if not is_list_like(values):
+            raise TypeError("only list-like objects are allowed to be passed"
+                            " to isin(), you passed a [{values_type}]"
+                            .format(values_type=type(values).__name__))
+        values = _sanitize_array(values, None, None)
+        null_mask = isna(values)
+        code_values = self.categories.get_indexer(values)
+        # get_indexer reports -1 for anything that is not a category. Keep
+        # the -1 only where the input itself was missing, so that unknown
+        # labels are dropped while NaN can still match the -1 code that
+        # marks missing entries in ``self.codes``.
+        code_values = code_values[null_mask | (code_values >= 0)]
+        return algorithms.isin(self.codes, code_values)
+
+
 # The Series.cat accessor
 
 

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -3564,7 +3564,10 @@ def isin(self, values):
         5    False
         Name: animal, dtype: bool
         """
-        result = algorithms.isin(com._values_from_object(self), values)
+        if is_categorical_dtype(self):
+            result = self._values.isin(values)
+        else:
+            result = algorithms.isin(com._values_from_object(self), values)
         return self._constructor(result, index=self.index).__finalize__(self)
 
     def between(self, left, right, inclusive=True):

diff --git a/pandas/tests/categorical/test_algos.py b/pandas/tests/categorical/test_algos.py
@@ -47,3 +47,27 @@ def test_factorized_sort_ordered():
 
     tm.assert_numpy_array_equal(labels, expected_labels)
     tm.assert_categorical_equal(uniques, expected_uniques)
+
+
+def test_isin_cats():
+    """``Categorical.isin`` matches NaN and ignores unknown labels."""
+    cat = pd.Categorical(["a", "b", np.nan])
+
+    # a missing value in the lookup values matches the missing entry
+    tm.assert_numpy_array_equal(
+        np.array([True, False, True], dtype=bool),
+        cat.isin(["a", np.nan]))
+
+    # "c" is not one of the categories, so it matches nothing
+    tm.assert_numpy_array_equal(
+        np.array([True, False, False], dtype=bool),
+        cat.isin(["a", "c"]))
+
+
+@pytest.mark.parametrize("empty", [[], pd.Series(), np.array([])])
+def test_isin_empty(empty):
+    """An empty set of lookup values never matches anything."""
+    cat = pd.Categorical(["a", "b"])
+
+    no_match = np.array([False, False], dtype=bool)
+    tm.assert_numpy_array_equal(no_match, cat.isin(empty))