From 19ac11a25cc4239b419612d71670171a59b82ff7 Mon Sep 17 00:00:00 2001
From: Artem Bogachev <bourbaki@me.com>
Date: Mon, 26 Mar 2018 23:56:38 +0300
Subject: [PATCH 01/18] PERF: GH20003 Series.isin for categorical dtypes

---
 doc/source/whatsnew/v0.23.0.txt       |  1 +
 pandas/core/algorithms.py             | 11 +++++++++--
 pandas/core/series.py                 |  6 +++++-
 pandas/tests/series/test_analytics.py | 11 +++++++++++
 4 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index 9159c03edee2e..3955d318e6fa7 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -345,6 +345,7 @@ Other Enhancements
   ``SQLAlchemy`` dialects supporting multivalue inserts include: ``mysql``, ``postgresql``, ``sqlite`` and any dialect with ``supports_multivalues_insert``. (:issue:`14315`, :issue:`8953`)
 - :func:`read_html` now accepts a ``displayed_only`` keyword argument to controls whether or not hidden elements are parsed (``True`` by default) (:issue:`20027`)
 - zip compression is supported via ``compression=zip`` in :func:`DataFrame.to_pickle`, :func:`Series.to_pickle`, :func:`DataFrame.to_csv`, :func:`Series.to_csv`, :func:`DataFrame.to_json`, :func:`Series.to_json`. (:issue:`17778`)
+- Performance enhancement for :func:`Series.isin` in the case of categorical dtypes (:issue:`20003`)
 
 .. _whatsnew_0230.api_breaking:
 
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index de2e638265f1e..00089e40cb0b2 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -403,8 +403,15 @@ def isin(comps, values):
     if not isinstance(values, (ABCIndex, ABCSeries, np.ndarray)):
         values = construct_1d_object_array_from_listlike(list(values))
 
-    comps, dtype, _ = _ensure_data(comps)
-    values, _, _ = _ensure_data(values, dtype=dtype)
+    if not is_categorical_dtype(comps):
+        comps, dtype, _ = _ensure_data(comps)
+        values, _, _ = _ensure_data(values, dtype=dtype)
+    else:
+        cats = comps.cat.categories
+        comps = comps.cat.codes.values
+        mask = isna(values)
+        values = cats.get_indexer(values)
+        values = values[mask | (values >= 0)]
 
     # faster for larger cases to use np.in1d
     f = lambda x, y: htable.ismember_object(x, values)
diff --git a/pandas/core/series.py b/pandas/core/series.py
index da598259d272d..be9b70c620302 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -3507,7 +3507,11 @@ def isin(self, values):
         5    False
         Name: animal, dtype: bool
         """
-        result = algorithms.isin(com._values_from_object(self), values)
+        if is_categorical_dtype(self.dtype):
+            result = algorithms.isin(self, values)
+        else:
+            result = algorithms.isin(com._values_from_object(self), values)
+
         return self._constructor(result, index=self.index).__finalize__(self)
 
     def between(self, left, right, inclusive=True):
diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
index 0e6e44e839464..b997039c54902 100644
--- a/pandas/tests/series/test_analytics.py
+++ b/pandas/tests/series/test_analytics.py
@@ -1255,6 +1255,17 @@ def test_isin_empty(self, empty):
         result = s.isin(empty)
         tm.assert_series_equal(expected, result)
 
+    def test_isin_cats(self):
+        s = Series(["a", "b", np.nan]).astype("category")
+
+        result = s.isin(["a", np.nan])
+        expected = Series([True, False, True])
+        tm.assert_series_equal(expected, result)
+
+        result = s.isin(["a", "c"])
+        expected = Series([True, False, False])
+        tm.assert_series_equal(expected, result)
+
     def test_timedelta64_analytics(self):
         from pandas import date_range
 

From 54021b9adc1a2756e267aedc8e5cf8172caf5d48 Mon Sep 17 00:00:00 2001
From: Artem Bogachev <bourbaki@me.com>
Date: Fri, 30 Mar 2018 23:38:16 +0300
Subject: [PATCH 02/18] Add Categorical.isin method

---
 pandas/core/algorithms.py              | 11 ++---------
 pandas/core/arrays/categorical.py      | 11 +++++++++++
 pandas/core/series.py                  |  5 ++---
 pandas/tests/categorical/test_algos.py | 21 +++++++++++++++++++++
 pandas/tests/series/test_analytics.py  | 11 -----------
 5 files changed, 36 insertions(+), 23 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 00089e40cb0b2..de2e638265f1e 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -403,15 +403,8 @@ def isin(comps, values):
     if not isinstance(values, (ABCIndex, ABCSeries, np.ndarray)):
         values = construct_1d_object_array_from_listlike(list(values))
 
-    if not is_categorical_dtype(comps):
-        comps, dtype, _ = _ensure_data(comps)
-        values, _, _ = _ensure_data(values, dtype=dtype)
-    else:
-        cats = comps.cat.categories
-        comps = comps.cat.codes.values
-        mask = isna(values)
-        values = cats.get_indexer(values)
-        values = values[mask | (values >= 0)]
+    comps, dtype, _ = _ensure_data(comps)
+    values, _, _ = _ensure_data(values, dtype=dtype)
 
     # faster for larger cases to use np.in1d
     f = lambda x, y: htable.ismember_object(x, values)
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 6eadef37da344..205b80ccd859b 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -40,6 +40,8 @@
 from pandas.util._decorators import (
     Appender, cache_readonly, deprecate_kwarg, Substitution)
 
+import pandas.core.algorithms as algorithms
+
 from pandas.io.formats.terminal import get_terminal_size
 from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs
 from pandas.core.config import get_option
@@ -2261,6 +2263,15 @@ def _concat_same_type(self, to_concat):
     def _formatting_values(self):
         return self
 
+    def isin(self, values):
+        from pandas.core.series import _sanitize_array
+        values = _sanitize_array(values, None, None)
+        null_mask = isna(values)
+        code_values = self.categories.get_indexer(values)
+        code_values = code_values[null_mask | (code_values >= 0)]
+        return algorithms.isin(self.codes, code_values)
+
+
 # The Series.cat accessor
 
 
diff --git a/pandas/core/series.py b/pandas/core/series.py
index be9b70c620302..56a3bc8bc4e1e 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -3507,11 +3507,10 @@ def isin(self, values):
         5    False
         Name: animal, dtype: bool
         """
-        if is_categorical_dtype(self.dtype):
-            result = algorithms.isin(self, values)
+        if is_categorical_dtype(self):
+            result = self._values.isin(values)
         else:
             result = algorithms.isin(com._values_from_object(self), values)
-
         return self._constructor(result, index=self.index).__finalize__(self)
 
     def between(self, left, right, inclusive=True):
diff --git a/pandas/tests/categorical/test_algos.py b/pandas/tests/categorical/test_algos.py
index 61764ec0ff632..109d97b898a90 100644
--- a/pandas/tests/categorical/test_algos.py
+++ b/pandas/tests/categorical/test_algos.py
@@ -47,3 +47,24 @@ def test_factorized_sort_ordered():
 
     tm.assert_numpy_array_equal(labels, expected_labels)
     tm.assert_categorical_equal(uniques, expected_uniques)
+
+
+def test_isin_cats():
+    cat = pd.Categorical(["a", "b", np.nan])
+
+    result = cat.isin(["a", np.nan])
+    expected = np.array([True, False, True], dtype=bool)
+    tm.assert_numpy_array_equal(expected, result)
+
+    result = cat.isin(["a", "c"])
+    expected = np.array([True, False, False], dtype=bool)
+    tm.assert_numpy_array_equal(expected, result)
+
+
+@pytest.mark.parametrize("empty", [[], pd.Series(), np.array([])])
+def test_isin_empty(empty):
+    s = pd.Categorical(["a", "b"])
+    expected = np.array([False, False], dtype=bool)
+
+    result = s.isin(empty)
+    tm.assert_numpy_array_equal(expected, result)
diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
index b997039c54902..0e6e44e839464 100644
--- a/pandas/tests/series/test_analytics.py
+++ b/pandas/tests/series/test_analytics.py
@@ -1255,17 +1255,6 @@ def test_isin_empty(self, empty):
         result = s.isin(empty)
         tm.assert_series_equal(expected, result)
 
-    def test_isin_cats(self):
-        s = Series(["a", "b", np.nan]).astype("category")
-
-        result = s.isin(["a", np.nan])
-        expected = Series([True, False, True])
-        tm.assert_series_equal(expected, result)
-
-        result = s.isin(["a", "c"])
-        expected = Series([True, False, False])
-        tm.assert_series_equal(expected, result)
-
     def test_timedelta64_analytics(self):
         from pandas import date_range
 

From 2514b45c583b73c97694334e094d9af199a8ffa7 Mon Sep 17 00:00:00 2001
From: Artem Bogachev <bourbaki@me.com>
Date: Fri, 30 Mar 2018 23:45:45 +0300
Subject: [PATCH 03/18] Add benchmark

---
 asv_bench/benchmarks/categoricals.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py
index 7743921003353..ef6e1aee7fc5f 100644
--- a/asv_bench/benchmarks/categoricals.py
+++ b/asv_bench/benchmarks/categoricals.py
@@ -148,3 +148,18 @@ def time_rank_int_cat(self):
 
     def time_rank_int_cat_ordered(self):
         self.s_int_cat_ordered.rank()
+
+
+class IsIn(object):
+
+    goal_time = 0.2
+
+    def setup(self):
+        n = 5 * 10**5
+        sample_size = 100
+        arr = ['s%04d' % i for i in np.random.randint(0, n // 10, size=n)]
+        self.sample = np.random.choice(arr, sample_size)
+        self.ts = pd.Series(arr).astype('category')
+
+    def time_set_categories(self):
+        self.ts.isin(self.sample)

From 80f687a436634670ef73e85e89c048720abb1131 Mon Sep 17 00:00:00 2001
From: Artem Bogachev <bourbaki@me.com>
Date: Sat, 31 Mar 2018 00:46:40 +0300
Subject: [PATCH 04/18] Rename benchmark

---
 asv_bench/benchmarks/categoricals.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py
index ef6e1aee7fc5f..25268ac0cd10b 100644
--- a/asv_bench/benchmarks/categoricals.py
+++ b/asv_bench/benchmarks/categoricals.py
@@ -161,5 +161,5 @@ def setup(self):
         self.sample = np.random.choice(arr, sample_size)
         self.ts = pd.Series(arr).astype('category')
 
-    def time_set_categories(self):
+    def time_isin_categorical_strings(self):
         self.ts.isin(self.sample)

From d6c39534651a5fedca537d0ce092e13cb20aae01 Mon Sep 17 00:00:00 2001
From: Artem Bogachev <bourbaki@me.com>
Date: Sat, 31 Mar 2018 00:47:27 +0300
Subject: [PATCH 05/18] change what's new

---
 doc/source/whatsnew/v0.23.0.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index 3955d318e6fa7..7b113bf2d5831 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -345,7 +345,6 @@ Other Enhancements
   ``SQLAlchemy`` dialects supporting multivalue inserts include: ``mysql``, ``postgresql``, ``sqlite`` and any dialect with ``supports_multivalues_insert``. (:issue:`14315`, :issue:`8953`)
 - :func:`read_html` now accepts a ``displayed_only`` keyword argument to controls whether or not hidden elements are parsed (``True`` by default) (:issue:`20027`)
 - zip compression is supported via ``compression=zip`` in :func:`DataFrame.to_pickle`, :func:`Series.to_pickle`, :func:`DataFrame.to_csv`, :func:`Series.to_csv`, :func:`DataFrame.to_json`, :func:`Series.to_json`. (:issue:`17778`)
-- Performance enhancement for :func:`Series.isin` in the case of categorical dtypes (:issue:`20003`)
 
 .. _whatsnew_0230.api_breaking:
 
@@ -803,6 +802,7 @@ Performance Improvements
 - Improved performance of :func:`pandas.core.groupby.GroupBy.ffill` and :func:`pandas.core.groupby.GroupBy.bfill` (:issue:`11296`)
 - Improved performance of :func:`pandas.core.groupby.GroupBy.any` and :func:`pandas.core.groupby.GroupBy.all` (:issue:`15435`)
 - Improved performance of :func:`pandas.core.groupby.GroupBy.pct_change` (:issue:`19165`)
+- Improved performance of :func:`Series.isin` in the case of categorical dtypes (:issue:`20003`)
 
 .. _whatsnew_0230.docs:
 

From ceffccd1a4f563d6087dfb5aa2a1a8c118010093 Mon Sep 17 00:00:00 2001
From: Artem Bogachev <bourbaki@me.com>
Date: Tue, 3 Apr 2018 01:32:59 +0300
Subject: [PATCH 06/18] rf: more generic check

---
 pandas/core/series.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/series.py b/pandas/core/series.py
index ec9e9522e1d8b..647f4cc019c67 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -3564,7 +3564,7 @@ def isin(self, values):
         5    False
         Name: animal, dtype: bool
         """
-        if is_categorical_dtype(self):
+        if hasattr(self._values, 'isin'):
             result = self._values.isin(values)
         else:
             result = algorithms.isin(com._values_from_object(self), values)

From 3247dce814377881150e8e05bfeb5a4899692ae0 Mon Sep 17 00:00:00 2001
From: Artem Bogachev <bourbaki@me.com>
Date: Thu, 5 Apr 2018 19:41:27 +0300
Subject: [PATCH 07/18] Move the rest of isin logic to algorithms

---
 pandas/core/algorithms.py   | 6 ++++++
 pandas/core/indexes/base.py | 2 +-
 pandas/core/series.py       | 5 +----
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 065a5782aced1..df34b3540fa3b 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -407,6 +407,12 @@ def isin(comps, values):
     if not isinstance(values, (ABCIndex, ABCSeries, np.ndarray)):
         values = construct_1d_object_array_from_listlike(list(values))
 
+    if is_categorical_dtype(comps):
+        # handle categoricals
+        return comps._values.isin(values)
+
+    comps = com._values_from_object(comps)
+
     comps, dtype, _ = _ensure_data(comps)
     values, _, _ = _ensure_data(values, dtype=dtype)
 
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 12bb09e8f8a8a..586190fbc65d8 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -3487,7 +3487,7 @@ def isin(self, values, level=None):
         """
         if level is not None:
             self._validate_index_level(level)
-        return algos.isin(np.array(self), values)
+        return algos.isin(self, values)
 
     def _can_reindex(self, indexer):
         """
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 647f4cc019c67..89916872651ac 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -3564,10 +3564,7 @@ def isin(self, values):
         5    False
         Name: animal, dtype: bool
         """
-        if hasattr(self._values, 'isin'):
-            result = self._values.isin(values)
-        else:
-            result = algorithms.isin(com._values_from_object(self), values)
+        result = algorithms.isin(self, values)
         return self._constructor(result, index=self.index).__finalize__(self)
 
     def between(self, left, right, inclusive=True):

From 2b7b1c48e11ccb5d2e9ea1d6eeb9146e977c6aad Mon Sep 17 00:00:00 2001
From: Artem Bogachev <bourbaki@me.com>
Date: Fri, 6 Apr 2018 01:23:30 +0300
Subject: [PATCH 08/18] Fix for null mask

---
 pandas/core/arrays/categorical.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 0163d151029c6..506d04519fe32 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2221,7 +2221,7 @@ def _formatting_values(self):
     def isin(self, values):
         from pandas.core.series import _sanitize_array
         values = _sanitize_array(values, None, None)
-        null_mask = isna(values)
+        null_mask = np.asarray(isna(values))
         code_values = self.categories.get_indexer(values)
         code_values = code_values[null_mask | (code_values >= 0)]
         return algorithms.isin(self.codes, code_values)

From 4478a49c6829a7afdfc7c240a57d34eff2c330c8 Mon Sep 17 00:00:00 2001
From: Artem Bogachev <bourbaki@me.com>
Date: Sat, 7 Apr 2018 12:30:54 +0300
Subject: [PATCH 09/18] Add docs and raise error on non-list-like

---
 pandas/core/arrays/categorical.py | 44 +++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 506d04519fe32..6d998085ad175 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2219,6 +2219,50 @@ def _formatting_values(self):
         return self
 
     def isin(self, values):
+        """
+        Check whether `values` are contained in Categorical.
+
+        Return a boolean NumPy Array showing whether each element in the Categorical
+        matches an element in the passed sequence of `values` exactly.
+
+        Parameters
+        ----------
+        values : set or list-like
+            The sequence of values to test. Passing in a single string will
+            raise a ``TypeError``. Instead, turn a single string into a
+            list of one element.
+
+        Returns
+        -------
+        isin : numpy.ndarray (bool dtype)
+
+        Raises
+        ------
+        TypeError
+          * If `values` is a string
+
+        See Also
+        --------
+        pandas.Series.isin : equivalent method on Series
+
+        Examples
+        --------
+
+        >>> s = pd.Categorical(['lama', 'cow', 'lama', 'beetle', 'lama',
+        ...                'hippo'])
+        >>> s.isin(['cow', 'lama'])
+        array([ True,  True,  True, False,  True, False])
+
+        Passing a single string as ``s.isin('lama')`` will raise an error. Use
+        a list of one element instead:
+
+        >>> s.isin(['lama'])
+        array([ True, False,  True, False,  True, False])
+        """
+        if not is_list_like(values):
+            raise TypeError("only list-like objects are allowed to be passed"
+                            " to isin(), you passed a [{values_type}]"
+                            .format(values_type=type(values).__name__))
         from pandas.core.series import _sanitize_array
         values = _sanitize_array(values, None, None)
         null_mask = np.asarray(isna(values))

From 64fef493ce02fd2136996fa62643579ef06b6805 Mon Sep 17 00:00:00 2001
From: Artem Bogachev <bourbaki@me.com>
Date: Sat, 7 Apr 2018 12:46:39 +0300
Subject: [PATCH 10/18] fix doc line

---
 pandas/core/arrays/categorical.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 6d998085ad175..baa73e9e4a1fc 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2222,8 +2222,9 @@ def isin(self, values):
         """
         Check whether `values` are contained in Categorical.
 
-        Return a boolean NumPy Array showing whether each element in the Categorical
-        matches an element in the passed sequence of `values` exactly.
+        Return a boolean NumPy Array showing whether each element in
+        the Categorical matches an element in the passed sequence of
+        `values` exactly.
 
         Parameters
         ----------

From b25da12a69af24219fecfb1bf3dfdfde39e06d94 Mon Sep 17 00:00:00 2001
From: Artem Bogachev <bourbaki@me.com>
Date: Mon, 9 Apr 2018 02:58:48 +0300
Subject: [PATCH 11/18] refactor benchmark name and add reference to issue

---
 asv_bench/benchmarks/categoricals.py   | 2 +-
 pandas/tests/categorical/test_algos.py | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py
index 25268ac0cd10b..1889830ee17d4 100644
--- a/asv_bench/benchmarks/categoricals.py
+++ b/asv_bench/benchmarks/categoricals.py
@@ -150,7 +150,7 @@ def time_rank_int_cat_ordered(self):
         self.s_int_cat_ordered.rank()
 
 
-class IsIn(object):
+class Isin(object):
 
     goal_time = 0.2
 
diff --git a/pandas/tests/categorical/test_algos.py b/pandas/tests/categorical/test_algos.py
index dcda226bfd23e..1c68377786dd4 100644
--- a/pandas/tests/categorical/test_algos.py
+++ b/pandas/tests/categorical/test_algos.py
@@ -50,6 +50,7 @@ def test_factorized_sort_ordered():
 
 
 def test_isin_cats():
+    # GH20003
     cat = pd.Categorical(["a", "b", np.nan])
 
     result = cat.isin(["a", np.nan])

From 9f8e7906f3d2f3a87182a3d5e940b9744c43626b Mon Sep 17 00:00:00 2001
From: Artem Bogachev <bourbaki@me.com>
Date: Sun, 15 Apr 2018 20:21:03 +0300
Subject: [PATCH 12/18] add todo

---
 pandas/core/algorithms.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index df34b3540fa3b..5493348334223 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -408,6 +408,7 @@ def isin(comps, values):
         values = construct_1d_object_array_from_listlike(list(values))
 
     if is_categorical_dtype(comps):
+        # TODO(extension)
         # handle categoricals
         return comps._values.isin(values)
 

From 60ac65864330051eb2177edb28b18e95e6d4665d Mon Sep 17 00:00:00 2001
From: Artem Bogachev <bourbaki@me.com>
Date: Sun, 15 Apr 2018 20:24:15 +0300
Subject: [PATCH 13/18] move import from the function to the top of the file

---
 pandas/core/arrays/categorical.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index baa73e9e4a1fc..d2cc92ebaaab0 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -44,6 +44,7 @@
 from pandas.io.formats.terminal import get_terminal_size
 from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs
 from pandas.core.config import get_option
+from pandas.core.series import _sanitize_array
 
 from .base import ExtensionArray
 
@@ -2264,7 +2265,6 @@ def isin(self, values):
             raise TypeError("only list-like objects are allowed to be passed"
                             " to isin(), you passed a [{values_type}]"
                             .format(values_type=type(values).__name__))
-        from pandas.core.series import _sanitize_array
         values = _sanitize_array(values, None, None)
         null_mask = np.asarray(isna(values))
         code_values = self.categories.get_indexer(values)

From 50aca261b584b564f843877d34f65bb21069bcf0 Mon Sep 17 00:00:00 2001
From: Artem Bogachev <bourbaki@me.com>
Date: Sun, 15 Apr 2018 20:44:35 +0300
Subject: [PATCH 14/18] add int64 benchmark

---
 asv_bench/benchmarks/categoricals.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py
index 1889830ee17d4..42906787ce32d 100644
--- a/asv_bench/benchmarks/categoricals.py
+++ b/asv_bench/benchmarks/categoricals.py
@@ -154,12 +154,20 @@ class Isin(object):
 
     goal_time = 0.2
 
-    def setup(self):
+    params = ['int64', 'object']
+    param_names = ['dtype']
+
+    def setup(self, dtype):
         n = 5 * 10**5
         sample_size = 100
-        arr = ['s%04d' % i for i in np.random.randint(0, n // 10, size=n)]
+        if dtype == "int64":
+            arr = [i for i in np.random.randint(0, n // 10, size=n)]
+        else:
+            arr = ['s%04d' % i for i in np.random.randint(0, n // 10, size=n)]
+        np.random.seed(1234)
         self.sample = np.random.choice(arr, sample_size)
         self.ts = pd.Series(arr).astype('category')
 
-    def time_isin_categorical_strings(self):
+    def time_isin_categorical(self):
         self.ts.isin(self.sample)
+

From 713712eac335891904e784b9844308c46627e8e6 Mon Sep 17 00:00:00 2001
From: Artem Bogachev <bourbaki@me.com>
Date: Sun, 15 Apr 2018 21:20:57 +0300
Subject: [PATCH 15/18] move import to the top of the function

---
 pandas/core/arrays/categorical.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index d2cc92ebaaab0..5a06e47ba395c 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -44,7 +44,6 @@
 from pandas.io.formats.terminal import get_terminal_size
 from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs
 from pandas.core.config import get_option
-from pandas.core.series import _sanitize_array
 
 from .base import ExtensionArray
 
@@ -2261,6 +2260,7 @@ def isin(self, values):
         >>> s.isin(['lama'])
         array([ True, False,  True, False,  True, False])
         """
+        from pandas.core.series import _sanitize_array
         if not is_list_like(values):
             raise TypeError("only list-like objects are allowed to be passed"
                             " to isin(), you passed a [{values_type}]"

From 18c827d391a9bd6841eb48cabf46eb2b88416464 Mon Sep 17 00:00:00 2001
From: Artem Bogachev <bourbaki@me.com>
Date: Sun, 15 Apr 2018 22:43:21 +0300
Subject: [PATCH 16/18] add int64 categorical test

---
 asv_bench/benchmarks/categoricals.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py
index 42906787ce32d..1db3f783e1604 100644
--- a/asv_bench/benchmarks/categoricals.py
+++ b/asv_bench/benchmarks/categoricals.py
@@ -154,20 +154,19 @@ class Isin(object):
 
     goal_time = 0.2
 
-    params = ['int64', 'object']
+    params = ['object', 'int64']
     param_names = ['dtype']
 
     def setup(self, dtype):
+        np.random.seed(1234)
         n = 5 * 10**5
         sample_size = 100
-        if dtype == "int64":
-            arr = [i for i in np.random.randint(0, n // 10, size=n)]
-        else:
-            arr = ['s%04d' % i for i in np.random.randint(0, n // 10, size=n)]
-        np.random.seed(1234)
+        arr = [i for i in np.random.randint(0, n // 10, size=n)]
+        if dtype == 'object':
+            arr = ['s%04d' % i for i in arr]
         self.sample = np.random.choice(arr, sample_size)
         self.ts = pd.Series(arr).astype('category')
 
-    def time_isin_categorical(self):
+    def time_isin_categorical(self, dtype):
         self.ts.isin(self.sample)
 

From a2b70ee12f4496d54ebee3cf6a6db76f4fbee6c5 Mon Sep 17 00:00:00 2001
From: Artem Bogachev <bourbaki@me.com>
Date: Wed, 18 Apr 2018 13:35:10 +0300
Subject: [PATCH 17/18] rename variable in benchmark

---
 asv_bench/benchmarks/categoricals.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py
index 1db3f783e1604..04d25480f78a5 100644
--- a/asv_bench/benchmarks/categoricals.py
+++ b/asv_bench/benchmarks/categoricals.py
@@ -165,8 +165,8 @@ def setup(self, dtype):
         if dtype == 'object':
             arr = ['s%04d' % i for i in arr]
         self.sample = np.random.choice(arr, sample_size)
-        self.ts = pd.Series(arr).astype('category')
+        self.series = pd.Series(arr).astype('category')
 
     def time_isin_categorical(self, dtype):
-        self.ts.isin(self.sample)
+        self.series.isin(self.sample)
 

From 7b680cd239a6150b42b7cf8554f38c5c3beec368 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Wed, 25 Apr 2018 06:01:44 -0400
Subject: [PATCH 18/18] whitespace

---
 asv_bench/benchmarks/categoricals.py | 1 -
 pandas/core/arrays/categorical.py    | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py
index 04d25480f78a5..0ffd5f881d626 100644
--- a/asv_bench/benchmarks/categoricals.py
+++ b/asv_bench/benchmarks/categoricals.py
@@ -169,4 +169,3 @@ def setup(self, dtype):
 
     def time_isin_categorical(self, dtype):
         self.series.isin(self.sample)
-
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 90656c15d9e37..7f0d54de9def8 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2240,7 +2240,7 @@ def isin(self, values):
         Raises
         ------
         TypeError
-          * If `values` is a string
+          * If `values` is not a set or list-like
 
         See Also
         --------