From e5de21a991408b3d3783489989201826af8ada67 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 18 Jul 2017 17:45:01 -0700 Subject: [PATCH] BUG: Don't error with empty Series for .isin (#17006) Empty Series initializes to float64, even when the data type is object for .isin, leading to an error with membership. Closes gh-16991. --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/algorithms.py | 2 ++ pandas/tests/frame/test_analytics.py | 9 ++++++--- pandas/tests/indexes/test_base.py | 9 +++++++++ pandas/tests/series/test_analytics.py | 9 +++++++++ pandas/tests/test_algos.py | 9 +++++++++ 6 files changed, 36 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index b02613b12ba38..4801e5c5300e7 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -239,3 +239,4 @@ Categorical Other ^^^^^ - Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`) +- Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 4ee2c54000fb6..79beb95d93ea1 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -65,6 +65,8 @@ def _ensure_data(values, dtype=None): # we check some simple dtypes first try: + if is_object_dtype(dtype): + return _ensure_object(np.asarray(values)), 'object', 'object' if is_bool_dtype(values) or is_bool_dtype(dtype): # we are actually coercing to uint64 # until our algos suppport uint8 directly (see TODO) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index b09325bfa2ddc..da1c68005b9b2 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1151,10 +1151,13 @@ def test_isin(self): expected = DataFrame([df.loc[s].isin(other) for s in df.index]) tm.assert_frame_equal(result, expected) - def test_isin_empty(self): + 
@pytest.mark.parametrize("empty", [[], Series(), np.array([])]) + def test_isin_empty(self, empty): + # see gh-16991 df = DataFrame({'A': ['a', 'b', 'c'], 'B': ['a', 'e', 'f']}) - result = df.isin([]) - expected = pd.DataFrame(False, df.index, df.columns) + expected = DataFrame(False, df.index, df.columns) + + result = df.isin(empty) tm.assert_frame_equal(result, expected) def test_isin_dict(self): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 18dbe6624008a..692cdd4957947 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1407,6 +1407,15 @@ def check_idx(idx): # Float64Index overrides isin, so must be checked separately check_idx(Float64Index([1.0, 2.0, 3.0, 4.0])) + @pytest.mark.parametrize("empty", [[], Series(), np.array([])]) + def test_isin_empty(self, empty): + # see gh-16991 + idx = Index(["a", "b"]) + expected = np.array([False, False]) + + result = idx.isin(empty) + tm.assert_numpy_array_equal(expected, result) + def test_boolean_cmp(self): values = [1, 2, 3, 4] diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index ab75dbf1b51cc..7aab7df7169d4 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1147,6 +1147,15 @@ def test_isin_with_i8(self): result = s.isin(s[0:2]) assert_series_equal(result, expected) + @pytest.mark.parametrize("empty", [[], Series(), np.array([])]) + def test_isin_empty(self, empty): + # see gh-16991 + s = Series(["a", "b"]) + expected = Series([False, False]) + + result = s.isin(empty) + tm.assert_series_equal(expected, result) + def test_timedelta64_analytics(self): from pandas import date_range diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 993dcc4f527b2..4588bf17fdbeb 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -597,6 +597,15 @@ def test_categorical_from_codes(self): result = algos.isin(Sd, St) 
tm.assert_numpy_array_equal(expected, result) + @pytest.mark.parametrize("empty", [[], pd.Series(), np.array([])]) + def test_empty(self, empty): + # see gh-16991 + vals = pd.Index(["a", "b"]) + expected = np.array([False, False]) + + result = algos.isin(vals, empty) + tm.assert_numpy_array_equal(expected, result) + class TestValueCounts(object):