-
-
Notifications
You must be signed in to change notification settings - Fork 18k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
PERF: GH2003 Series.isin for categorical dtypes #20522
Changes from 1 commit
19ac11a
54021b9
2514b45
80f687a
d6c3953
33e3b07
ceffccd
3247dce
2b7b1c4
4478a49
64fef49
b25da12
9f8e790
60ac658
50aca26
713712e
18c827d
993afd8
a2b70ee
fa7f0f1
7b680cd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -403,8 +403,15 @@ def isin(comps, values): | |
if not isinstance(values, (ABCIndex, ABCSeries, np.ndarray)): | ||
values = construct_1d_object_array_from_listlike(list(values)) | ||
|
||
comps, dtype, _ = _ensure_data(comps) | ||
values, _, _ = _ensure_data(values, dtype=dtype) | ||
if not is_categorical_dtype(comps): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reverse this logic here. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah. I am working on an asv benchmark. |
||
comps, dtype, _ = _ensure_data(comps) | ||
values, _, _ = _ensure_data(values, dtype=dtype) | ||
else: | ||
cats = comps.cat.categories | ||
comps = comps.cat.codes.values | ||
mask = isna(values) | ||
values = cats.get_indexer(values) | ||
values = values[mask | (values >= 0)] | ||
|
||
# faster for larger cases to use np.in1d | ||
f = lambda x, y: htable.ismember_object(x, values) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3507,7 +3507,11 @@ def isin(self, values): | |
5 False | ||
Name: animal, dtype: bool | ||
""" | ||
result = algorithms.isin(com._values_from_object(self), values) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I wonder if the […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, let's try to do this. @Ma3aXaKa, can you make this change? |
||
if is_categorical_dtype(self.dtype): | ||
result = algorithms.isin(self, values) | ||
else: | ||
result = algorithms.isin(com._values_from_object(self), values) | ||
|
||
return self._constructor(result, index=self.index).__finalize__(self) | ||
|
||
def between(self, left, right, inclusive=True): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1255,6 +1255,17 @@ def test_isin_empty(self, empty): | |
result = s.isin(empty) | ||
tm.assert_series_equal(expected, result) | ||
|
||
def test_isin_cats(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This can go in […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. OK. |
||
s = Series(["a", "b", np.nan]).astype("category") | ||
|
||
result = s.isin(["a", np.nan]) | ||
expected = Series([True, False, True]) | ||
tm.assert_series_equal(expected, result) | ||
|
||
result = s.isin(["a", "c"]) | ||
expected = Series([True, False, False]) | ||
tm.assert_series_equal(expected, result) | ||
|
||
def test_timedelta64_analytics(self): | ||
from pandas import date_range | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you move this to the "Performance Improvements" section? (starts around line 783).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
sure