-
-
Notifications
You must be signed in to change notification settings - Fork 18k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Bug issue 16819 Index.get_indexer_non_unique inconsistent return types vs get_indexer #16826
Changes from 5 commits
df5bfcf
6b5bd71
e32df12
7e650bd
b710626
d3a77be
05cb9d3
e1b85cb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2256,15 +2256,15 @@ def intersection(self, other): | |
indexer = indexer.take((indexer != -1).nonzero()[0]) | ||
except: | ||
# duplicates | ||
indexer = Index(other._values).get_indexer_non_unique( | ||
self._values)[0].unique() | ||
indexer = algos.unique1d(Index(other._values).get_indexer_non_unique( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. just do
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The reason I avoided that was because the behaviour of np unique is slightly different to the original unique function (it returns as sorted rather than the original order, for example). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also would have to do numpy.unique(Index(other._values).get_indexer_non_unique(self._values)[0]) as get_indexer_non_unique returns np.ndarray which does not have a unique method. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we don't use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But this whole change is to make get_indexer_non_unique return an ndarray rather than an Index so can no longer call this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok this is fine. |
||
self._values)[0]) | ||
indexer = indexer[indexer != -1] | ||
|
||
taken = other.take(indexer) | ||
if self.name != other.name: | ||
taken.name = None | ||
return taken | ||
|
||
def difference(self, other): | ||
""" | ||
Return a new Index with elements from the index that are not in | ||
|
@@ -2704,7 +2704,7 @@ def get_indexer_non_unique(self, target): | |
tgt_values = target._values | ||
|
||
indexer, missing = self._engine.get_indexer_non_unique(tgt_values) | ||
return Index(indexer), missing | ||
return indexer, missing | ||
|
||
def get_indexer_for(self, target, **kwargs): | ||
""" | ||
|
@@ -2942,7 +2942,6 @@ def _reindex_non_unique(self, target): | |
else: | ||
|
||
# need to retake to have the same size as the indexer | ||
indexer = indexer.values | ||
indexer[~check] = 0 | ||
|
||
# reset the new indexer to account for the new size | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1131,6 +1131,17 @@ def test_get_indexer_strings(self): | |
with pytest.raises(TypeError): | ||
idx.get_indexer(['a', 'b', 'c', 'd'], method='pad', tolerance=2) | ||
|
||
def test_get_indexer_consistency(self): | ||
# See GH 16819 | ||
for name, index in self.indices.items(): | ||
indexer = index.get_indexer(index[0:2]) | ||
assert isinstance(indexer, np.ndarray) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. pls update to use this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The issue with that is that some of the indexes are empty or are categorical indexes with non-unique positions, so we can't assume that [0, 1, 2] is returned. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok |
||
assert indexer.dtype == np.intp | ||
|
||
indexer, _ = index.get_indexer_non_unique(index[0:2]) | ||
assert isinstance(indexer, np.ndarray) | ||
assert indexer.dtype == np.intp | ||
|
||
def test_get_loc(self): | ||
idx = pd.Index([0, 1, 2]) | ||
all_methods = [None, 'pad', 'backfill', 'nearest'] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -386,8 +386,7 @@ def test_reindexing(self): | |
expected = oidx.get_indexer_non_unique(finder)[0] | ||
|
||
actual = ci.get_indexer(finder) | ||
tm.assert_numpy_array_equal( | ||
expected.values, actual, check_dtype=False) | ||
tm.assert_numpy_array_equal(expected, actual, check_dtype=True) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. check_dtype is True by default |
||
|
||
def test_reindex_dtype(self): | ||
c = CategoricalIndex(['a', 'b', 'c', 'a']) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
use double-backticks on ``Index.get_indexer_non_unique()``, ``Index``, and ``Index.get_indexer()``