Skip to content

Commit

Permalink
Backport PR #42334: PERF/REGR: restore IntervalIndex._intersection_no…
Browse files Browse the repository at this point in the history
…n_unique (#42336)

Co-authored-by: jbrockmendel <jbrockmendel@gmail.com>
  • Loading branch information
meeseeksmachine and jbrockmendel committed Jul 1, 2021
1 parent 2c26ee7 commit 033041d
Showing 1 changed file with 31 additions and 1 deletion.
32 changes: 31 additions & 1 deletion pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -808,7 +808,8 @@ def _intersection(self, other, sort):
# multiple NaNs
taken = other._intersection_unique(self)
else:
return super()._intersection(other, sort)
# duplicates
taken = self._intersection_non_unique(other)

if sort is None:
taken = taken.sort_values()
Expand Down Expand Up @@ -837,6 +838,35 @@ def _intersection_unique(self, other: IntervalIndex) -> IntervalIndex:

return self.take(indexer)

def _intersection_non_unique(self, other: IntervalIndex) -> IntervalIndex:
    """
    Intersect with another IntervalIndex when endpoints may be shared.

    Used when the IntervalIndex does have some common endpoints, on
    either side. Every position of ``self`` whose ``(left, right)`` pair
    also occurs in ``other`` is kept, so duplicates in ``self`` survive.
    When both indexes contain missing values, the first missing entry of
    ``self`` is kept as well.

    Parameters
    ----------
    other : IntervalIndex

    Returns
    -------
    IntervalIndex
    """
    # Note: this is about 3.25x faster than super()._intersection(other)
    #  in IntervalIndexMethod.time_intersection_both_duplicate(1000)
    other_pairs = set(zip(other.left, other.right))

    # One boolean per position in self: is this (left, right) pair in other?
    keep = np.array(
        [pair in other_pairs for pair in zip(self.left, self.right)],
        dtype=bool,
    )

    if self.hasnans and other.hasnans:
        # Missing intervals are handled explicitly: flag the first one in self.
        nan_positions = np.flatnonzero(self.isna())
        keep[nan_positions[0]] = True

    return self[keep]

# --------------------------------------------------------------------

@property
Expand Down

0 comments on commit 033041d

Please sign in to comment.