def _intersection_non_unique(self, other: IntervalIndex) -> IntervalIndex:
    """
    Return the intersection with another IntervalIndex.

    Used when the IntervalIndex has some common (repeated) endpoints,
    on either side.

    Every position of ``self`` whose ``(left, right)`` pair also occurs in
    ``other`` is kept, in the order of ``self``; repeated intervals in
    ``self`` are therefore all retained. If both indexes contain missing
    values, the first missing position of ``self`` is kept as well.

    Parameters
    ----------
    other : IntervalIndex

    Returns
    -------
    IntervalIndex
    """
    # Note: the tuple-set lookup was reported as about 3.25x faster than
    # super()._intersection(other) in
    # IntervalIndexMethod.time_intersection_both_duplicate(1000)
    # (figure measured on the earlier explicit-loop form of this method).
    other_tups = set(zip(other.left, other.right))

    # One boolean per position of self: is its (left, right) pair in other?
    mask = np.fromiter(
        (tup in other_tups for tup in zip(self.left, self.right)),
        dtype=bool,
        count=len(self),
    )

    if self.hasnans and other.hasnans:
        # Keep exactly one missing interval: the first one found in self.
        # NOTE(review): presumably needed because NaN endpoints never
        # compare equal in the tuple lookup above -- confirm.
        first_nan_loc = np.flatnonzero(self.isna())[0]
        mask[first_nan_loc] = True

    return self[mask]