From d8bdac565c0e0fd963c58234e69addffe2acaeed Mon Sep 17 00:00:00 2001 From: Philippe-Cholet Date: Sun, 11 Jun 2023 22:03:56 +0200 Subject: [PATCH] New trait `merge_join::MergePredicate` merge_join_by can now also accept functions returning booleans. --- src/lib.rs | 33 ++++++++- src/merge_join.rs | 182 +++++++++++++++++++++++++++++++--------------- 2 files changed, 152 insertions(+), 63 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index daf935f73..097fd9631 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1012,7 +1012,10 @@ pub trait Itertools : Iterator { /// Create an iterator that merges items from both this and the specified /// iterator in ascending order. /// - /// It chooses whether to pair elements based on the `Ordering` returned by the + /// The function can either return an `Ordering` variant or a boolean. + /// + /// In the first case, + /// it chooses whether to pair elements based on the `Ordering` returned by the /// specified compare function. At any point, inspecting the tip of the /// iterators `I` and `J` as items `i` of type `I::Item` and `j` of type /// `J::Item` respectively, the resulting iterator will: @@ -1036,10 +1039,34 @@ pub trait Itertools : Iterator { /// vec![Both(0, 0), Left(2), Right(3), Left(4), Both(6, 6), Left(8), Right(9)] /// ); /// ``` + /// + /// In the second case, + /// it chooses whether to pair elements based on the boolean returned by the + /// specified function. At any point, inspecting the tip of the + /// iterators `I` and `J` as items `i` of type `I::Item` and `j` of type + /// `J::Item` respectively, the resulting iterator will: + /// + /// - Emit `Either::Left(i)` when `true`, + /// and remove `i` from its source iterator + /// - Emit `Either::Right(j)` when `false`, + /// and remove `j` from its source iterator + /// + /// ``` + /// use itertools::Itertools; + /// use itertools::Either::{Left, Right}; + /// + /// let multiples_of_2 = (0..10).step_by(2); + /// let multiples_of_3 = (0..10).step_by(3); + /// + /// itertools::assert_equal( + /// multiples_of_2.merge_join_by(multiples_of_3, |i, j| i <= j), + /// vec![Left(0), Right(0), Left(2), Right(3), Left(4), Left(6), Right(6), Left(8), Right(9)] + /// ); + /// ``` #[inline] - fn merge_join_by(self, other: J, cmp_fn: F) -> MergeJoinBy + fn merge_join_by(self, other: J, cmp_fn: F) -> MergeJoinBy where J: IntoIterator, - F: FnMut(&Self::Item, &J::Item) -> std::cmp::Ordering, + F: FnMut(&Self::Item, &J::Item) -> T, Self: Sized { merge_join_by(self, other, cmp_fn) diff --git a/src/merge_join.rs b/src/merge_join.rs index f2fbdea2c..dfb3e90b5 100644 --- a/src/merge_join.rs +++ b/src/merge_join.rs @@ -2,19 +2,30 @@ use std::cmp::Ordering; use std::iter::Fuse; use std::fmt; +use either::Either; + use super::adaptors::{PutBack, put_back}; use crate::either_or_both::EitherOrBoth; +use crate::size_hint::{self, SizeHint}; #[cfg(doc)] use crate::Itertools; +pub trait MergePredicate: FnMut(&I, &J) -> T { + type Item; + fn left(left: I) -> Self::Item; + fn right(right: J) -> Self::Item; + fn merge(&mut self, left: I, right: J) -> (Option, Option, Self::Item); + fn size_hint(left: SizeHint, right: SizeHint) -> SizeHint; +} + /// Return an iterator adaptor that merge-joins items from the two base iterators in ascending order. /// /// [`IntoIterator`] enabled version of [`Itertools::merge_join_by`]. -pub fn merge_join_by(left: I, right: J, cmp_fn: F) - -> MergeJoinBy +pub fn merge_join_by(left: I, right: J, cmp_fn: F) + -> MergeJoinBy where I: IntoIterator, J: IntoIterator, - F: FnMut(&I::Item, &J::Item) -> Ordering + F: FnMut(&I::Item, &J::Item) -> T, { MergeJoinBy { left: put_back(left.into_iter().fuse()), @@ -27,74 +38,122 @@ pub fn merge_join_by(left: I, right: J, cmp_fn: F) /// /// See [`.merge_join_by()`](crate::Itertools::merge_join_by) for more information. #[must_use = "iterator adaptors are lazy and do nothing unless consumed"] -pub struct MergeJoinBy { +pub struct MergeJoinBy + where I: Iterator, + J: Iterator, + F: FnMut(&I::Item, &J::Item) -> T, +{ left: PutBack>, right: PutBack>, cmp_fn: F } -impl Clone for MergeJoinBy +impl Ordering> MergePredicate for F { + type Item = EitherOrBoth; + + fn left(left: I) -> Self::Item { + EitherOrBoth::Left(left) + } + + fn right(right: J) -> Self::Item { + EitherOrBoth::Right(right) + } + + fn merge(&mut self, left: I, right: J) -> (Option, Option, Self::Item) { + match self(&left, &right) { + Ordering::Equal => (None, None, EitherOrBoth::Both(left, right)), + Ordering::Less => (None, Some(right), EitherOrBoth::Left(left)), + Ordering::Greater => (Some(left), None, EitherOrBoth::Right(right)), + } + } + + fn size_hint(left: SizeHint, right: SizeHint) -> SizeHint { + let (a_lower, a_upper) = left; + let (b_lower, b_upper) = right; + + let lower = ::std::cmp::max(a_lower, b_lower); + + let upper = match (a_upper, b_upper) { + (Some(x), Some(y)) => x.checked_add(y), + _ => None, + }; + + (lower, upper) + } +} + +impl bool> MergePredicate for F { + type Item = Either; + + fn left(left: I) -> Self::Item { + Either::Left(left) + } + + fn right(right: J) -> Self::Item { + Either::Right(right) + } + + fn merge(&mut self, left: I, right: J) -> (Option, Option, Self::Item) { + if self(&left, &right) { + (None, Some(right), Either::Left(left)) + } else { + (Some(left), None, Either::Right(right)) + } + } + + fn size_hint(left: SizeHint, right: SizeHint) -> SizeHint { + // Not ExactSizeIterator because size may be larger than usize + size_hint::add(left, right) + } +} + +impl Clone for MergeJoinBy where I: Iterator, J: Iterator, PutBack>: Clone, PutBack>: Clone, - F: Clone, + F: FnMut(&I::Item, &J::Item) -> T + Clone, { clone_fields!(left, right, cmp_fn); } -impl fmt::Debug for MergeJoinBy +impl fmt::Debug for MergeJoinBy where I: Iterator + fmt::Debug, I::Item: fmt::Debug, J: Iterator + fmt::Debug, J::Item: fmt::Debug, + F: FnMut(&I::Item, &J::Item) -> T, { debug_fmt_fields!(MergeJoinBy, left, right); } -impl Iterator for MergeJoinBy +impl Iterator for MergeJoinBy where I: Iterator, J: Iterator, - F: FnMut(&I::Item, &J::Item) -> Ordering + F: MergePredicate, { - type Item = EitherOrBoth; + type Item = F::Item; fn next(&mut self) -> Option { match (self.left.next(), self.right.next()) { (None, None) => None, - (Some(left), None) => - Some(EitherOrBoth::Left(left)), - (None, Some(right)) => - Some(EitherOrBoth::Right(right)), + (Some(left), None) => Some(F::left(left)), + (None, Some(right)) => Some(F::right(right)), (Some(left), Some(right)) => { - match (self.cmp_fn)(&left, &right) { - Ordering::Equal => - Some(EitherOrBoth::Both(left, right)), - Ordering::Less => { - self.right.put_back(right); - Some(EitherOrBoth::Left(left)) - }, - Ordering::Greater => { - self.left.put_back(left); - Some(EitherOrBoth::Right(right)) - } + let (left, right, next) = self.cmp_fn.merge(left, right); + if let Some(left) = left { + self.left.put_back(left); + } + if let Some(right) = right { + self.right.put_back(right); } + Some(next) } } } - fn size_hint(&self) -> (usize, Option) { - let (a_lower, a_upper) = self.left.size_hint(); - let (b_lower, b_upper) = self.right.size_hint(); - - let lower = ::std::cmp::max(a_lower, b_lower); - - let upper = match (a_upper, b_upper) { - (Some(x), Some(y)) => x.checked_add(y), - _ => None, - }; - - (lower, upper) + fn size_hint(&self) -> SizeHint { + F::size_hint(self.left.size_hint(), self.right.size_hint()) } fn count(mut self) -> usize { @@ -106,10 +165,12 @@ impl Iterator for MergeJoinBy (None, Some(_right)) => break count + 1 + self.right.into_parts().1.count(), (Some(left), Some(right)) => { count += 1; - match (self.cmp_fn)(&left, &right) { - Ordering::Equal => {} - Ordering::Less => self.right.put_back(right), - Ordering::Greater => self.left.put_back(left), + let (left, right, _) = self.cmp_fn.merge(left, right); + if let Some(left) = left { + self.left.put_back(left); + } + if let Some(right) = right { + self.right.put_back(right); } } } @@ -122,26 +183,23 @@ impl Iterator for MergeJoinBy match (self.left.next(), self.right.next()) { (None, None) => break previous_element, (Some(left), None) => { - break Some(EitherOrBoth::Left( + break Some(F::left( self.left.into_parts().1.last().unwrap_or(left), )) } (None, Some(right)) => { - break Some(EitherOrBoth::Right( + break Some(F::right( self.right.into_parts().1.last().unwrap_or(right), )) } (Some(left), Some(right)) => { - previous_element = match (self.cmp_fn)(&left, &right) { - Ordering::Equal => Some(EitherOrBoth::Both(left, right)), - Ordering::Less => { - self.right.put_back(right); - Some(EitherOrBoth::Left(left)) - } - Ordering::Greater => { - self.left.put_back(left); - Some(EitherOrBoth::Right(right)) - } + let (left, right, elem) = self.cmp_fn.merge(left, right); + previous_element = Some(elem); + if let Some(left) = left { + self.left.put_back(left); + } + if let Some(right) = right { + self.right.put_back(right); } } } @@ -156,13 +214,17 @@ impl Iterator for MergeJoinBy n -= 1; match (self.left.next(), self.right.next()) { (None, None) => break None, - (Some(_left), None) => break self.left.nth(n).map(EitherOrBoth::Left), - (None, Some(_right)) => break self.right.nth(n).map(EitherOrBoth::Right), - (Some(left), Some(right)) => match (self.cmp_fn)(&left, &right) { - Ordering::Equal => {} - Ordering::Less => self.right.put_back(right), - Ordering::Greater => self.left.put_back(left), - }, + (Some(_left), None) => break self.left.nth(n).map(F::left), + (None, Some(_right)) => break self.right.nth(n).map(F::right), + (Some(left), Some(right)) => { + let (left, right, _) = self.cmp_fn.merge(left, right); + if let Some(left) = left { + self.left.put_back(left); + } + if let Some(right) = right { + self.right.put_back(right); + } + } } } }