Skip to content

Commit

Permalink
New trait merge_join::MergePredicate
Browse files Browse the repository at this point in the history
merge_join_by can now also accept functions returning booleans.
  • Loading branch information
Philippe-Cholet committed Jun 11, 2023
1 parent ad2e401 commit d8bdac5
Show file tree
Hide file tree
Showing 2 changed files with 152 additions and 63 deletions.
33 changes: 30 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1012,7 +1012,10 @@ pub trait Itertools : Iterator {
/// Create an iterator that merges items from both this and the specified
/// iterator in ascending order.
///
/// It chooses whether to pair elements based on the `Ordering` returned by the
/// The function can either return an `Ordering` variant or a boolean.
///
/// In the first case,
/// it chooses whether to pair elements based on the `Ordering` returned by the
/// specified compare function. At any point, inspecting the tip of the
/// iterators `I` and `J` as items `i` of type `I::Item` and `j` of type
/// `J::Item` respectively, the resulting iterator will:
Expand All @@ -1036,10 +1039,34 @@ pub trait Itertools : Iterator {
/// vec![Both(0, 0), Left(2), Right(3), Left(4), Both(6, 6), Left(8), Right(9)]
/// );
/// ```
///
/// In the second case,
/// it chooses whether to pair elements based on the boolean returned by the
/// specified function. At any point, inspecting the tip of the
/// iterators `I` and `J` as items `i` of type `I::Item` and `j` of type
/// `J::Item` respectively, the resulting iterator will:
///
/// - Emit `Either::Left(i)` when `true`,
/// and remove `i` from its source iterator
/// - Emit `Either::Right(j)` when `false`,
/// and remove `j` from its source iterator
///
/// ```
/// use itertools::Itertools;
/// use itertools::Either::{Left, Right};
///
/// let multiples_of_2 = (0..10).step_by(2);
/// let multiples_of_3 = (0..10).step_by(3);
///
/// itertools::assert_equal(
/// multiples_of_2.merge_join_by(multiples_of_3, |i, j| i <= j),
/// vec![Left(0), Right(0), Left(2), Right(3), Left(4), Left(6), Right(6), Left(8), Right(9)]
/// );
/// ```
#[inline]
fn merge_join_by<J, F>(self, other: J, cmp_fn: F) -> MergeJoinBy<Self, J::IntoIter, F>
fn merge_join_by<J, F, T>(self, other: J, cmp_fn: F) -> MergeJoinBy<Self, J::IntoIter, F, T>
where J: IntoIterator,
F: FnMut(&Self::Item, &J::Item) -> std::cmp::Ordering,
F: FnMut(&Self::Item, &J::Item) -> T,
Self: Sized
{
merge_join_by(self, other, cmp_fn)
Expand Down
182 changes: 122 additions & 60 deletions src/merge_join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,30 @@ use std::cmp::Ordering;
use std::iter::Fuse;
use std::fmt;

use either::Either;

use super::adaptors::{PutBack, put_back};
use crate::either_or_both::EitherOrBoth;
use crate::size_hint::{self, SizeHint};
#[cfg(doc)]
use crate::Itertools;

/// Strategy used by `MergeJoinBy` to decide how the front items of its two
/// source iterators are combined into output items.
///
/// It is a refinement of `FnMut(&I, &J) -> T`: `I` and `J` are the item types
/// of the left and right iterators, and `T` is what the function returns.
/// The implementations in this file cover `T = Ordering` and `T = bool`.
pub trait MergePredicate<I, J, T>: FnMut(&I, &J) -> T {
/// Type of the items produced by the merged iterator.
type Item;
/// Wrap an item taken from the left iterator as an output item.
fn left(left: I) -> Self::Item;
/// Wrap an item taken from the right iterator as an output item.
fn right(right: J) -> Self::Item;
/// Call the function on `left` and `right` and build the next output item.
///
/// Returns `(left_back, right_back, item)`. A `Some` in either of the first
/// two positions hands that input back unconsumed; `MergeJoinBy` puts it
/// back on the corresponding source iterator.
fn merge(&mut self, left: I, right: J) -> (Option<I>, Option<J>, Self::Item);
/// Derive the size hint of the merged iterator from the size hints of the
/// left and right iterators.
fn size_hint(left: SizeHint, right: SizeHint) -> SizeHint;
}

/// Return an iterator adaptor that merge-joins items from the two base iterators in ascending order.
///
/// [`IntoIterator`] enabled version of [`Itertools::merge_join_by`].
pub fn merge_join_by<I, J, F>(left: I, right: J, cmp_fn: F)
-> MergeJoinBy<I::IntoIter, J::IntoIter, F>
pub fn merge_join_by<I, J, F, T>(left: I, right: J, cmp_fn: F)
-> MergeJoinBy<I::IntoIter, J::IntoIter, F, T>
where I: IntoIterator,
J: IntoIterator,
F: FnMut(&I::Item, &J::Item) -> Ordering
F: FnMut(&I::Item, &J::Item) -> T,
{
MergeJoinBy {
left: put_back(left.into_iter().fuse()),
Expand All @@ -27,74 +38,122 @@ pub fn merge_join_by<I, J, F>(left: I, right: J, cmp_fn: F)
///
/// See [`.merge_join_by()`](crate::Itertools::merge_join_by) for more information.
#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
pub struct MergeJoinBy<I: Iterator, J: Iterator, F> {
pub struct MergeJoinBy<I, J, F, T>
where I: Iterator,
J: Iterator,
F: FnMut(&I::Item, &J::Item) -> T,
{
left: PutBack<Fuse<I>>,
right: PutBack<Fuse<J>>,
cmp_fn: F
}

impl<I, J, F> Clone for MergeJoinBy<I, J, F>
/// Merge strategy for functions that return an [`Ordering`].
///
/// Output items are `EitherOrBoth<I, J>`: both inputs are emitted together
/// when the function returns `Equal`, otherwise only one of them is emitted
/// and the other is handed back.
impl<I, J, F> MergePredicate<I, J, Ordering> for F
where
    F: FnMut(&I, &J) -> Ordering,
{
    type Item = EitherOrBoth<I, J>;

    /// Wrap a left-hand item as `EitherOrBoth::Left`.
    fn left(left: I) -> EitherOrBoth<I, J> {
        EitherOrBoth::Left(left)
    }

    /// Wrap a right-hand item as `EitherOrBoth::Right`.
    fn right(right: J) -> EitherOrBoth<I, J> {
        EitherOrBoth::Right(right)
    }

    /// Compare the two items and return `(left_back, right_back, item)`.
    fn merge(&mut self, left: I, right: J) -> (Option<I>, Option<J>, EitherOrBoth<I, J>) {
        let ordering = self(&left, &right);
        match ordering {
            // `Less`: emit the left item, hand the right one back.
            Ordering::Less => (None, Some(right), EitherOrBoth::Left(left)),
            // `Equal`: both items are consumed and emitted as a pair.
            Ordering::Equal => (None, None, EitherOrBoth::Both(left, right)),
            // `Greater`: emit the right item, hand the left one back.
            Ordering::Greater => (Some(left), None, EitherOrBoth::Right(right)),
        }
    }

    /// Lower bound is the larger of the two lower bounds; upper bound is the
    /// sum of the two upper bounds, or `None` if either is unknown or the sum
    /// overflows.
    fn size_hint(left: SizeHint, right: SizeHint) -> SizeHint {
        let (left_min, left_max) = left;
        let (right_min, right_max) = right;

        let lower = left_min.max(right_min);
        let upper = left_max.and_then(|x| right_max.and_then(|y| x.checked_add(y)));

        (lower, upper)
    }
}

/// Merge strategy for functions that return a `bool`.
///
/// Output items are `Either<I, J>`: `true` emits the left item and `false`
/// emits the right item; the item that was not emitted is handed back.
impl<I, J, F> MergePredicate<I, J, bool> for F
where
    F: FnMut(&I, &J) -> bool,
{
    type Item = Either<I, J>;

    /// Wrap a left-hand item as `Either::Left`.
    fn left(left: I) -> Either<I, J> {
        Either::Left(left)
    }

    /// Wrap a right-hand item as `Either::Right`.
    fn right(right: J) -> Either<I, J> {
        Either::Right(right)
    }

    /// Evaluate the function and return `(left_back, right_back, item)`.
    fn merge(&mut self, left: I, right: J) -> (Option<I>, Option<J>, Either<I, J>) {
        let take_left = self(&left, &right);
        if take_left {
            // Left goes out; right is handed back.
            return (None, Some(right), Either::Left(left));
        }
        // Right goes out; left is handed back.
        (Some(left), None, Either::Right(right))
    }

    /// Combine the two hints with `size_hint::add`.
    fn size_hint(left: SizeHint, right: SizeHint) -> SizeHint {
        // Not ExactSizeIterator because size may be larger than usize
        size_hint::add(left, right)
    }
}

impl<I, J, F, T> Clone for MergeJoinBy<I, J, F, T>
where I: Iterator,
J: Iterator,
PutBack<Fuse<I>>: Clone,
PutBack<Fuse<J>>: Clone,
F: Clone,
F: FnMut(&I::Item, &J::Item) -> T + Clone,
{
clone_fields!(left, right, cmp_fn);
}

impl<I, J, F> fmt::Debug for MergeJoinBy<I, J, F>
impl<I, J, F, T> fmt::Debug for MergeJoinBy<I, J, F, T>
where I: Iterator + fmt::Debug,
I::Item: fmt::Debug,
J: Iterator + fmt::Debug,
J::Item: fmt::Debug,
F: FnMut(&I::Item, &J::Item) -> T,
{
debug_fmt_fields!(MergeJoinBy, left, right);
}

impl<I, J, F> Iterator for MergeJoinBy<I, J, F>
impl<I, J, F, T> Iterator for MergeJoinBy<I, J, F, T>
where I: Iterator,
J: Iterator,
F: FnMut(&I::Item, &J::Item) -> Ordering
F: MergePredicate<I::Item, J::Item, T>,
{
type Item = EitherOrBoth<I::Item, J::Item>;
type Item = F::Item;

fn next(&mut self) -> Option<Self::Item> {
match (self.left.next(), self.right.next()) {
(None, None) => None,
(Some(left), None) =>
Some(EitherOrBoth::Left(left)),
(None, Some(right)) =>
Some(EitherOrBoth::Right(right)),
(Some(left), None) => Some(F::left(left)),
(None, Some(right)) => Some(F::right(right)),
(Some(left), Some(right)) => {
match (self.cmp_fn)(&left, &right) {
Ordering::Equal =>
Some(EitherOrBoth::Both(left, right)),
Ordering::Less => {
self.right.put_back(right);
Some(EitherOrBoth::Left(left))
},
Ordering::Greater => {
self.left.put_back(left);
Some(EitherOrBoth::Right(right))
}
let (left, right, next) = self.cmp_fn.merge(left, right);
if let Some(left) = left {
self.left.put_back(left);
}
if let Some(right) = right {
self.right.put_back(right);
}
Some(next)
}
}
}

fn size_hint(&self) -> (usize, Option<usize>) {
let (a_lower, a_upper) = self.left.size_hint();
let (b_lower, b_upper) = self.right.size_hint();

let lower = ::std::cmp::max(a_lower, b_lower);

let upper = match (a_upper, b_upper) {
(Some(x), Some(y)) => x.checked_add(y),
_ => None,
};

(lower, upper)
fn size_hint(&self) -> SizeHint {
F::size_hint(self.left.size_hint(), self.right.size_hint())
}

fn count(mut self) -> usize {
Expand All @@ -106,10 +165,12 @@ impl<I, J, F> Iterator for MergeJoinBy<I, J, F>
(None, Some(_right)) => break count + 1 + self.right.into_parts().1.count(),
(Some(left), Some(right)) => {
count += 1;
match (self.cmp_fn)(&left, &right) {
Ordering::Equal => {}
Ordering::Less => self.right.put_back(right),
Ordering::Greater => self.left.put_back(left),
let (left, right, _) = self.cmp_fn.merge(left, right);
if let Some(left) = left {
self.left.put_back(left);
}
if let Some(right) = right {
self.right.put_back(right);
}
}
}
Expand All @@ -122,26 +183,23 @@ impl<I, J, F> Iterator for MergeJoinBy<I, J, F>
match (self.left.next(), self.right.next()) {
(None, None) => break previous_element,
(Some(left), None) => {
break Some(EitherOrBoth::Left(
break Some(F::left(
self.left.into_parts().1.last().unwrap_or(left),
))
}
(None, Some(right)) => {
break Some(EitherOrBoth::Right(
break Some(F::right(
self.right.into_parts().1.last().unwrap_or(right),
))
}
(Some(left), Some(right)) => {
previous_element = match (self.cmp_fn)(&left, &right) {
Ordering::Equal => Some(EitherOrBoth::Both(left, right)),
Ordering::Less => {
self.right.put_back(right);
Some(EitherOrBoth::Left(left))
}
Ordering::Greater => {
self.left.put_back(left);
Some(EitherOrBoth::Right(right))
}
let (left, right, elem) = self.cmp_fn.merge(left, right);
previous_element = Some(elem);
if let Some(left) = left {
self.left.put_back(left);
}
if let Some(right) = right {
self.right.put_back(right);
}
}
}
Expand All @@ -156,13 +214,17 @@ impl<I, J, F> Iterator for MergeJoinBy<I, J, F>
n -= 1;
match (self.left.next(), self.right.next()) {
(None, None) => break None,
(Some(_left), None) => break self.left.nth(n).map(EitherOrBoth::Left),
(None, Some(_right)) => break self.right.nth(n).map(EitherOrBoth::Right),
(Some(left), Some(right)) => match (self.cmp_fn)(&left, &right) {
Ordering::Equal => {}
Ordering::Less => self.right.put_back(right),
Ordering::Greater => self.left.put_back(left),
},
(Some(_left), None) => break self.left.nth(n).map(F::left),
(None, Some(_right)) => break self.right.nth(n).map(F::right),
(Some(left), Some(right)) => {
let (left, right, _) = self.cmp_fn.merge(left, right);
if let Some(left) = left {
self.left.put_back(left);
}
if let Some(right) = right {
self.right.put_back(right);
}
}
}
}
}
Expand Down

0 comments on commit d8bdac5

Please sign in to comment.