Skip to content

Commit

Permalink
Functions passed to merge_join_by can return booleans
Browse files Browse the repository at this point in the history
  • Loading branch information
Philippe-Cholet committed Jun 9, 2023
1 parent ad2e401 commit 5ad6f70
Show file tree
Hide file tree
Showing 2 changed files with 124 additions and 32 deletions.
33 changes: 30 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1012,7 +1012,10 @@ pub trait Itertools : Iterator {
/// Create an iterator that merges items from both this and the specified
/// iterator in ascending order.
///
/// It chooses whether to pair elements based on the `Ordering` returned by the
/// The function can either return an `Ordering` variant or a boolean.
///
/// In the first case,
/// it chooses whether to pair elements based on the `Ordering` returned by the
/// specified compare function. At any point, inspecting the tip of the
/// iterators `I` and `J` as items `i` of type `I::Item` and `j` of type
/// `J::Item` respectively, the resulting iterator will:
Expand All @@ -1036,10 +1039,34 @@ pub trait Itertools : Iterator {
/// vec![Both(0, 0), Left(2), Right(3), Left(4), Both(6, 6), Left(8), Right(9)]
/// );
/// ```
///
/// In the second case,
/// it chooses whether to pair elements based on the boolean returned by the
/// specified function. At any point, inspecting the tip of the
/// iterators `I` and `J` as items `i` of type `I::Item` and `j` of type
/// `J::Item` respectively, the resulting iterator will:
///
/// - Emit `Either::Left(i)` when `true`,
/// and remove `i` from its source iterator
/// - Emit `Either::Right(j)` when `false`,
/// and remove `j` from its source iterator
///
/// ```
/// use itertools::Itertools;
/// use itertools::Either::{Left, Right};
///
/// let multiples_of_2 = (0..10).step_by(2);
/// let multiples_of_3 = (0..10).step_by(3);
///
/// itertools::assert_equal(
/// multiples_of_2.merge_join_by(multiples_of_3, |i, j| i <= j),
/// vec![Left(0), Right(0), Left(2), Right(3), Left(4), Left(6), Right(6), Left(8), Right(9)]
/// );
/// ```
#[inline]
fn merge_join_by<J, F>(self, other: J, cmp_fn: F) -> MergeJoinBy<Self, J::IntoIter, F>
fn merge_join_by<J, F, R>(self, other: J, cmp_fn: F) -> MergeJoinBy<Self, J::IntoIter, F, R>
where J: IntoIterator,
F: FnMut(&Self::Item, &J::Item) -> std::cmp::Ordering,
F: FnMut(&Self::Item, &J::Item) -> R,

This comment has been minimized.

Copy link
@phimuemue

phimuemue Jun 11, 2023

Should we require R: OrderingOrBool here? Would possibly make sense if we also require that F has a specific signature already.

Self: Sized
{
merge_join_by(self, other, cmp_fn)
Expand Down
123 changes: 94 additions & 29 deletions src/merge_join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ use std::cmp::Ordering;
use std::iter::Fuse;
use std::fmt;

use either::Either;

use super::adaptors::{PutBack, put_back};
use crate::either_or_both::EitherOrBoth;
#[cfg(doc)]
Expand All @@ -10,11 +12,11 @@ use crate::Itertools;
/// Return an iterator adaptor that merge-joins items from the two base iterators in ascending order.
///
/// [`IntoIterator`] enabled version of [`Itertools::merge_join_by`].
pub fn merge_join_by<I, J, F>(left: I, right: J, cmp_fn: F)
-> MergeJoinBy<I::IntoIter, J::IntoIter, F>
pub fn merge_join_by<I, J, F, R>(left: I, right: J, cmp_fn: F)
-> MergeJoinBy<I::IntoIter, J::IntoIter, F, R>
where I: IntoIterator,
J: IntoIterator,
F: FnMut(&I::Item, &J::Item) -> Ordering
F: FnMut(&I::Item, &J::Item) -> R,
{
MergeJoinBy {
left: put_back(left.into_iter().fuse()),
Expand All @@ -27,56 +29,119 @@ pub fn merge_join_by<I, J, F>(left: I, right: J, cmp_fn: F)
///
/// See [`.merge_join_by()`](crate::Itertools::merge_join_by) for more information.
#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
pub struct MergeJoinBy<I: Iterator, J: Iterator, F> {
pub struct MergeJoinBy<I, J, F, R>
where I: Iterator,

This comment has been minimized.

Copy link
@phimuemue

phimuemue Jun 11, 2023

Are these trait bounds really required here?

J: Iterator,
F: FnMut(&I::Item, &J::Item) -> R,
{
left: PutBack<Fuse<I>>,
right: PutBack<Fuse<J>>,
cmp_fn: F
}

impl<I, J, F> Clone for MergeJoinBy<I, J, F>
pub trait OrderingOrBool<I, J> {
type Item;
fn into_cmp(self) -> Ordering;
fn left(left: I) -> Self::Item;
fn right(right: J) -> Self::Item;
fn both(left: I, right: J) -> Self::Item;
}

impl<I, J, F, R> Clone for MergeJoinBy<I, J, F, R>
where I: Iterator,
J: Iterator,
PutBack<Fuse<I>>: Clone,
PutBack<Fuse<J>>: Clone,
F: Clone,
F: FnMut(&I::Item, &J::Item) -> R + Clone,
{
clone_fields!(left, right, cmp_fn);
}

impl<I, J, F> fmt::Debug for MergeJoinBy<I, J, F>
impl<I, J, F, R> fmt::Debug for MergeJoinBy<I, J, F, R>
where I: Iterator + fmt::Debug,
I::Item: fmt::Debug,
J: Iterator + fmt::Debug,
J::Item: fmt::Debug,
F: FnMut(&I::Item, &J::Item) -> R,
{
debug_fmt_fields!(MergeJoinBy, left, right);
}

impl<I, J, F> Iterator for MergeJoinBy<I, J, F>
impl<I, J> OrderingOrBool<I, J> for Ordering {
type Item = EitherOrBoth<I, J>;

#[inline(always)]
fn into_cmp(self) -> Ordering {
self
}

#[inline(always)]
fn left(left: I) -> Self::Item {
EitherOrBoth::Left(left)
}

#[inline(always)]
fn right(right: J) -> Self::Item {
EitherOrBoth::Right(right)
}

#[inline(always)]
fn both(left: I, right: J) -> Self::Item {
EitherOrBoth::Both(left, right)
}
}

impl<I, J> OrderingOrBool<I, J> for bool {
type Item = Either<I, J>;

#[inline(always)]
fn into_cmp(self) -> Ordering {
if self {
Ordering::Less
} else {
Ordering::Greater
}
}

#[inline(always)]
fn left(left: I) -> Self::Item {
Either::Left(left)
}

#[inline(always)]
fn right(right: J) -> Self::Item {
Either::Right(right)
}

#[inline(always)]
fn both(_left: I, _right: J) -> Self::Item {
unreachable!("into_cmp never returns Ordering::Equal so this should never be called")
}
}

impl<I, J, F, R> Iterator for MergeJoinBy<I, J, F, R>

This comment has been minimized.

Copy link
@phimuemue

phimuemue Jun 11, 2023

Is R rella needed as an additional type parameter in MergeJoinBy?

where I: Iterator,
J: Iterator,
F: FnMut(&I::Item, &J::Item) -> Ordering
F: FnMut(&I::Item, &J::Item) -> R,
R: OrderingOrBool<I::Item, J::Item>,
{
type Item = EitherOrBoth<I::Item, J::Item>;
type Item = R::Item;

fn next(&mut self) -> Option<Self::Item> {
match (self.left.next(), self.right.next()) {
(None, None) => None,
(Some(left), None) =>
Some(EitherOrBoth::Left(left)),
(None, Some(right)) =>
Some(EitherOrBoth::Right(right)),
(Some(left), None) => Some(R::left(left)),
(None, Some(right)) => Some(R::right(right)),
(Some(left), Some(right)) => {
match (self.cmp_fn)(&left, &right) {
Ordering::Equal =>
Some(EitherOrBoth::Both(left, right)),
match (self.cmp_fn)(&left, &right).into_cmp() {
Ordering::Equal => Some(R::both(left, right)),
Ordering::Less => {
self.right.put_back(right);
Some(EitherOrBoth::Left(left))
Some(R::left(left))
},
Ordering::Greater => {
self.left.put_back(left);
Some(EitherOrBoth::Right(right))
Some(R::right(right))
}
}
}
Expand Down Expand Up @@ -106,7 +171,7 @@ impl<I, J, F> Iterator for MergeJoinBy<I, J, F>
(None, Some(_right)) => break count + 1 + self.right.into_parts().1.count(),
(Some(left), Some(right)) => {
count += 1;
match (self.cmp_fn)(&left, &right) {
match (self.cmp_fn)(&left, &right).into_cmp() {
Ordering::Equal => {}
Ordering::Less => self.right.put_back(right),
Ordering::Greater => self.left.put_back(left),
Expand All @@ -122,25 +187,25 @@ impl<I, J, F> Iterator for MergeJoinBy<I, J, F>
match (self.left.next(), self.right.next()) {
(None, None) => break previous_element,
(Some(left), None) => {
break Some(EitherOrBoth::Left(
break Some(R::left(
self.left.into_parts().1.last().unwrap_or(left),
))
}
(None, Some(right)) => {
break Some(EitherOrBoth::Right(
break Some(R::right(
self.right.into_parts().1.last().unwrap_or(right),
))
}
(Some(left), Some(right)) => {
previous_element = match (self.cmp_fn)(&left, &right) {
Ordering::Equal => Some(EitherOrBoth::Both(left, right)),
previous_element = match (self.cmp_fn)(&left, &right).into_cmp() {
Ordering::Equal => Some(R::both(left, right)),
Ordering::Less => {
self.right.put_back(right);
Some(EitherOrBoth::Left(left))
Some(R::left(left))
}
Ordering::Greater => {
self.left.put_back(left);
Some(EitherOrBoth::Right(right))
Some(R::right(right))
}
}
}
Expand All @@ -156,9 +221,9 @@ impl<I, J, F> Iterator for MergeJoinBy<I, J, F>
n -= 1;
match (self.left.next(), self.right.next()) {
(None, None) => break None,
(Some(_left), None) => break self.left.nth(n).map(EitherOrBoth::Left),
(None, Some(_right)) => break self.right.nth(n).map(EitherOrBoth::Right),
(Some(left), Some(right)) => match (self.cmp_fn)(&left, &right) {
(Some(_left), None) => break self.left.nth(n).map(R::left),
(None, Some(_right)) => break self.right.nth(n).map(R::right),
(Some(left), Some(right)) => match (self.cmp_fn)(&left, &right).into_cmp() {
Ordering::Equal => {}
Ordering::Less => self.right.put_back(right),
Ordering::Greater => self.left.put_back(left),
Expand Down

1 comment on commit 5ad6f70

@phimuemue
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For more info, see rust-itertools#701 (comment).

Please sign in to comment.