Skip to content

Commit

Permalink
Try #106:
Browse files Browse the repository at this point in the history
  • Loading branch information
bors[bot] authored Jun 1, 2021
2 parents 7f0fb5f + 4071a6d commit 2b57f43
Show file tree
Hide file tree
Showing 6 changed files with 128 additions and 76 deletions.
4 changes: 2 additions & 2 deletions benchmarks/benches/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ fn successive_and(c: &mut Criterion) {

let mut bitmaps: Vec<_> = parsed_numbers
.into_iter()
.map(|(_, r)| r.map(RoaringBitmap::from_sorted_iter).unwrap())
.map(|(_, r)| r.map(|iter| RoaringBitmap::from_sorted_iter(iter).unwrap()).unwrap())
.collect();

// biggest bitmaps first.
Expand Down Expand Up @@ -401,7 +401,7 @@ fn successive_or(c: &mut Criterion) {

let bitmaps: Vec<_> = parsed_numbers
.into_iter()
.map(|(_, r)| r.map(RoaringBitmap::from_sorted_iter).unwrap())
.map(|(_, r)| r.map(|iter| RoaringBitmap::from_sorted_iter(iter).unwrap()).unwrap())
.collect();

let mut group = c.benchmark_group("Successive Or");
Expand Down
38 changes: 11 additions & 27 deletions src/bitmap/cmp.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::cmp::Ordering;
use std::iter::Peekable;
use std::slice;

Expand Down Expand Up @@ -32,9 +33,7 @@ impl RoaringBitmap {
///
/// ```
pub fn is_disjoint(&self, other: &Self) -> bool {
self.pairs(other)
.filter(|&(c1, c2)| c1.is_some() && c2.is_some())
.all(|(c1, c2)| c1.unwrap().is_disjoint(c2.unwrap()))
self.pairs(other).filter_map(|(c1, c2)| c1.zip(c2)).all(|(c1, c2)| c1.is_disjoint(c2))
}

/// Returns `true` if this set is a subset of `other`.
Expand Down Expand Up @@ -63,9 +62,7 @@ impl RoaringBitmap {
for pair in self.pairs(other) {
match pair {
(None, _) => (),
(_, None) => {
return false;
}
(_, None) => return false,
(Some(c1), Some(c2)) => {
if !c1.is_subset(c2) {
return false;
Expand Down Expand Up @@ -107,28 +104,15 @@ impl<'a> Iterator for Pairs<'a> {
type Item = (Option<&'a Container>, Option<&'a Container>);

fn next(&mut self) -> Option<Self::Item> {
enum Which {
Left,
Right,
Both,
None,
}
let which = match (self.0.peek(), self.1.peek()) {
(None, None) => Which::None,
(Some(_), None) => Which::Left,
(None, Some(_)) => Which::Right,
(Some(c1), Some(c2)) => match (c1.key, c2.key) {
(key1, key2) if key1 == key2 => Which::Both,
(key1, key2) if key1 < key2 => Which::Left,
(key1, key2) if key1 > key2 => Which::Right,
(_, _) => unreachable!(),
match (self.0.peek(), self.1.peek()) {
(None, None) => None,
(Some(_), None) => Some((self.0.next(), None)),
(None, Some(_)) => Some((None, self.1.next())),
(Some(c1), Some(c2)) => match c1.key.cmp(&c2.key) {
Ordering::Equal => Some((self.0.next(), self.1.next())),
Ordering::Less => Some((self.0.next(), None)),
Ordering::Greater => Some((None, self.1.next())),
},
};
match which {
Which::Left => Some((self.0.next(), None)),
Which::Right => Some((None, self.1.next())),
Which::Both => Some((self.0.next(), self.1.next())),
Which::None => None,
}
}
}
81 changes: 53 additions & 28 deletions src/bitmap/iter.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use std::convert::identity;
use std::iter::{self, FromIterator};
use std::slice;
use std::vec;
use std::{slice, vec};

use super::container::Container;
use crate::RoaringBitmap;
use crate::{NonSortedIntegers, RoaringBitmap};

/// An iterator for `RoaringBitmap`.
pub struct Iter<'a> {
Expand All @@ -21,27 +21,21 @@ pub struct IntoIter {
size_hint: u64,
}

impl<'a> Iter<'a> {
impl Iter<'_> {
fn new(containers: &[Container]) -> Iter {
fn identity<T>(t: T) -> T {
t
}
let size_hint = containers.iter().map(|c| c.len).sum();
Iter { inner: containers.iter().flat_map(identity as _), size_hint }
Iter { inner: containers.iter().flat_map(identity), size_hint }
}
}

impl IntoIter {
fn new(containers: Vec<Container>) -> IntoIter {
fn identity<T>(t: T) -> T {
t
}
let size_hint = containers.iter().map(|c| c.len).sum();
IntoIter { inner: containers.into_iter().flat_map(identity as _), size_hint }
IntoIter { inner: containers.into_iter().flat_map(identity), size_hint }
}
}

impl<'a> Iterator for Iter<'a> {
impl Iterator for Iter<'_> {
type Item = u32;

fn next(&mut self) -> Option<u32> {
Expand Down Expand Up @@ -131,44 +125,75 @@ impl Extend<u32> for RoaringBitmap {
}

impl RoaringBitmap {
/// Create the set from a sorted iterator. Values **must** be sorted.
/// Create the set from a sorted iterator. Values must be sorted and deduplicated.
///
/// This method can be faster than `from_iter` because it skips the binary searches.
/// The values of the iterator must be ordered and strictly greater than the greatest value
/// in the set. If a value in the iterator doesn't satisfy this requirement, it is not added
/// and the append operation is stopped.
///
/// # Examples
/// Returns `Ok` with the requested `RoaringBitmap`, `Err` with the number of elements
/// that were correctly appended before failure.
///
/// # Example: Create a set from an ordered list of integers.
///
/// ```rust
/// use roaring::RoaringBitmap;
///
/// let mut rb = RoaringBitmap::from_sorted_iter(0..10);
/// let mut rb = RoaringBitmap::from_sorted_iter(0..10).unwrap();
///
/// assert_eq!(rb.iter().collect::<Vec<u32>>(), (0..10).collect::<Vec<u32>>());
/// assert!(rb.iter().eq(0..10));
/// ```
pub fn from_sorted_iter<I: IntoIterator<Item = u32>>(iterator: I) -> RoaringBitmap {
///
/// # Example: Try to create a set from a non-ordered list of integers.
///
/// ```rust
/// use roaring::RoaringBitmap;
///
/// let integers = 0..10u32;
/// let error = RoaringBitmap::from_sorted_iter(integers.rev()).unwrap_err();
///
/// assert_eq!(error.valid_until(), 1);
/// ```
pub fn from_sorted_iter<I: IntoIterator<Item = u32>>(
iterator: I,
) -> Result<RoaringBitmap, NonSortedIntegers> {
let mut rb = RoaringBitmap::new();
rb.append(iterator);
rb
rb.append(iterator).map(|_| rb)
}

/// Extend the set with a sorted iterator.
/// All value of the iterator **must** be strictly bigger than the max element
/// contained in the set.
///
/// This method can be faster than `extend` because it skips the binary searches.
/// The values of the iterator must be ordered and strictly greater than the greatest value
/// in the set. If a value in the iterator doesn't satisfy this requirement, it is not added
/// and the append operation is stopped.
///
/// Returns `Ok` with the number of elements appended to the set, `Err` with
/// the number of elements we effectively appended before an error occurred.
///
/// # Examples
///
/// ```rust
/// use roaring::RoaringBitmap;
///
/// let mut rb = RoaringBitmap::new();
/// rb.append(0..10);
/// assert_eq!(rb.append(0..10), Ok(10));
///
/// assert_eq!(rb.iter().collect::<Vec<u32>>(), (0..10).collect::<Vec<u32>>());
/// assert!(rb.iter().eq(0..10));
/// ```
pub fn append<I: IntoIterator<Item = u32>>(&mut self, iterator: I) {
pub fn append<I: IntoIterator<Item = u32>>(
&mut self,
iterator: I,
) -> Result<u64, NonSortedIntegers> {
let mut count = 0;

for value in iterator {
self.push(value);
if self.push(value) {
count += 1;
} else {
return Err(NonSortedIntegers { valid_until: count });
}
}

Ok(count)
}
}
24 changes: 24 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@

extern crate byteorder;

use std::error::Error;
use std::fmt;

/// A compressed bitmap using the [Roaring bitmap compression scheme](http://roaringbitmap.org).
pub mod bitmap;

Expand All @@ -21,3 +24,24 @@ pub mod treemap;

pub use bitmap::RoaringBitmap;
pub use treemap::RoaringTreemap;

/// Error returned when the values yielded by an iterator are not sorted.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct NonSortedIntegers {
    // Number of leading elements accepted before the offending value was met.
    valid_until: u64,
}

impl NonSortedIntegers {
    /// Returns the number of elements that were correctly appended before the
    /// first out-of-order value was encountered.
    pub fn valid_until(&self) -> u64 {
        let Self { valid_until } = self;
        *valid_until
    }
}

impl fmt::Display for NonSortedIntegers {
    /// Writes a human-readable message stating how many elements were in order.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let position = self.valid_until;
        f.write_fmt(format_args!("integers are ordered up to the {}th element", position))
    }
}

impl Error for NonSortedIntegers {}
51 changes: 35 additions & 16 deletions src/treemap/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@ use std::iter::{self, FromIterator};
use super::util;
use crate::bitmap::IntoIter as IntoIter32;
use crate::bitmap::Iter as Iter32;
use crate::RoaringBitmap;
use crate::RoaringTreemap;
use crate::{NonSortedIntegers, RoaringBitmap, RoaringTreemap};

struct To64Iter<'a> {
hi: u32,
Expand Down Expand Up @@ -208,30 +207,39 @@ impl Extend<u64> for RoaringTreemap {
}

impl RoaringTreemap {
/// Create the set from a sorted iterator. Values **must** be sorted.
/// Create the set from a sorted iterator. Values must be sorted and deduplicated.
///
/// This method can be faster than `from_iter` because it skips the binary searches.
/// The values of the iterator must be ordered and strictly greater than the greatest value
/// in the set. If a value in the iterator doesn't satisfy this requirement, it is not added
/// and the append operation is stopped.
///
/// Returns `Ok` with the requested `RoaringTreemap`, `Err` with the number of elements
/// that were correctly appended before failure.
///
/// # Examples
///
/// ```rust
/// use roaring::RoaringTreemap;
///
/// let mut rb = RoaringTreemap::from_sorted_iter(0..10);
/// let mut rb = RoaringTreemap::from_sorted_iter(0..10).unwrap();
///
/// assert_eq!(rb.iter().collect::<Vec<u64>>(), (0..10).collect::<Vec<u64>>());
/// assert!(rb.iter().eq(0..10));
/// ```
pub fn from_sorted_iter<I: IntoIterator<Item = u64>>(iterator: I) -> RoaringTreemap {
let mut rb = RoaringTreemap::new();
rb.append(iterator);
rb
pub fn from_sorted_iter<I: IntoIterator<Item = u64>>(
iterator: I,
) -> Result<RoaringTreemap, NonSortedIntegers> {
let mut rt = RoaringTreemap::new();
rt.append(iterator).map(|_| rt)
}

/// Extend the set with a sorted iterator.
/// All value of the iterator **must** be greater or equal than the max element
/// contained in the set.
///
/// This method can be faster than `extend` because it skips the binary searches.
/// The values of the iterator must be ordered and strictly greater than the greatest value
/// in the set. If a value in the iterator doesn't satisfy this requirement, it is not added
/// and the append operation is stopped.
///
/// Returns `Ok` with the number of elements appended to the set, `Err` with
/// the number of elements we effectively appended before an error occurred.
///
/// # Examples
///
Expand All @@ -241,12 +249,23 @@ impl RoaringTreemap {
/// let mut rb = RoaringTreemap::new();
/// rb.append(0..10);
///
/// assert_eq!(rb.iter().collect::<Vec<u64>>(), (0..10).collect::<Vec<u64>>());
/// assert!(rb.iter().eq(0..10));
/// ```
pub fn append<I: IntoIterator<Item = u64>>(&mut self, iterator: I) {
pub fn append<I: IntoIterator<Item = u64>>(
&mut self,
iterator: I,
) -> Result<u64, NonSortedIntegers> {
let mut count = 0;

for value in iterator {
self.push(value);
if self.push(value) {
count += 1;
} else {
return Err(NonSortedIntegers { valid_until: count });
}
}

Ok(count)
}
}

Expand Down
6 changes: 3 additions & 3 deletions tests/push.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::iter::FromIterator;
macro_rules! test_from_sorted_iter {
($values: expr, $class: ty) => {{
let rb1 = <$class>::from_iter($values.clone());
let rb2 = <$class>::from_sorted_iter($values);
let rb2 = <$class>::from_sorted_iter($values).unwrap();

for (x, y) in rb1.iter().zip(rb2.iter()) {
assert_eq!(x, y);
Expand All @@ -22,11 +22,11 @@ macro_rules! test_from_sorted_iter {
#[test]
fn append() {
test_from_sorted_iter!((0..1_000_000).map(|x| 13 * x).collect::<Vec<u32>>(), RoaringBitmap);
test_from_sorted_iter!(vec![1, 2, 4, 5, 5, 7, 8, 8, 9], RoaringBitmap);
test_from_sorted_iter!(vec![1, 2, 4, 5, 7, 8, 9], RoaringBitmap);
}

#[test]
fn append_tree() {
test_from_sorted_iter!((0..1_000_000).map(|x| 13 * x).collect::<Vec<u64>>(), RoaringTreemap);
test_from_sorted_iter!(vec![1, 2, 4, 5, 5, 7, 8, 8, 9], RoaringTreemap);
test_from_sorted_iter!(vec![1, 2, 4, 5, 7, 8, 9], RoaringTreemap);
}

0 comments on commit 2b57f43

Please sign in to comment.