Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modify the from_sorted_iter and append methods to return a Result #106

Merged
merged 5 commits into from
Oct 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions benchmarks/benches/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ fn from_sorted_iter(c: &mut Criterion) {
b.iter(|| {
for (_, numbers) in &parsed_numbers {
let numbers = numbers.as_ref().unwrap();
RoaringBitmap::from_sorted_iter(numbers.iter().copied());
RoaringBitmap::from_sorted_iter(numbers.iter().copied()).unwrap();
}
})
});
Expand All @@ -357,7 +357,7 @@ fn successive_and(c: &mut Criterion) {

let mut bitmaps: Vec<_> = parsed_numbers
.into_iter()
.map(|(_, r)| r.map(RoaringBitmap::from_sorted_iter).unwrap())
.map(|(_, r)| r.map(|iter| RoaringBitmap::from_sorted_iter(iter).unwrap()).unwrap())
.collect();

// biggest bitmaps first.
Expand Down Expand Up @@ -410,7 +410,7 @@ fn successive_or(c: &mut Criterion) {

let bitmaps: Vec<_> = parsed_numbers
.into_iter()
.map(|(_, r)| r.map(RoaringBitmap::from_sorted_iter).unwrap())
.map(|(_, r)| r.map(|iter| RoaringBitmap::from_sorted_iter(iter).unwrap()).unwrap())
.collect();

let mut group = c.benchmark_group("Successive Or");
Expand Down
77 changes: 51 additions & 26 deletions src/bitmap/iter.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use std::convert::identity;
use std::iter::{self, FromIterator};
use std::slice;
use std::vec;
use std::{slice, vec};

use super::container::Container;
use crate::RoaringBitmap;
use crate::{NonSortedIntegers, RoaringBitmap};

/// An iterator for `RoaringBitmap`.
pub struct Iter<'a> {
Expand All @@ -21,27 +21,21 @@ pub struct IntoIter {
size_hint: u64,
}

impl<'a> Iter<'a> {
impl Iter<'_> {
fn new(containers: &[Container]) -> Iter {
fn identity<T>(t: T) -> T {
t
}
let size_hint = containers.iter().map(|c| c.len).sum();
Iter { inner: containers.iter().flat_map(identity as _), size_hint }
}
}

impl IntoIter {
fn new(containers: Vec<Container>) -> IntoIter {
fn identity<T>(t: T) -> T {
t
}
let size_hint = containers.iter().map(|c| c.len).sum();
IntoIter { inner: containers.into_iter().flat_map(identity as _), size_hint }
}
}

impl<'a> Iterator for Iter<'a> {
impl Iterator for Iter<'_> {
type Item = u32;

fn next(&mut self) -> Option<u32> {
Expand Down Expand Up @@ -131,44 +125,75 @@ impl Extend<u32> for RoaringBitmap {
}

impl RoaringBitmap {
/// Create the set from a sorted iterator. Values **must** be sorted.
/// Create the set from a sorted iterator. Values must be sorted and deduplicated.
///
/// This method can be faster than `from_iter` because it skips the binary searches.
/// The values of the iterator must be ordered and strictly greater than the greatest value
/// in the set. If a value in the iterator doesn't satisfy this requirement, it is not added
/// and the append operation is stopped.
///
/// # Examples
/// Returns `Ok` with the requested `RoaringBitmap`, `Err` with the number of elements
/// that were correctly appended before failure.
///
/// # Example: Create a set from an ordered list of integers.
///
/// ```rust
/// use roaring::RoaringBitmap;
///
/// let mut rb = RoaringBitmap::from_sorted_iter(0..10);
/// let mut rb = RoaringBitmap::from_sorted_iter(0..10).unwrap();
///
/// assert_eq!(rb.iter().collect::<Vec<u32>>(), (0..10).collect::<Vec<u32>>());
/// assert!(rb.iter().eq(0..10));
/// ```
pub fn from_sorted_iter<I: IntoIterator<Item = u32>>(iterator: I) -> RoaringBitmap {
///
/// # Example: Try to create a set from a non-ordered list of integers.
///
/// ```rust
/// use roaring::RoaringBitmap;
///
/// let integers = 0..10u32;
/// let error = RoaringBitmap::from_sorted_iter(integers.rev()).unwrap_err();
///
/// assert_eq!(error.valid_until(), 1);
/// ```
pub fn from_sorted_iter<I: IntoIterator<Item = u32>>(
iterator: I,
) -> Result<RoaringBitmap, NonSortedIntegers> {
let mut rb = RoaringBitmap::new();
rb.append(iterator);
rb
rb.append(iterator).map(|_| rb)
}

/// Extend the set with a sorted iterator.
/// All value of the iterator **must** be strictly bigger than the max element
/// contained in the set.
///
/// This method can be faster than `extend` because it skips the binary searches.
/// The values of the iterator must be ordered and strictly greater than the greatest value
/// in the set. If a value in the iterator doesn't satisfy this requirement, it is not added
/// and the append operation is stopped.
///
/// Returns `Ok` with the number of elements appended to the set, `Err` with
/// the number of elements we effectively appended before an error occurred.
///
/// # Examples
///
/// ```rust
/// use roaring::RoaringBitmap;
///
/// let mut rb = RoaringBitmap::new();
/// rb.append(0..10);
/// assert_eq!(rb.append(0..10), Ok(10));
///
/// assert_eq!(rb.iter().collect::<Vec<u32>>(), (0..10).collect::<Vec<u32>>());
/// assert!(rb.iter().eq(0..10));
/// ```
pub fn append<I: IntoIterator<Item = u32>>(&mut self, iterator: I) {
pub fn append<I: IntoIterator<Item = u32>>(
&mut self,
iterator: I,
) -> Result<u64, NonSortedIntegers> {
let mut count = 0;

for value in iterator {
self.push(value);
if self.push(value) {
count += 1;
} else {
return Err(NonSortedIntegers { valid_until: count });
}
}

Ok(count)
}
}
24 changes: 24 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@

extern crate byteorder;

use std::error::Error;
use std::fmt;

/// A compressed bitmap using the [Roaring bitmap compression scheme](http://roaringbitmap.org).
pub mod bitmap;

Expand All @@ -21,3 +24,24 @@ pub mod treemap;

pub use bitmap::RoaringBitmap;
pub use treemap::RoaringTreemap;

/// An error type that is returned when an iterator isn't sorted.
///
/// It is produced by the `from_sorted_iter` and `append` methods when a value
/// yielded by the input iterator could not be pushed at the end of the set.
/// The error records how many values were accepted before that point.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct NonSortedIntegers {
    /// Number of values that were successfully appended before the failure.
    valid_until: u64,
}

impl NonSortedIntegers {
    /// Returns the number of elements that were successfully appended
    /// before the first out-of-order value was encountered.
    ///
    /// A value of `0` means the very first element was already rejected.
    pub fn valid_until(&self) -> u64 {
        self.valid_until
    }
}

impl fmt::Display for NonSortedIntegers {
    /// Writes a short human-readable description containing the length of the
    /// valid, ordered prefix.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "integers are ordered up to the {}th element", self.valid_until())
    }
}

// No underlying cause is carried, so the default `Error` methods are enough.
impl Error for NonSortedIntegers {}
51 changes: 35 additions & 16 deletions src/treemap/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@ use std::iter::{self, FromIterator};
use super::util;
use crate::bitmap::IntoIter as IntoIter32;
use crate::bitmap::Iter as Iter32;
use crate::RoaringBitmap;
use crate::RoaringTreemap;
use crate::{NonSortedIntegers, RoaringBitmap, RoaringTreemap};

struct To64Iter<'a> {
hi: u32,
Expand Down Expand Up @@ -208,30 +207,39 @@ impl Extend<u64> for RoaringTreemap {
}

impl RoaringTreemap {
/// Create the set from a sorted iterator. Values **must** be sorted.
/// Create the set from a sorted iterator. Values must be sorted and deduplicated.
///
/// This method can be faster than `from_iter` because it skips the binary searches.
/// The values of the iterator must be ordered and strictly greater than the greatest value
/// in the set. If a value in the iterator doesn't satisfy this requirement, it is not added
/// and the append operation is stopped.
///
/// Returns `Ok` with the requested `RoaringTreemap`, `Err` with the number of elements
/// that were correctly appended before failure.
///
/// # Examples
///
/// ```rust
/// use roaring::RoaringTreemap;
///
/// let mut rb = RoaringTreemap::from_sorted_iter(0..10);
/// let mut rb = RoaringTreemap::from_sorted_iter(0..10).unwrap();
///
/// assert_eq!(rb.iter().collect::<Vec<u64>>(), (0..10).collect::<Vec<u64>>());
/// assert!(rb.iter().eq(0..10));
/// ```
pub fn from_sorted_iter<I: IntoIterator<Item = u64>>(iterator: I) -> RoaringTreemap {
let mut rb = RoaringTreemap::new();
rb.append(iterator);
rb
pub fn from_sorted_iter<I: IntoIterator<Item = u64>>(
iterator: I,
) -> Result<RoaringTreemap, NonSortedIntegers> {
let mut rt = RoaringTreemap::new();
rt.append(iterator).map(|_| rt)
}

/// Extend the set with a sorted iterator.
/// All value of the iterator **must** be greater or equal than the max element
/// contained in the set.
///
/// This method can be faster than `extend` because it skips the binary searches.
/// The values of the iterator must be ordered and strictly greater than the greatest value
/// in the set. If a value in the iterator doesn't satisfy this requirement, it is not added
/// and the append operation is stopped.
///
/// Returns `Ok` with the number of elements appended to the set, `Err` with
/// the number of elements we effectively appended before an error occurred.
///
/// # Examples
///
Expand All @@ -241,12 +249,23 @@ impl RoaringTreemap {
/// let mut rb = RoaringTreemap::new();
/// assert_eq!(rb.append(0..10), Ok(10));
///
/// assert_eq!(rb.iter().collect::<Vec<u64>>(), (0..10).collect::<Vec<u64>>());
/// assert!(rb.iter().eq(0..10));
/// ```
pub fn append<I: IntoIterator<Item = u64>>(&mut self, iterator: I) {
pub fn append<I: IntoIterator<Item = u64>>(
&mut self,
iterator: I,
) -> Result<u64, NonSortedIntegers> {
let mut count = 0;

for value in iterator {
self.push(value);
if self.push(value) {
count += 1;
} else {
return Err(NonSortedIntegers { valid_until: count });
}
}

Ok(count)
}
}

Expand Down
6 changes: 3 additions & 3 deletions tests/push.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::iter::FromIterator;
macro_rules! test_from_sorted_iter {
($values: expr, $class: ty) => {{
let rb1 = <$class>::from_iter($values.clone());
let rb2 = <$class>::from_sorted_iter($values);
let rb2 = <$class>::from_sorted_iter($values).unwrap();

for (x, y) in rb1.iter().zip(rb2.iter()) {
assert_eq!(x, y);
Expand All @@ -22,11 +22,11 @@ macro_rules! test_from_sorted_iter {
#[test]
fn append() {
test_from_sorted_iter!((0..1_000_000).map(|x| 13 * x).collect::<Vec<u32>>(), RoaringBitmap);
test_from_sorted_iter!(vec![1, 2, 4, 5, 5, 7, 8, 8, 9], RoaringBitmap);
test_from_sorted_iter!(vec![1, 2, 4, 5, 7, 8, 9], RoaringBitmap);
}

#[test]
fn append_tree() {
test_from_sorted_iter!((0..1_000_000).map(|x| 13 * x).collect::<Vec<u64>>(), RoaringTreemap);
test_from_sorted_iter!(vec![1, 2, 4, 5, 5, 7, 8, 8, 9], RoaringTreemap);
test_from_sorted_iter!(vec![1, 2, 4, 5, 7, 8, 9], RoaringTreemap);
}