diff --git a/benchmarks/benches/lib.rs b/benchmarks/benches/lib.rs index d12d67b3..8bdf014a 100644 --- a/benchmarks/benches/lib.rs +++ b/benchmarks/benches/lib.rs @@ -8,7 +8,7 @@ use criterion::{ Throughput, }; -use roaring::{MultiOps, RoaringBitmap}; +use roaring::{MultiOps, RoaringBitmap, RoaringTreemap}; use crate::datasets::Datasets; @@ -674,6 +674,30 @@ fn insert_range_bitmap(c: &mut Criterion) { } } +fn insert_range_treemap(c: &mut Criterion) { + for &size in &[1_000_u64, 10_000_u64, 2 * (u32::MAX as u64)] { + let mut group = c.benchmark_group("insert_range_treemap"); + group.throughput(criterion::Throughput::Elements(size)); + group.bench_function(format!("from_empty_{}", size), |b| { + let bm = RoaringTreemap::new(); + b.iter_batched( + || bm.clone(), + |mut bm| black_box(bm.insert_range(0..size)), + criterion::BatchSize::SmallInput, + ) + }); + group.bench_function(format!("pre_populated_{}", size), |b| { + let mut bm = RoaringTreemap::new(); + bm.insert_range(0..size); + b.iter_batched( + || bm.clone(), + |mut bm| black_box(bm.insert_range(0..size)), + criterion::BatchSize::SmallInput, + ) + }); + } +} + criterion_group!( benches, creation, @@ -691,6 +715,7 @@ criterion_group!( remove, remove_range_bitmap, insert_range_bitmap, + insert_range_treemap, iteration, is_empty, serialization, diff --git a/src/bitmap/container.rs b/src/bitmap/container.rs index 2e61dc48..48fb304f 100644 --- a/src/bitmap/container.rs +++ b/src/bitmap/container.rs @@ -23,6 +23,10 @@ impl Container { pub fn new(key: u16) -> Container { Container { key, store: Store::new() } } + + pub fn full(key: u16) -> Container { + Container { key, store: Store::full() } + } } impl Container { diff --git a/src/bitmap/inherent.rs b/src/bitmap/inherent.rs index 6cd87c5d..94f4b314 100644 --- a/src/bitmap/inherent.rs +++ b/src/bitmap/inherent.rs @@ -13,12 +13,24 @@ impl RoaringBitmap { /// /// ```rust /// use roaring::RoaringBitmap; - /// let mut rb = RoaringBitmap::new(); + /// let rb = 
RoaringBitmap::new(); /// ``` pub fn new() -> RoaringBitmap { RoaringBitmap { containers: Vec::new() } } + /// Creates a full `RoaringBitmap`, with a full container under every `u16` key. + /// + /// # Examples + /// + /// ```rust + /// use roaring::RoaringBitmap; + /// let rb = RoaringBitmap::full(); + /// ``` + pub fn full() -> RoaringBitmap { + RoaringBitmap { containers: (0..=u16::MAX).map(Container::full).collect() } + } + /// Adds a value to the set. /// /// Returns whether the value was absent from the set. @@ -428,6 +440,22 @@ impl RoaringBitmap { self.containers.is_empty() } + /// Returns `true` if this set contains every possible integer. + /// + /// # Examples + /// + /// ```rust + /// use roaring::RoaringBitmap; + /// + /// let rb = RoaringBitmap::full(); + /// assert!(!rb.is_empty()); + /// assert!(rb.is_full()); + /// ``` + pub fn is_full(&self) -> bool { + self.containers.len() == (u16::MAX as usize + 1) + && self.containers.iter().all(Container::is_full) + } + /// Returns the number of distinct integers added to the set. 
/// /// # Examples diff --git a/src/bitmap/store/bitmap_store.rs b/src/bitmap/store/bitmap_store.rs index e7aa2ae0..76e121e2 100644 --- a/src/bitmap/store/bitmap_store.rs +++ b/src/bitmap/store/bitmap_store.rs @@ -18,6 +18,10 @@ impl BitmapStore { BitmapStore { len: 0, bits: Box::new([0; BITMAP_LENGTH]) } } + pub fn full() -> BitmapStore { + BitmapStore { len: (BITMAP_LENGTH as u64) * 64, bits: Box::new([u64::MAX; BITMAP_LENGTH]) } + } + pub fn try_from(len: u64, bits: Box<[u64; BITMAP_LENGTH]>) -> Result { let actual_len = bits.iter().map(|v| v.count_ones() as u64).sum(); if len != actual_len { diff --git a/src/bitmap/store/mod.rs b/src/bitmap/store/mod.rs index 72dd832f..0ebc150b 100644 --- a/src/bitmap/store/mod.rs +++ b/src/bitmap/store/mod.rs @@ -31,6 +31,10 @@ impl Store { Store::Array(ArrayStore::new()) } + pub fn full() -> Store { + Store::Bitmap(BitmapStore::full()) + } + pub fn insert(&mut self, index: u16) -> bool { match self { Array(vec) => vec.insert(index), diff --git a/src/treemap/inherent.rs b/src/treemap/inherent.rs index 5ffb9db9..45badbce 100644 --- a/src/treemap/inherent.rs +++ b/src/treemap/inherent.rs @@ -1,4 +1,5 @@ use std::collections::btree_map::{BTreeMap, Entry}; +use std::iter; use std::ops::RangeBounds; use crate::RoaringBitmap; @@ -13,12 +14,24 @@ impl RoaringTreemap { /// /// ```rust /// use roaring::RoaringTreemap; - /// let mut rb = RoaringTreemap::new(); + /// let rb = RoaringTreemap::new(); /// ``` pub fn new() -> RoaringTreemap { RoaringTreemap { map: BTreeMap::new() } } + /// Creates a full `RoaringTreemap`, with a full `RoaringBitmap` under every `u32` key. + /// + /// # Examples + /// + /// ```rust,ignore + /// use roaring::RoaringTreemap; + /// let rb = RoaringTreemap::full(); + /// ``` + pub fn full() -> RoaringTreemap { + RoaringTreemap { map: (0..=u32::MAX).zip(iter::repeat(RoaringBitmap::full())).collect() } + } + /// Adds a value to the set. Returns `true` if the value was not already present in the set. 
/// /// # Examples @@ -36,6 +49,60 @@ impl RoaringTreemap { self.map.entry(hi).or_insert_with(RoaringBitmap::new).insert(lo) } + /// Inserts a range of values. + /// + /// Returns the number of inserted values. + /// + /// # Examples + /// + /// ```rust + /// use roaring::RoaringTreemap; + /// + /// let mut rb = RoaringTreemap::new(); + /// rb.insert_range(2..4); + /// assert!(rb.contains(2)); + /// assert!(rb.contains(3)); + /// assert!(!rb.contains(4)); + /// ``` + pub fn insert_range<R: RangeBounds<u64>>(&mut self, range: R) -> u64 { + let (start, end) = match util::convert_range_to_inclusive(range) { + Some(range) => (*range.start(), *range.end()), + None => return 0, + }; + + let (start_hi, start_lo) = util::split(start); + let (end_hi, end_lo) = util::split(end); + + let mut counter = 0u64; + + // Split the input range by the leading 32 bits + for hi in start_hi..=end_hi { + let entry = self.map.entry(hi); + + // Calculate the sub-range from the lower 32 bits + counter += if hi == end_hi && hi == start_hi { + entry.or_insert_with(RoaringBitmap::new).insert_range(start_lo..=end_lo) + } else if hi == start_hi { + entry.or_insert_with(RoaringBitmap::new).insert_range(start_lo..=u32::MAX) + } else if hi == end_hi { + entry.or_insert_with(RoaringBitmap::new).insert_range(0..=end_lo) + } else { + // We insert a full bitmap if it doesn't already exist and return the size of it. + // But if a bitmap already exists at this spot we replace it with a full bitmap + // and count only the integers that the previous bitmap did not already contain. + let full_bitmap = RoaringBitmap::full(); + match entry { + Entry::Vacant(entry) => entry.insert(full_bitmap).len(), + Entry::Occupied(mut entry) => { + full_bitmap.len() - entry.insert(full_bitmap).len() + } + } + }; + } + + counter + } + /// Pushes `value` in the treemap only if it is greater than the current maximum value. /// /// Returns whether the value was inserted. 
@@ -58,7 +125,6 @@ impl RoaringTreemap { self.map.entry(hi).or_insert_with(RoaringBitmap::new).push(lo) } - /// /// Pushes `value` in the treemap only if it is greater than the current maximum value. /// It is up to the caller to have validated index > self.max() /// @@ -213,6 +279,23 @@ impl RoaringTreemap { self.map.values().all(RoaringBitmap::is_empty) } + /// Returns `true` if this set contains every possible integer. + /// + /// # Examples + /// + /// ```rust,ignore + /// use roaring::RoaringTreemap; + /// + /// let rb = RoaringTreemap::full(); + /// assert!(!rb.is_empty()); + /// assert!(rb.is_full()); + /// ``` + pub fn is_full(&self) -> bool { + // Compare in `u64`: `u32::MAX as usize + 1` overflows when `usize` is 32 bits wide. + self.map.len() as u64 == u64::from(u32::MAX) + 1 + && self.map.values().all(RoaringBitmap::is_full) + } + /// Returns the number of distinct integers added to the set. /// /// # Examples diff --git a/tests/treemap_lib.rs b/tests/treemap_lib.rs index 894196de..0c302af9 100644 --- a/tests/treemap_lib.rs +++ b/tests/treemap_lib.rs @@ -13,11 +13,11 @@ fn smoke() { assert!(bitmap.contains(1)); assert_eq!(bitmap.len(), 1); assert!(!bitmap.is_empty()); - bitmap.insert(u64::max_value() - 2); - assert!(bitmap.contains(u64::max_value() - 2)); + bitmap.insert(u64::MAX - 2); + assert!(bitmap.contains(u64::MAX - 2)); assert_eq!(bitmap.len(), 2); - bitmap.insert(u64::max_value()); - assert!(bitmap.contains(u64::max_value())); + bitmap.insert(u64::MAX); + assert!(bitmap.contains(u64::MAX)); assert_eq!(bitmap.len(), 3); bitmap.insert(2); assert!(bitmap.contains(2)); @@ -28,9 +28,33 @@ fn smoke() { assert!(!bitmap.contains(0)); assert!(bitmap.contains(1)); assert!(!bitmap.contains(100)); - assert!(bitmap.contains(u64::max_value() - 2)); - assert!(!bitmap.contains(u64::max_value() - 1)); - assert!(bitmap.contains(u64::max_value())); + assert!(bitmap.contains(u64::MAX - 2)); + assert!(!bitmap.contains(u64::MAX - 1)); + assert!(bitmap.contains(u64::MAX)); +} + +#[test] +fn insert_range() { + let ranges = 0..0x1000; + const SIGMA: u64 = 
u32::MAX as u64; + + let mut bitmap = RoaringTreemap::new(); + assert_eq!(bitmap.insert_range(ranges), 0x1000); + assert_eq!(bitmap.len(), 0x1000); + assert_eq!(bitmap.max(), Some(0xFFF)); + + assert_eq!(bitmap.insert_range(SIGMA - 1..SIGMA + 1), 2); + assert!(bitmap.contains(2)); + assert!(bitmap.contains(0xFFF)); + assert!(!bitmap.contains(0x1000)); + + bitmap.clear(); + bitmap.insert_range(2 * SIGMA..=4 * SIGMA); + + assert_eq!(bitmap.min(), Some(2 * SIGMA)); + assert_eq!(bitmap.max(), Some(4 * SIGMA)); + + assert!(bitmap.contains(3 * SIGMA)); } #[test] @@ -53,16 +77,16 @@ fn test_max() { assert_eq!(bitmap.max(), Some(0)); bitmap.insert(1); assert_eq!(bitmap.max(), Some(1)); - bitmap.insert(u64::max_value()); - assert_eq!(bitmap.max(), Some(u64::max_value())); + bitmap.insert(u64::MAX); + assert_eq!(bitmap.max(), Some(u64::MAX)); } #[test] fn test_min() { let mut bitmap = RoaringTreemap::new(); assert_eq!(bitmap.min(), None); - bitmap.insert(u64::max_value()); - assert_eq!(bitmap.min(), Some(u64::max_value())); + bitmap.insert(u64::MAX); + assert_eq!(bitmap.min(), Some(u64::MAX)); bitmap.insert(1); assert_eq!(bitmap.min(), Some(1)); bitmap.insert(0);