From b9e5daba7e39cb036bad620e51390424285d9b5c Mon Sep 17 00:00:00 2001 From: not-jan <61017633+not-jan@users.noreply.github.com> Date: Sun, 26 Jun 2022 01:35:20 +0200 Subject: [PATCH 1/6] Add a naive implementation of `insert_range` for `RoaringTreemap` --- src/treemap/inherent.rs | 46 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/src/treemap/inherent.rs b/src/treemap/inherent.rs index 5ffb9db9c..5f9fd9bda 100644 --- a/src/treemap/inherent.rs +++ b/src/treemap/inherent.rs @@ -36,6 +36,52 @@ impl RoaringTreemap { self.map.entry(hi).or_insert_with(RoaringBitmap::new).insert(lo) } + /// Inserts a range of values. + /// Returns the number of inserted values. + /// + /// # Examples + /// + /// ```rust + /// use roaring::RoaringTreemap; + /// + /// let mut rb = RoaringTreemap::new(); + /// rb.insert_range(2..4); + /// assert!(rb.contains(2)); + /// assert!(rb.contains(3)); + /// assert!(!rb.contains(4)); + /// ``` + pub fn insert_range>(&mut self, range: R) -> u64 { + let (start, end) = match util::convert_range_to_inclusive(range) { + Some(range) => (*range.start(), *range.end()), + None => return 0, + }; + + let (start_hi, start_lo) = util::split(start); + let (end_hi, end_lo) = util::split(end); + + let mut counter = 0u64; + + // Split the input range by the leading 32 bits + for hi in start_hi..=end_hi { + // Calculate the sub-range from the lower 32 bits + let range = if hi == end_hi && hi == start_hi { + start_lo..=end_lo + } else if hi == start_hi { + start_lo..=u32::MAX + } else if hi == end_hi { + 0..=end_lo + } else { + // This is pretty expensive, we can definitely pre-calculate what a full + // `RoaringBitmap` looks like so we might as well use it here. + 0..=u32::MAX + }; + + counter += self.map.entry(hi).or_insert_with(RoaringBitmap::new).insert_range(range) + } + + counter + } + /// Pushes `value` in the treemap only if it is greater than the current maximum value. /// /// Returns whether the value was inserted. 
From 1a27904b1c75ca892531b0b0780b20a9664214b9 Mon Sep 17 00:00:00 2001 From: not-jan <61017633+not-jan@users.noreply.github.com> Date: Sun, 26 Jun 2022 01:35:39 +0200 Subject: [PATCH 2/6] Add tests for the naive implementation of `insert_range` for `RoaringTreemap` --- tests/treemap_lib.rs | 46 +++++++++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/tests/treemap_lib.rs b/tests/treemap_lib.rs index 894196de0..0c302af9b 100644 --- a/tests/treemap_lib.rs +++ b/tests/treemap_lib.rs @@ -13,11 +13,11 @@ fn smoke() { assert!(bitmap.contains(1)); assert_eq!(bitmap.len(), 1); assert!(!bitmap.is_empty()); - bitmap.insert(u64::max_value() - 2); - assert!(bitmap.contains(u64::max_value() - 2)); + bitmap.insert(u64::MAX - 2); + assert!(bitmap.contains(u64::MAX - 2)); assert_eq!(bitmap.len(), 2); - bitmap.insert(u64::max_value()); - assert!(bitmap.contains(u64::max_value())); + bitmap.insert(u64::MAX); + assert!(bitmap.contains(u64::MAX)); assert_eq!(bitmap.len(), 3); bitmap.insert(2); assert!(bitmap.contains(2)); @@ -28,9 +28,33 @@ fn smoke() { assert!(!bitmap.contains(0)); assert!(bitmap.contains(1)); assert!(!bitmap.contains(100)); - assert!(bitmap.contains(u64::max_value() - 2)); - assert!(!bitmap.contains(u64::max_value() - 1)); - assert!(bitmap.contains(u64::max_value())); + assert!(bitmap.contains(u64::MAX - 2)); + assert!(!bitmap.contains(u64::MAX - 1)); + assert!(bitmap.contains(u64::MAX)); +} + +#[test] +fn insert_range() { + let ranges = 0..0x1000; + const SIGMA: u64 = u32::MAX as u64; + + let mut bitmap = RoaringTreemap::new(); + assert_eq!(bitmap.insert_range(ranges), 0x1000); + assert_eq!(bitmap.len(), 0x1000); + assert_eq!(bitmap.max(), Some(0xFFF)); + + assert_eq!(bitmap.insert_range(u32::MAX as u64 - 1..u32::MAX as u64 + 1), 2); + assert!(bitmap.contains(2)); + assert!(bitmap.contains(0xFFF)); + assert!(!bitmap.contains(0x1000)); + + bitmap.clear(); + bitmap.insert_range(2 * SIGMA..=4 * SIGMA); + + 
assert_eq!(bitmap.min(), Some(2 * SIGMA)); + assert_eq!(bitmap.max(), Some(4 * SIGMA)); + + assert!(bitmap.contains(3 * SIGMA)); } #[test] @@ -53,16 +77,16 @@ fn test_max() { assert_eq!(bitmap.max(), Some(0)); bitmap.insert(1); assert_eq!(bitmap.max(), Some(1)); - bitmap.insert(u64::max_value()); - assert_eq!(bitmap.max(), Some(u64::max_value())); + bitmap.insert(u64::MAX); + assert_eq!(bitmap.max(), Some(u64::MAX)); } #[test] fn test_min() { let mut bitmap = RoaringTreemap::new(); assert_eq!(bitmap.min(), None); - bitmap.insert(u64::max_value()); - assert_eq!(bitmap.min(), Some(u64::max_value())); + bitmap.insert(u64::MAX); + assert_eq!(bitmap.min(), Some(u64::MAX)); bitmap.insert(1); assert_eq!(bitmap.min(), Some(1)); bitmap.insert(0); From 3fe99a7867b593b153744ae7f59cb79883971626 Mon Sep 17 00:00:00 2001 From: not-jan <61017633+not-jan@users.noreply.github.com> Date: Tue, 28 Jun 2022 11:47:14 +0200 Subject: [PATCH 3/6] Add a benchmark for RoaringTreemap::insert_range --- benchmarks/benches/lib.rs | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/benchmarks/benches/lib.rs b/benchmarks/benches/lib.rs index d12d67b3e..8bdf014a8 100644 --- a/benchmarks/benches/lib.rs +++ b/benchmarks/benches/lib.rs @@ -8,7 +8,7 @@ use criterion::{ Throughput, }; -use roaring::{MultiOps, RoaringBitmap}; +use roaring::{MultiOps, RoaringBitmap, RoaringTreemap}; use crate::datasets::Datasets; @@ -674,6 +674,30 @@ fn insert_range_bitmap(c: &mut Criterion) { } } +fn insert_range_treemap(c: &mut Criterion) { + for &size in &[1_000_u64, 10_000u64, 2 * (u32::MAX as u64)] { + let mut group = c.benchmark_group("insert_range_treemap"); + group.throughput(criterion::Throughput::Elements(size as u64)); + group.bench_function(format!("from_empty_{}", size), |b| { + let bm = RoaringTreemap::new(); + b.iter_batched( + || bm.clone(), + |mut bm| black_box(bm.insert_range(0..size)), + criterion::BatchSize::SmallInput, + ) + }); + 
group.bench_function(format!("pre_populated_{}", size), |b| { + let mut bm = RoaringTreemap::new(); + bm.insert_range(0..size); + b.iter_batched( + || bm.clone(), + |mut bm| black_box(bm.insert_range(0..size)), + criterion::BatchSize::SmallInput, + ) + }); + } +} + criterion_group!( benches, creation, @@ -691,6 +715,7 @@ criterion_group!( remove, remove_range_bitmap, insert_range_bitmap, + insert_range_treemap, iteration, is_empty, serialization, From 4d0530944acc109fca8fd9548e9ddc00e9a6cd68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 1 Sep 2022 12:08:35 +0200 Subject: [PATCH 4/6] Introduce the RoaringBitmap full and is_full methods --- src/bitmap/container.rs | 4 ++++ src/bitmap/inherent.rs | 30 +++++++++++++++++++++++++++++- src/bitmap/store/bitmap_store.rs | 4 ++++ src/bitmap/store/mod.rs | 4 ++++ 4 files changed, 41 insertions(+), 1 deletion(-) diff --git a/src/bitmap/container.rs b/src/bitmap/container.rs index 2e61dc488..48fb304fc 100644 --- a/src/bitmap/container.rs +++ b/src/bitmap/container.rs @@ -23,6 +23,10 @@ impl Container { pub fn new(key: u16) -> Container { Container { key, store: Store::new() } } + + pub fn full(key: u16) -> Container { + Container { key, store: Store::full() } + } } impl Container { diff --git a/src/bitmap/inherent.rs b/src/bitmap/inherent.rs index 6cd87c5da..94f4b3145 100644 --- a/src/bitmap/inherent.rs +++ b/src/bitmap/inherent.rs @@ -13,12 +13,24 @@ impl RoaringBitmap { /// /// ```rust /// use roaring::RoaringBitmap; - /// let mut rb = RoaringBitmap::new(); + /// let rb = RoaringBitmap::new(); /// ``` pub fn new() -> RoaringBitmap { RoaringBitmap { containers: Vec::new() } } + /// Creates a full `RoaringBitmap`. + /// + /// # Examples + /// + /// ```rust + /// use roaring::RoaringBitmap; + /// let rb = RoaringBitmap::full(); + /// ``` + pub fn full() -> RoaringBitmap { + RoaringBitmap { containers: (0..=u16::MAX).map(Container::full).collect() } + } + /// Adds a value to the set. 
/// /// Returns whether the value was absent from the set. @@ -428,6 +440,22 @@ impl RoaringBitmap { self.containers.is_empty() } + /// Returns `true` if this set contains every possible integer. + /// + /// # Examples + /// + /// ```rust + /// use roaring::RoaringBitmap; + /// + /// let mut rb = RoaringBitmap::full(); + /// assert!(!rb.is_empty()); + /// assert!(rb.is_full()); + /// ``` + pub fn is_full(&self) -> bool { + self.containers.len() == (u16::MAX as usize + 1) + && self.containers.iter().all(Container::is_full) + } + /// Returns the number of distinct integers added to the set. /// /// # Examples diff --git a/src/bitmap/store/bitmap_store.rs b/src/bitmap/store/bitmap_store.rs index e7aa2ae0f..76e121e2a 100644 --- a/src/bitmap/store/bitmap_store.rs +++ b/src/bitmap/store/bitmap_store.rs @@ -18,6 +18,10 @@ impl BitmapStore { BitmapStore { len: 0, bits: Box::new([0; BITMAP_LENGTH]) } } + pub fn full() -> BitmapStore { + BitmapStore { len: (BITMAP_LENGTH as u64) * 64, bits: Box::new([u64::MAX; BITMAP_LENGTH]) } + } + pub fn try_from(len: u64, bits: Box<[u64; BITMAP_LENGTH]>) -> Result { let actual_len = bits.iter().map(|v| v.count_ones() as u64).sum(); if len != actual_len { diff --git a/src/bitmap/store/mod.rs b/src/bitmap/store/mod.rs index 72dd832f3..0ebc150ba 100644 --- a/src/bitmap/store/mod.rs +++ b/src/bitmap/store/mod.rs @@ -31,6 +31,10 @@ impl Store { Store::Array(ArrayStore::new()) } + pub fn full() -> Store { + Store::Bitmap(BitmapStore::full()) + } + pub fn insert(&mut self, index: u16) -> bool { match self { Array(vec) => vec.insert(index), From e0ac3286d3c83c7fcf0328b5dc3b3ec56e6201d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 1 Sep 2022 12:09:14 +0200 Subject: [PATCH 5/6] Introduce the RoaringTreemap full and is_full methods --- src/treemap/inherent.rs | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/src/treemap/inherent.rs b/src/treemap/inherent.rs index
5f9fd9bda..82b097cae 100644 --- a/src/treemap/inherent.rs +++ b/src/treemap/inherent.rs @@ -1,4 +1,5 @@ use std::collections::btree_map::{BTreeMap, Entry}; +use std::iter; use std::ops::RangeBounds; use crate::RoaringBitmap; @@ -13,12 +14,24 @@ impl RoaringTreemap { /// /// ```rust /// use roaring::RoaringTreemap; - /// let mut rb = RoaringTreemap::new(); + /// let rb = RoaringTreemap::new(); /// ``` pub fn new() -> RoaringTreemap { RoaringTreemap { map: BTreeMap::new() } } + /// Creates a full `RoaringTreemap`. + /// + /// # Examples + /// + /// ```rust,ignore + /// use roaring::RoaringTreemap; + /// let rb = RoaringTreemap::full(); + /// ``` + pub fn full() -> RoaringTreemap { + RoaringTreemap { map: (0..=u32::MAX).zip(iter::repeat(RoaringBitmap::full())).collect() } + } + /// Adds a value to the set. Returns `true` if the value was not already present in the set. /// /// # Examples @@ -259,6 +272,21 @@ impl RoaringTreemap { self.map.values().all(RoaringBitmap::is_empty) } + /// Returns `true` if this set contains every possible integer. + /// + /// # Examples + /// + /// ```rust,ignore + /// use roaring::RoaringTreemap; + /// + /// let mut rb = RoaringTreemap::full(); + /// assert!(!rb.is_empty()); + /// assert!(rb.is_full()); + /// ``` + pub fn is_full(&self) -> bool { + self.map.len() == (u32::MAX as usize + 1) && self.map.values().all(RoaringBitmap::is_full) + } + /// Returns the number of distinct integers added to the set.
/// /// # Examples From 737d952e901e65e759b094ee532c0c676d4dc982 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 1 Sep 2022 12:08:01 +0200 Subject: [PATCH 6/6] Use the RoaringBitmap full constructor in the insert_range method --- src/treemap/inherent.rs | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/src/treemap/inherent.rs b/src/treemap/inherent.rs index 82b097cae..45badbceb 100644 --- a/src/treemap/inherent.rs +++ b/src/treemap/inherent.rs @@ -50,6 +50,7 @@ impl RoaringTreemap { } /// Inserts a range of values. + /// /// Returns the number of inserted values. /// /// # Examples @@ -76,20 +77,27 @@ impl RoaringTreemap { // Split the input range by the leading 32 bits for hi in start_hi..=end_hi { + let entry = self.map.entry(hi); + // Calculate the sub-range from the lower 32 bits - let range = if hi == end_hi && hi == start_hi { - start_lo..=end_lo + counter += if hi == end_hi && hi == start_hi { + entry.or_insert_with(RoaringBitmap::new).insert_range(start_lo..=end_lo) } else if hi == start_hi { - start_lo..=u32::MAX + entry.or_insert_with(RoaringBitmap::new).insert_range(start_lo..=u32::MAX) } else if hi == end_hi { - 0..=end_lo + entry.or_insert_with(RoaringBitmap::new).insert_range(0..=end_lo) } else { - // This is pretty expensive, we can definitely pre-calculate what a full - // `RoaringBitmap` looks like so we might as well use it here. - 0..=u32::MAX + // We insert a full bitmap if it doesn't already exist and return the size of it. + // But if the bitmap already exists at this spot we replace it with a full bitmap + // and only count the integers that were not already in the previous bitmap.
+ let full_bitmap = RoaringBitmap::full(); + match entry { + Entry::Vacant(entry) => entry.insert(full_bitmap).len(), + Entry::Occupied(mut entry) => { + full_bitmap.len() - entry.insert(full_bitmap).len() + } + } }; - - counter += self.map.entry(hi).or_insert_with(RoaringBitmap::new).insert_range(range) } counter @@ -117,7 +125,6 @@ impl RoaringTreemap { self.map.entry(hi).or_insert_with(RoaringBitmap::new).push(lo) } - /// /// Pushes `value` in the treemap only if it is greater than the current maximum value. /// It is up to the caller to have validated index > self.max() ///