From dd05bac8a2a45b9bcd9431fcf0c5f769b4f11947 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Sun, 28 Aug 2022 21:58:53 -0400 Subject: [PATCH 1/5] Add contains_range fn --- src/bitmap/container.rs | 8 ++++ src/bitmap/inherent.rs | 60 +++++++++++++++++++++++++++++ src/bitmap/store/array_store/mod.rs | 31 +++++++++++++++ src/bitmap/store/bitmap_store.rs | 24 ++++++++++++ src/bitmap/store/mod.rs | 11 ++++++ 5 files changed, 134 insertions(+) diff --git a/src/bitmap/container.rs b/src/bitmap/container.rs index d260270bd..2e61dc488 100644 --- a/src/bitmap/container.rs +++ b/src/bitmap/container.rs @@ -94,6 +94,14 @@ impl Container { self.store.contains(index) } + pub fn contains_range(&self, range: RangeInclusive<u16>) -> bool { + self.store.contains_range(range) + } + + pub fn is_full(&self) -> bool { + self.store.is_full() + } + pub fn is_disjoint(&self, other: &Self) -> bool { self.store.is_disjoint(&other.store) } diff --git a/src/bitmap/inherent.rs b/src/bitmap/inherent.rs index 7ee5744d0..852c96163 100644 --- a/src/bitmap/inherent.rs +++ b/src/bitmap/inherent.rs @@ -268,6 +268,66 @@ impl RoaringBitmap { } } + /// Returns `true` if all values in the range are present in this set.
+ /// + /// # Examples + /// + /// ``` + /// use roaring::RoaringBitmap; + /// + /// let mut rb = RoaringBitmap::new(); + /// // An empty range is always contained + /// assert!(rb.contains_range(7..7)); + /// + /// rb.insert_range(1..0xFFF); + /// assert!(rb.contains_range(1..0xFFF)); + /// assert!(rb.contains_range(2..0xFFF)); + /// // 0 is not contained + /// assert!(!rb.contains_range(0..2)); + /// // 0xFFF is not contained + /// assert!(!rb.contains_range(1..=0xFFF)); + /// ``` + pub fn contains_range<R>(&self, range: R) -> bool + where + R: RangeBounds<u32>, + { + let (start, end) = match util::convert_range_to_inclusive(range) { + Some(range) => (*range.start(), *range.end()), + // Empty ranges are always contained + None => return true, + }; + let (start_high, start_low) = util::split(start); + let (end_high, end_low) = util::split(end); + debug_assert!(start_high <= end_high); + + let containers = + match self.containers.binary_search_by_key(&start_high, |container| container.key) { + Ok(i) => &self.containers[i..], + Err(_) => return false, + }; + + if start_high == end_high { + return containers[0].contains_range(start_low..=end_low); + } + + let high_span = usize::from(end_high - start_high); + // If this contains everything in the range, there should be a container for every item in the span + // and the container that many items away should be the high key + let containers = match containers.get(high_span) { + Some(c) if c.key == end_high => &containers[..=high_span], + _ => return false, + }; + + match containers { + [first, rest @ .., last] => { + first.contains_range(start_low..=u16::MAX) + && rest.iter().all(|container| container.is_full()) + && last.contains_range(0..=end_low) + } + _ => unreachable!("already validated containers has at least 2 items"), + } + } + /// Clears all integers in this set.
/// # Examples diff --git a/src/bitmap/store/array_store/mod.rs b/src/bitmap/store/array_store/mod.rs index dbfb6dfa8..02b7f41a7 100644 --- a/src/bitmap/store/array_store/mod.rs +++ b/src/bitmap/store/array_store/mod.rs @@ -120,6 +120,23 @@ impl ArrayStore { self.vec.binary_search(&index).is_ok() } + pub fn contains_range(&self, range: RangeInclusive<u16>) -> bool { + let start = *range.start(); + let end = *range.end(); + let range_count = usize::from(end - start) + 1; + if self.vec.len() < range_count { + return false; + } + let start_i = match self.vec.binary_search(&start) { + Ok(i) => i, + Err(_) => return false, + }; + + // If there are `range_count` items, the `range_count`th item should be the final item + // in the range, because this vec is sorted and has no duplicates + self.vec.get(start_i + range_count) == Some(&end) + } + pub fn is_disjoint(&self, other: &Self) -> bool { let (mut i1, mut i2) = (self.vec.iter(), other.vec.iter()); let (mut value1, mut value2) = (i1.next(), i2.next()); @@ -446,6 +463,20 @@ mod tests { assert_eq!(into_vec(store), vec![1, 2, 4, 5, 6, 7, 8, 9]); } + #[test] + fn test_array_contains_range() { + let store = Store::Array(ArrayStore::from_vec_unchecked(vec![])); + assert!(!store.contains_range(0..=0)); + assert!(!store.contains_range(0..=1)); + assert!(!store.contains_range(1..=u16::MAX)); + + let store = Store::Array(ArrayStore::from_vec_unchecked(vec![0, 1, 2, 3, 4, 5, 100])); + assert!(store.contains_range(0..=0)); + assert!(store.contains_range(0..=5)); + assert!(!store.contains_range(0..=6)); + assert!(store.contains_range(100..=100)); + } + #[test] fn test_array_insert_range_full_overlap() { let mut store = Store::Array(ArrayStore::from_vec_unchecked(vec![1, 2, 8, 9])); diff --git a/src/bitmap/store/bitmap_store.rs b/src/bitmap/store/bitmap_store.rs index 87de13ea1..e7aa2ae0f 100644 --- a/src/bitmap/store/bitmap_store.rs +++ b/src/bitmap/store/bitmap_store.rs @@ -178,6 +178,30 @@ impl BitmapStore { self.bits[key(index)] &
(1 << bit(index)) != 0 } + pub fn contains_range(&self, range: RangeInclusive<u16>) -> bool { + let start = *range.start(); + let end = *range.end(); + if self.len() < u64::from(end - start) + 1 { + return false; + } + + let (start_i, start_bit) = (key(start), bit(start)); + let (end_i, end_bit) = (key(end), bit(end)); + + let start_mask = !((1 << start_bit) - 1); + let end_mask = (1 << end_bit) | ((1 << end_bit) - 1); // bits 0..=end_bit, end inclusive + + match &self.bits[start_i..=end_i] { + [] => unreachable!(), + &[word] => word & (start_mask & end_mask) == (start_mask & end_mask), + &[first, ref rest @ .., last] => { + (first & start_mask) == start_mask + && rest.iter().all(|&word| word == !0) + && (last & end_mask) == end_mask + } + } + } + pub fn is_disjoint(&self, other: &BitmapStore) -> bool { self.bits.iter().zip(other.bits.iter()).all(|(&i1, &i2)| (i1 & i2) == 0) } diff --git a/src/bitmap/store/mod.rs b/src/bitmap/store/mod.rs index 1b570eb46..72dd832f3 100644 --- a/src/bitmap/store/mod.rs +++ b/src/bitmap/store/mod.rs @@ -99,6 +99,17 @@ impl Store { } } + pub fn contains_range(&self, range: RangeInclusive<u16>) -> bool { + match self { + Array(vec) => vec.contains_range(range), + Bitmap(bits) => bits.contains_range(range), + } + } + + pub fn is_full(&self) -> bool { + self.len() == (1 << 16) + } + pub fn is_disjoint(&self, other: &Self) -> bool { match (self, other) { (Array(vec1), Array(vec2)) => vec1.is_disjoint(vec2), From e759e5af3a4c6c05b7921fa5fe62fdaf17111fbb Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Sun, 28 Aug 2022 23:36:28 -0400 Subject: [PATCH 2/5] Implement range_cardinality function --- src/bitmap/inherent.rs | 66 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/src/bitmap/inherent.rs b/src/bitmap/inherent.rs index 852c96163..d67ebf3e4 100644 --- a/src/bitmap/inherent.rs +++ b/src/bitmap/inherent.rs @@ -1,3 +1,4 @@ +use std::cmp::Ordering; use std::ops::RangeBounds; use crate::RoaringBitmap; @@ -328,6 +329,71 @@ impl RoaringBitmap { } } + /// Returns
the number of elements in this set which are in the passed range. + /// + /// # Examples + /// + /// ``` + /// use roaring::RoaringBitmap; + /// + /// let mut rb = RoaringBitmap::new(); + /// rb.insert_range(0x10000..0x40000); + /// rb.insert(0x50001); + /// rb.insert(0x50005); + /// rb.insert(u32::MAX); + /// + /// assert_eq!(rb.range_cardinality(0..0x10000), 0); + /// assert_eq!(rb.range_cardinality(0x10000..0x40000), 0x30000); + /// assert_eq!(rb.range_cardinality(0x50000..0x60000), 2); + /// assert_eq!(rb.range_cardinality(0x10000..0x10000), 0); + /// assert_eq!(rb.range_cardinality(0x50000..=u32::MAX), 3); + /// ``` + pub fn range_cardinality<R>(&self, range: R) -> u64 + where + R: RangeBounds<u32>, + { + let (start, end) = match util::convert_range_to_inclusive(range) { + Some(range) => (*range.start(), *range.end()), + // Empty ranges have 0 bits set in them + None => return 0, + }; + + let (start_key, start_low) = util::split(start); + let (end_key, end_low) = util::split(end); + + let mut cardinality = 0; + + let i = match self.containers.binary_search_by_key(&start_key, |c| c.key) { + Ok(i) => { + let container = &self.containers[i]; + if start_key == end_key { + cardinality += container.rank(end_low) + } else { + cardinality += container.len(); + } + if start_low != 0 { + cardinality -= container.rank(start_low); + } + i + 1 + } + Err(i) => i, + }; + for container in &self.containers[i..] { + match container.key.cmp(&end_key) { + Ordering::Less => cardinality += container.len(), + Ordering::Equal => { + cardinality += container.rank(end_low); + break; + } + Ordering::Greater => { + break; + } + } + } + + cardinality + } + /// Clears all integers in this set.
/// # Examples From 5cfb7061e10918d783f697c797574fdc287ea98f Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Mon, 29 Aug 2022 00:25:43 -0400 Subject: [PATCH 3/5] Correct array contains_range --- src/bitmap/store/array_store/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/bitmap/store/array_store/mod.rs b/src/bitmap/store/array_store/mod.rs index 02b7f41a7..5d8427a63 100644 --- a/src/bitmap/store/array_store/mod.rs +++ b/src/bitmap/store/array_store/mod.rs @@ -132,9 +132,9 @@ impl ArrayStore { Err(_) => return false, }; - // If there are `range_count` items, the `range_count`th item should be the final item - // in the range, because this vec is sorted and has no duplicates - self.vec.get(start_i + range_count) == Some(&end) + // `start` is at `start_i`; the vec is sorted and has no duplicates, so the whole range is + // present exactly when the element `range_count - 1` slots later equals `end` + self.vec.get(start_i + range_count - 1) == Some(&end) } pub fn is_disjoint(&self, other: &Self) -> bool { From cbd8fe9949c164b19fb52accff64b146e53468eb Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Tue, 30 Aug 2022 18:14:03 -0400 Subject: [PATCH 4/5] Missed -1 When ignoring items before the range, should only remove the rank of one less than the lower bound of the range --- src/bitmap/inherent.rs | 2 +- tests/range_checks.rs | 0 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 tests/range_checks.rs diff --git a/src/bitmap/inherent.rs b/src/bitmap/inherent.rs index d67ebf3e4..6cd87c5da 100644 --- a/src/bitmap/inherent.rs +++ b/src/bitmap/inherent.rs @@ -372,7 +372,7 @@ impl RoaringBitmap { cardinality += container.len(); } if start_low != 0 { - cardinality -= container.rank(start_low); + cardinality -= container.rank(start_low - 1); } i + 1 } diff --git a/tests/range_checks.rs b/tests/range_checks.rs new file mode 100644 index 000000000..e69de29bb From 27798c0048c2a96cef599e84bca2127e72df62c4 Mon Sep 17 00:00:00 2001
From: Zachary Dremann Date: Tue, 30 Aug 2022 18:17:59 -0400 Subject: [PATCH 5/5] Add a proptest for range contains/cardinality --- tests/range_checks.rs | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/range_checks.rs b/tests/range_checks.rs index e69de29bb..430fcdd98 100644 --- a/tests/range_checks.rs +++ b/tests/range_checks.rs @@ -0,0 +1,30 @@ +use proptest::array::uniform2; +use proptest::collection::btree_set; +use proptest::prelude::*; +use roaring::RoaringBitmap; + +proptest! { + #[test] + fn proptest_range( + range in uniform2(..=262_143_u32), + extra in btree_set(..=262_143_u32, ..=100), // unique: no value is removed twice + ){ + let range = range[0]..range[1]; + let mut bitmap = RoaringBitmap::new(); + bitmap.insert_range(range.clone()); + assert!(bitmap.contains_range(range.clone())); + assert_eq!(bitmap.range_cardinality(range.clone()) as usize, range.len()); + + for &val in &extra { + bitmap.insert(val); + assert!(bitmap.contains_range(range.clone())); + assert_eq!(bitmap.range_cardinality(range.clone()) as usize, range.len()); + } + + for (i, &val) in extra.iter().filter(|x| range.contains(x)).enumerate() { + bitmap.remove(val); + assert!(!bitmap.contains_range(range.clone())); + assert_eq!(bitmap.range_cardinality(range.clone()) as usize, range.len() - i - 1); + } + } +}