diff --git a/src/bitmap/container.rs b/src/bitmap/container.rs
index d260270b..2e61dc48 100644
--- a/src/bitmap/container.rs
+++ b/src/bitmap/container.rs
@@ -94,6 +94,16 @@ impl Container {
         self.store.contains(index)
     }
 
+    /// Returns `true` if every value in `range` is present in this container's store.
+    pub fn contains_range(&self, range: RangeInclusive<u16>) -> bool {
+        self.store.contains_range(range)
+    }
+
+    /// Returns `true` if the underlying store is full.
+    pub fn is_full(&self) -> bool {
+        self.store.is_full()
+    }
+
     pub fn is_disjoint(&self, other: &Self) -> bool {
         self.store.is_disjoint(&other.store)
     }
diff --git a/src/bitmap/inherent.rs b/src/bitmap/inherent.rs
index 7ee5744d..6cd87c5d 100644
--- a/src/bitmap/inherent.rs
+++ b/src/bitmap/inherent.rs
@@ -1,3 +1,4 @@
+use std::cmp::Ordering;
 use std::ops::RangeBounds;
 
 use crate::RoaringBitmap;
@@ -268,6 +269,131 @@ impl RoaringBitmap {
         }
     }
 
+    /// Returns `true` if all values in the range are present in this set.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use roaring::RoaringBitmap;
+    ///
+    /// let mut rb = RoaringBitmap::new();
+    /// // An empty range is always contained
+    /// assert!(rb.contains_range(7..7));
+    ///
+    /// rb.insert_range(1..0xFFF);
+    /// assert!(rb.contains_range(1..0xFFF));
+    /// assert!(rb.contains_range(2..0xFFF));
+    /// // 0 is not contained
+    /// assert!(!rb.contains_range(0..2));
+    /// // 0xFFF is not contained
+    /// assert!(!rb.contains_range(1..=0xFFF));
+    /// ```
+    pub fn contains_range<R>(&self, range: R) -> bool
+    where
+        R: RangeBounds<u32>,
+    {
+        let (start, end) = match util::convert_range_to_inclusive(range) {
+            Some(range) => (*range.start(), *range.end()),
+            // Empty ranges are always contained
+            None => return true,
+        };
+        let (start_high, start_low) = util::split(start);
+        let (end_high, end_low) = util::split(end);
+        debug_assert!(start_high <= end_high);
+
+        let containers =
+            match self.containers.binary_search_by_key(&start_high, |container| container.key) {
+                Ok(i) => &self.containers[i..],
+                Err(_) => return false,
+            };
+
+        if start_high == end_high {
+            return containers[0].contains_range(start_low..=end_low);
+        }
+
+        let high_span = usize::from(end_high - start_high);
+        // If this contains everything in the range, there should be a container for every item in the span
+        // and the container that many items away should be the high key
+        let containers = match containers.get(high_span) {
+            Some(c) if c.key == end_high => &containers[..=high_span],
+            _ => return false,
+        };
+
+        match containers {
+            [first, rest @ .., last] => {
+                first.contains_range(start_low..=u16::MAX)
+                    && rest.iter().all(|container| container.is_full())
+                    && last.contains_range(0..=end_low)
+            }
+            _ => unreachable!("already validated containers has at least 2 items"),
+        }
+    }
+
+    /// Returns the number of elements in this set which are in the passed range.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use roaring::RoaringBitmap;
+    ///
+    /// let mut rb = RoaringBitmap::new();
+    /// rb.insert_range(0x10000..0x40000);
+    /// rb.insert(0x50001);
+    /// rb.insert(0x50005);
+    /// rb.insert(u32::MAX);
+    ///
+    /// assert_eq!(rb.range_cardinality(0..0x10000), 0);
+    /// assert_eq!(rb.range_cardinality(0x10000..0x40000), 0x30000);
+    /// assert_eq!(rb.range_cardinality(0x50000..0x60000), 2);
+    /// assert_eq!(rb.range_cardinality(0x10000..0x10000), 0);
+    /// assert_eq!(rb.range_cardinality(0x50000..=u32::MAX), 3);
+    /// ```
+    pub fn range_cardinality<R>(&self, range: R) -> u64
+    where
+        R: RangeBounds<u32>,
+    {
+        let (start, end) = match util::convert_range_to_inclusive(range) {
+            Some(range) => (*range.start(), *range.end()),
+            // Empty ranges have 0 bits set in them
+            None => return 0,
+        };
+
+        let (start_key, start_low) = util::split(start);
+        let (end_key, end_low) = util::split(end);
+
+        let mut cardinality = 0;
+
+        let i = match self.containers.binary_search_by_key(&start_key, |c| c.key) {
+            Ok(i) => {
+                let container = &self.containers[i];
+                if start_key == end_key {
+                    cardinality += container.rank(end_low)
+                } else {
+                    cardinality += container.len();
+                }
+                if start_low != 0 {
+                    cardinality -= container.rank(start_low - 1);
+                }
+                i + 1
+            }
+            Err(i) => i,
+        };
+        for container in &self.containers[i..] {
+            match container.key.cmp(&end_key) {
+                Ordering::Less => cardinality += container.len(),
+                Ordering::Equal => {
+                    cardinality += container.rank(end_low);
+                    break;
+                }
+                Ordering::Greater => {
+                    break;
+                }
+            }
+        }
+
+        cardinality
+    }
+
     /// Clears all integers in this set.
     ///
     /// # Examples
diff --git a/src/bitmap/store/array_store/mod.rs b/src/bitmap/store/array_store/mod.rs
index dbfb6dfa..5d8427a6 100644
--- a/src/bitmap/store/array_store/mod.rs
+++ b/src/bitmap/store/array_store/mod.rs
@@ -120,6 +120,24 @@ impl ArrayStore {
         self.vec.binary_search(&index).is_ok()
     }
 
+    /// Returns `true` if every value in `range` is present in this store.
+    pub fn contains_range(&self, range: RangeInclusive<u16>) -> bool {
+        let start = *range.start();
+        let end = *range.end();
+        let range_count = usize::from(end - start) + 1;
+        if self.vec.len() < range_count {
+            return false;
+        }
+        let start_i = match self.vec.binary_search(&start) {
+            Ok(i) => i,
+            Err(_) => return false,
+        };
+
+        // If there are `range_count` items, last item in the next range_count should be the
+        // expected end value, because this vec is sorted and has no duplicates
+        self.vec.get(start_i + range_count - 1) == Some(&end)
+    }
+
     pub fn is_disjoint(&self, other: &Self) -> bool {
         let (mut i1, mut i2) = (self.vec.iter(), other.vec.iter());
         let (mut value1, mut value2) = (i1.next(), i2.next());
@@ -446,6 +464,20 @@ mod tests {
         assert_eq!(into_vec(store), vec![1, 2, 4, 5, 6, 7, 8, 9]);
     }
 
+    #[test]
+    fn test_array_contains_range() {
+        let store = Store::Array(ArrayStore::from_vec_unchecked(vec![]));
+        assert!(!store.contains_range(0..=0));
+        assert!(!store.contains_range(0..=1));
+        assert!(!store.contains_range(1..=u16::MAX));
+
+        let store = Store::Array(ArrayStore::from_vec_unchecked(vec![0, 1, 2, 3, 4, 5, 100]));
+        assert!(store.contains_range(0..=0));
+        assert!(store.contains_range(0..=5));
+        assert!(!store.contains_range(0..=6));
+        assert!(store.contains_range(100..=100));
+    }
+
     #[test]
     fn test_array_insert_range_full_overlap() {
         let mut store = Store::Array(ArrayStore::from_vec_unchecked(vec![1, 2, 8, 9]));
diff --git a/src/bitmap/store/bitmap_store.rs b/src/bitmap/store/bitmap_store.rs
index 87de13ea..e7aa2ae0 100644
--- a/src/bitmap/store/bitmap_store.rs
+++ b/src/bitmap/store/bitmap_store.rs
@@ -178,6 +178,34 @@ impl BitmapStore {
         self.bits[key(index)] & (1 << bit(index)) != 0
     }
 
+    /// Returns `true` if every value in `range` is present in this store.
+    pub fn contains_range(&self, range: RangeInclusive<u16>) -> bool {
+        let start = *range.start();
+        let end = *range.end();
+        if self.len() < u64::from(end - start) + 1 {
+            return false;
+        }
+
+        let (start_i, start_bit) = (key(start), bit(start));
+        let (end_i, end_bit) = (key(end), bit(end));
+
+        // Bits `start_bit..` of the first word must be set
+        let start_mask = !((1 << start_bit) - 1);
+        // `end` is inclusive, so the mask must cover bits `0..=end_bit`. It is built without
+        // shifting by `end_bit + 1`, which would overflow when `end_bit` is the top bit.
+        let end_mask = ((1 << end_bit) - 1) | (1 << end_bit);
+
+        match &self.bits[start_i..=end_i] {
+            [] => unreachable!(),
+            &[word] => word & (start_mask & end_mask) == (start_mask & end_mask),
+            &[first, ref rest @ .., last] => {
+                (first & start_mask) == start_mask
+                    && rest.iter().all(|&word| word == !0)
+                    && (last & end_mask) == end_mask
+            }
+        }
+    }
+
     pub fn is_disjoint(&self, other: &BitmapStore) -> bool {
         self.bits.iter().zip(other.bits.iter()).all(|(&i1, &i2)| (i1 & i2) == 0)
     }
diff --git a/src/bitmap/store/mod.rs b/src/bitmap/store/mod.rs
index 1b570eb4..72dd832f 100644
--- a/src/bitmap/store/mod.rs
+++ b/src/bitmap/store/mod.rs
@@ -99,6 +99,19 @@ impl Store {
         }
     }
 
+    /// Returns `true` if every value in `range` is present in this store.
+    pub fn contains_range(&self, range: RangeInclusive<u16>) -> bool {
+        match self {
+            Array(vec) => vec.contains_range(range),
+            Bitmap(bits) => bits.contains_range(range),
+        }
+    }
+
+    /// Returns `true` if this store holds all `1 << 16` possible values.
+    pub fn is_full(&self) -> bool {
+        self.len() == (1 << 16)
+    }
+
     pub fn is_disjoint(&self, other: &Self) -> bool {
         match (self, other) {
             (Array(vec1), Array(vec2)) => vec1.is_disjoint(vec2),
diff --git a/tests/range_checks.rs b/tests/range_checks.rs
new file mode 100644
index 00000000..430fcdd9
--- /dev/null
+++ b/tests/range_checks.rs
@@ -0,0 +1,32 @@
+use proptest::array::uniform2;
+use proptest::collection::hash_set;
+use proptest::prelude::*;
+use roaring::RoaringBitmap;
+
+proptest! {
+    #[test]
+    fn proptest_range(
+        range in uniform2(..=262_143_u32),
+        extra in hash_set(..=262_143_u32, ..=100),
+    ){
+        let range = range[0]..range[1];
+        let mut bitmap = RoaringBitmap::new();
+        bitmap.insert_range(range.clone());
+        assert!(bitmap.contains_range(range.clone()));
+        assert_eq!(bitmap.range_cardinality(range.clone()) as usize, range.len());
+
+        for &val in &extra {
+            bitmap.insert(val);
+            assert!(bitmap.contains_range(range.clone()));
+            assert_eq!(bitmap.range_cardinality(range.clone()) as usize, range.len());
+        }
+
+        // `extra` is a set, so each in-range value is removed exactly once and every removal
+        // lowers the cardinality of the range by one
+        for (i, &val) in extra.iter().filter(|x| range.contains(x)).enumerate() {
+            bitmap.remove(val);
+            assert!(!bitmap.contains_range(range.clone()));
+            assert_eq!(bitmap.range_cardinality(range.clone()) as usize, range.len() - i - 1);
+        }
+    }
+}