Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
237: contains_range / range_cardinality functions r=Kerollmops a=Dr-Emann

Fixes RoaringBitmap#235 

Co-authored-by: Zachary Dremann <dremann@gmail.com>
  • Loading branch information
bors[bot] and Dr-Emann authored Aug 31, 2022
2 parents be80c17 + 0e53a3f commit 996675b
Show file tree
Hide file tree
Showing 6 changed files with 230 additions and 0 deletions.
8 changes: 8 additions & 0 deletions src/bitmap/container.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,14 @@ impl Container {
self.store.contains(index)
}

/// Reports whether every value of the inclusive `range` is present in this container.
///
/// The question is forwarded unchanged to the backing store.
pub fn contains_range(&self, range: RangeInclusive<u16>) -> bool {
    let store = &self.store;
    store.contains_range(range)
}

/// Reports whether the backing store considers itself full.
pub fn is_full(&self) -> bool {
    let store = &self.store;
    store.is_full()
}

/// Reports whether this container and `other` are disjoint, as decided by
/// comparing their backing stores.
pub fn is_disjoint(&self, other: &Self) -> bool {
    let (lhs, rhs) = (&self.store, &other.store);
    lhs.is_disjoint(rhs)
}
Expand Down
126 changes: 126 additions & 0 deletions src/bitmap/inherent.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::cmp::Ordering;
use std::ops::RangeBounds;

use crate::RoaringBitmap;
Expand Down Expand Up @@ -268,6 +269,131 @@ impl RoaringBitmap {
}
}

/// Checks whether every value of `range` is a member of this set.
///
/// An empty range is considered contained, whatever the set holds.
///
/// # Examples
///
/// ```
/// use roaring::RoaringBitmap;
///
/// let mut rb = RoaringBitmap::new();
/// // An empty range is always contained
/// assert!(rb.contains_range(7..7));
///
/// rb.insert_range(1..0xFFF);
/// assert!(rb.contains_range(1..0xFFF));
/// assert!(rb.contains_range(2..0xFFF));
/// // 0 is not contained
/// assert!(!rb.contains_range(0..2));
/// // 0xFFF is not contained
/// assert!(!rb.contains_range(1..=0xFFF));
/// ```
pub fn contains_range<R>(&self, range: R) -> bool
where
    R: RangeBounds<u32>,
{
    // An empty range is vacuously contained.
    let inclusive = match util::convert_range_to_inclusive(range) {
        Some(inclusive) => inclusive,
        None => return true,
    };
    let (first_key, first_low) = util::split(*inclusive.start());
    let (last_key, last_low) = util::split(*inclusive.end());
    debug_assert!(first_key <= last_key);

    // The container holding the start of the range has to exist.
    let first_idx = match self.containers.binary_search_by_key(&first_key, |c| c.key) {
        Ok(idx) => idx,
        Err(_) => return false,
    };
    let tail = &self.containers[first_idx..];

    // The whole range lives in a single container.
    if first_key == last_key {
        return tail[0].contains_range(first_low..=last_low);
    }

    // A fully covered range needs one container per key in the span, so the
    // container `span` positions after the first one must carry the last key.
    let span = usize::from(last_key - first_key);
    let window = match tail.get(span) {
        Some(candidate) if candidate.key == last_key => &tail[..=span],
        _ => return false,
    };

    if let [first, middle @ .., last] = window {
        // The first container must be filled from the start offset upwards, every
        // container in between must be full, and the last one must be filled up to
        // the end offset.
        first.contains_range(first_low..=u16::MAX)
            && middle.iter().all(|c| c.is_full())
            && last.contains_range(0..=last_low)
    } else {
        unreachable!("already validated containers has at least 2 items")
    }
}

/// Counts the elements of this set that fall inside `range`.
///
/// An empty range yields `0`.
///
/// # Examples
///
/// ```
/// use roaring::RoaringBitmap;
///
/// let mut rb = RoaringBitmap::new();
/// rb.insert_range(0x10000..0x40000);
/// rb.insert(0x50001);
/// rb.insert(0x50005);
/// rb.insert(u32::MAX);
///
/// assert_eq!(rb.range_cardinality(0..0x10000), 0);
/// assert_eq!(rb.range_cardinality(0x10000..0x40000), 0x30000);
/// assert_eq!(rb.range_cardinality(0x50000..0x60000), 2);
/// assert_eq!(rb.range_cardinality(0x10000..0x10000), 0);
/// assert_eq!(rb.range_cardinality(0x50000..=u32::MAX), 3);
/// ```
pub fn range_cardinality<R>(&self, range: R) -> u64
where
    R: RangeBounds<u32>,
{
    // Nothing can be counted inside an empty range.
    let inclusive = match util::convert_range_to_inclusive(range) {
        Some(inclusive) => inclusive,
        None => return 0,
    };
    let (first_key, first_low) = util::split(*inclusive.start());
    let (last_key, last_low) = util::split(*inclusive.end());

    let mut total = 0;

    // Handle the container holding the start of the range (if any) and find the
    // index of the first container that still has to be visited.
    let next = match self.containers.binary_search_by_key(&first_key, |c| c.key) {
        Ok(idx) => {
            let head = &self.containers[idx];
            // Count up to the end offset when the range ends in this container,
            // otherwise count the whole container.
            total += if first_key == last_key { head.rank(last_low) } else { head.len() };
            // Discard whatever sits below the start offset.
            if first_low != 0 {
                total -= head.rank(first_low - 1);
            }
            idx + 1
        }
        Err(idx) => idx,
    };

    for container in &self.containers[next..] {
        match container.key.cmp(&last_key) {
            // Past the end of the range: done.
            Ordering::Greater => break,
            // The container holding the end of the range: count up to the end offset.
            Ordering::Equal => {
                total += container.rank(last_low);
                break;
            }
            // Strictly inside the range: every element counts.
            Ordering::Less => total += container.len(),
        }
    }

    total
}

/// Clears all integers in this set.
///
/// # Examples
Expand Down
31 changes: 31 additions & 0 deletions src/bitmap/store/array_store/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,23 @@ impl ArrayStore {
self.vec.binary_search(&index).is_ok()
}

/// Reports whether every value of the inclusive `range` is stored in this array.
pub fn contains_range(&self, range: RangeInclusive<u16>) -> bool {
    let (first, last) = (*range.start(), *range.end());
    let needed = usize::from(last - first) + 1;

    // Fewer stored values than the range spans: it cannot be fully contained.
    if needed > self.vec.len() {
        return false;
    }

    match self.vec.binary_search(&first) {
        // The vec is sorted and has no duplicates, so if all `needed` values are
        // present, the one `needed - 1` slots after `first` is exactly `last`.
        Ok(pos) => self.vec.get(pos + needed - 1) == Some(&last),
        Err(_) => false,
    }
}

pub fn is_disjoint(&self, other: &Self) -> bool {
let (mut i1, mut i2) = (self.vec.iter(), other.vec.iter());
let (mut value1, mut value2) = (i1.next(), i2.next());
Expand Down Expand Up @@ -446,6 +463,20 @@ mod tests {
assert_eq!(into_vec(store), vec![1, 2, 4, 5, 6, 7, 8, 9]);
}

#[test]
fn test_array_contains_range() {
    // An empty array store contains no non-empty range.
    let empty = Store::Array(ArrayStore::from_vec_unchecked(Vec::new()));
    assert!(!empty.contains_range(0..=0));
    assert!(!empty.contains_range(0..=1));
    assert!(!empty.contains_range(1..=u16::MAX));

    // A contiguous run 0..=5 followed by an isolated value.
    let populated = Store::Array(ArrayStore::from_vec_unchecked(vec![0, 1, 2, 3, 4, 5, 100]));
    assert!(populated.contains_range(0..=0));
    assert!(populated.contains_range(0..=5));
    // 6 is missing, so the run cannot be extended past 5.
    assert!(!populated.contains_range(0..=6));
    assert!(populated.contains_range(100..=100));
}

#[test]
fn test_array_insert_range_full_overlap() {
let mut store = Store::Array(ArrayStore::from_vec_unchecked(vec![1, 2, 8, 9]));
Expand Down
24 changes: 24 additions & 0 deletions src/bitmap/store/bitmap_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,30 @@ impl BitmapStore {
self.bits[key(index)] & (1 << bit(index)) != 0
}

/// Reports whether every value of the inclusive `range` is set in this bitmap.
///
/// Both `range.start()` and `range.end()` are part of the range and are checked.
pub fn contains_range(&self, range: RangeInclusive<u16>) -> bool {
    let start = *range.start();
    let end = *range.end();
    // Fewer set bits than the range spans: it cannot be fully contained.
    if self.len() < u64::from(end - start) + 1 {
        return false;
    }

    let (start_i, start_bit) = (key(start), bit(start));
    let (end_i, end_bit) = (key(end), bit(end));

    // Every bit at index `start_bit` and above.
    let start_mask = !((1 << start_bit) - 1);
    // Every bit up to AND INCLUDING `end_bit`: the range is inclusive, so the bit for
    // `end` itself has to be checked. (`(1 << end_bit) - 1` would stop one bit short
    // and never look at `end`.) `!1 << end_bit` keeps only the bits strictly above
    // `end_bit`; negating it yields bits `0..=end_bit`, for any word width.
    let end_mask = !((!1) << end_bit);

    match &self.bits[start_i..=end_i] {
        [] => unreachable!(),
        // Start and end fall in the same word: both masks apply to it.
        &[word] => {
            let mask = start_mask & end_mask;
            word & mask == mask
        }
        // Partial first word, completely set middle words, partial last word.
        &[first, ref rest @ .., last] => {
            (first & start_mask) == start_mask
                && rest.iter().all(|&word| word == !0)
                && (last & end_mask) == end_mask
        }
    }
}

/// Reports whether this bitmap and `other` have no set bit in common.
pub fn is_disjoint(&self, other: &BitmapStore) -> bool {
    // Disjoint means no pair of corresponding words shares a set bit.
    let mut word_pairs = self.bits.iter().zip(other.bits.iter());
    !word_pairs.any(|(&lhs, &rhs)| lhs & rhs != 0)
}
Expand Down
11 changes: 11 additions & 0 deletions src/bitmap/store/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,17 @@ impl Store {
}
}

/// Reports whether every value of the inclusive `range` is present in this store,
/// dispatching to the array or bitmap implementation.
pub fn contains_range(&self, range: RangeInclusive<u16>) -> bool {
    match self {
        Bitmap(bitmap) => bitmap.contains_range(range),
        Array(array) => array.contains_range(range),
    }
}

/// Reports whether this store holds `1 << 16` values.
pub fn is_full(&self) -> bool {
    let full_len = 1 << 16;
    self.len() == full_len
}

pub fn is_disjoint(&self, other: &Self) -> bool {
match (self, other) {
(Array(vec1), Array(vec2)) => vec1.is_disjoint(vec2),
Expand Down
30 changes: 30 additions & 0 deletions tests/range_checks.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
use proptest::array::uniform2;
use proptest::collection::vec;
use proptest::prelude::*;
use roaring::RoaringBitmap;

// Property test for `contains_range` / `range_cardinality`: a freshly inserted range
// must be reported as contained with the expected cardinality, inserting unrelated
// values must not change that, and removing values inside the range must break
// containment and lower the cardinality accordingly.
proptest! {
    #[test]
    fn proptest_range(
        range in uniform2(..=262_143_u32),
        extra in vec(..=262_143_u32, ..=100),
    ){
        // May be empty when `range[0] >= range[1]`; every assertion below has to hold then too.
        let range = range[0]..range[1];
        let mut bitmap = RoaringBitmap::new();
        bitmap.insert_range(range.clone());
        assert!(bitmap.contains_range(range.clone()));
        assert_eq!(bitmap.range_cardinality(range.clone()) as usize, range.len());

        for &val in &extra {
            bitmap.insert(val);
            assert!(bitmap.contains_range(range.clone()));
            assert_eq!(bitmap.range_cardinality(range.clone()) as usize, range.len());
        }

        // `extra` may hold the same value several times. Only the first removal of a
        // value shrinks the set, so the expected cardinality is derived from the number
        // of DISTINCT values removed so far, not from the number of loop iterations.
        let mut removed = std::collections::HashSet::new();
        for &val in extra.iter().filter(|x| range.contains(x)) {
            bitmap.remove(val);
            removed.insert(val);
            assert!(!bitmap.contains_range(range.clone()));
            assert_eq!(
                bitmap.range_cardinality(range.clone()) as usize,
                range.len() - removed.len()
            );
        }
    }
}

0 comments on commit 996675b

Please sign in to comment.