From 50eda41182dc3d11666cc720622ff4ad90a0798c Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Fri, 1 Nov 2024 20:53:58 -0400 Subject: [PATCH] implement range based iteration Implement `bitmap.iter_range()` and `bitmap.into_iter_range()`, based on `advance_to()` and `advance_back_to()`. Co-authored-by: Matthew Herzl --- roaring/src/bitmap/inherent.rs | 20 +++--- roaring/src/bitmap/iter.rs | 120 +++++++++++++++++++++++++++++++++ roaring/src/bitmap/util.rs | 85 +++++++++++++++-------- roaring/tests/iter_range.rs | 95 ++++++++++++++++++++++++++ 4 files changed, 282 insertions(+), 38 deletions(-) create mode 100644 roaring/tests/iter_range.rs diff --git a/roaring/src/bitmap/inherent.rs b/roaring/src/bitmap/inherent.rs index b6791a37..06d293e4 100644 --- a/roaring/src/bitmap/inherent.rs +++ b/roaring/src/bitmap/inherent.rs @@ -94,8 +94,8 @@ impl RoaringBitmap { R: RangeBounds, { let (start, end) = match util::convert_range_to_inclusive(range) { - Some(range) => (*range.start(), *range.end()), - None => return 0, + Ok(range) => (*range.start(), *range.end()), + Err(_) => return 0, }; let (start_container_key, start_index) = util::split(start); @@ -239,8 +239,8 @@ impl RoaringBitmap { R: RangeBounds, { let (start, end) = match util::convert_range_to_inclusive(range) { - Some(range) => (*range.start(), *range.end()), - None => return 0, + Ok(range) => (*range.start(), *range.end()), + Err(_) => return 0, }; let (start_container_key, start_index) = util::split(start); @@ -309,9 +309,9 @@ impl RoaringBitmap { R: RangeBounds, { let (start, end) = match util::convert_range_to_inclusive(range) { - Some(range) => (*range.start(), *range.end()), - // Empty ranges are always contained - None => return true, + Ok(range) => (*range.start(), *range.end()), + // Empty/Invalid ranges are always contained + Err(_) => return true, }; let (start_high, start_low) = util::split(start); let (end_high, end_low) = util::split(end); @@ -369,9 +369,9 @@ impl RoaringBitmap { R: RangeBounds, { let (start, end) = match util::convert_range_to_inclusive(range) { - Some(range) => (*range.start(), *range.end()), - // Empty ranges have 0 bits set in them - None => return 0, + Ok(range) => (*range.start(), *range.end()), + // Empty/invalid ranges have 0 bits set in them + Err(_) => return 0, }; let (start_key, start_low) = util::split(start); diff --git a/roaring/src/bitmap/iter.rs b/roaring/src/bitmap/iter.rs index 5568511e..31273cfc 100644 --- a/roaring/src/bitmap/iter.rs +++ b/roaring/src/bitmap/iter.rs @@ -1,5 +1,6 @@ use alloc::vec; use core::iter::FusedIterator; +use core::ops::RangeBounds; use core::slice; use super::container::Container; @@ -155,6 +156,10 @@ impl Iter<'_> { Iter { front: None, containers: containers.iter(), back: None } } + fn empty() -> Self { + Self::new(&[]) + } + /// Advance the iterator to the first position where the item has a value >= `n` /// /// # Examples @@ -199,6 +204,10 @@ impl IntoIter { IntoIter { front: None, containers: containers.into_iter(), back: None } } + fn empty() -> Self { + Self::new(Vec::new()) + } + /// Advance the iterator to the first position where the item has a value >= `n` /// /// # Examples @@ -552,6 +561,117 @@ impl RoaringBitmap { pub fn iter(&self) -> Iter { Iter::new(&self.containers) } + + /// Iterator over values within a range stored in the RoaringBitmap. + /// + /// # Examples + /// + /// ```rust + /// use core::ops::Bound; + /// use roaring::RoaringBitmap; + /// + /// let bitmap = RoaringBitmap::from([0, 1, 2, 3, 4, 5, 10, 11, 12, 20, 21, u32::MAX]); + /// let mut iter = bitmap.range(10..20); + /// + /// assert_eq!(iter.next(), Some(10)); + /// assert_eq!(iter.next(), Some(11)); + /// assert_eq!(iter.next(), Some(12)); + /// assert_eq!(iter.next(), None); + /// + /// let mut iter = bitmap.range(100..); + /// assert_eq!(iter.next(), Some(u32::MAX)); + /// assert_eq!(iter.next(), None); + /// + /// let mut iter = bitmap.range((Bound::Excluded(0), Bound::Included(10))); + /// assert_eq!(iter.next(), Some(1)); + /// assert_eq!(iter.next(), Some(2)); + /// assert_eq!(iter.next(), Some(3)); + /// assert_eq!(iter.next(), Some(4)); + /// assert_eq!(iter.next(), Some(5)); + /// assert_eq!(iter.next(), Some(10)); + /// assert_eq!(iter.next(), None); + /// ``` + pub fn range(&self, range: R) -> Iter<'_> + where + R: RangeBounds, + { + let range = match util::convert_range_to_inclusive(range) { + Ok(range) => range, + Err(util::ConvertRangeError::Empty) => return Iter::empty(), + Err(util::ConvertRangeError::StartGreaterThanEnd) => { + panic!("range start is greater than range end") + } + Err(util::ConvertRangeError::StartAndEndEqualExcluded) => { + panic!("range start and end are equal and excluded") + } + }; + let (start, end) = (*range.start(), *range.end()); + let mut iter = self.iter(); + if start != 0 { + iter.advance_to(start); + } + if end != u32::MAX { + iter.advance_back_to(end); + } + iter + } + + /// Iterator over values within a range stored in the RoaringBitmap. + /// + /// # Examples + /// + /// ```rust + /// use core::ops::Bound; + /// use roaring::RoaringBitmap; + /// + /// fn bitmap() -> RoaringBitmap { + /// RoaringBitmap::from([0, 1, 2, 3, 4, 5, 10, 11, 12, 20, 21, u32::MAX]) + /// } + /// + /// let mut iter = bitmap().into_range(10..20); + /// + /// assert_eq!(iter.next(), Some(10)); + /// assert_eq!(iter.next(), Some(11)); + /// assert_eq!(iter.next(), Some(12)); + /// assert_eq!(iter.next(), None); + /// + /// let mut iter = bitmap().into_range(100..); + /// assert_eq!(iter.next(), Some(u32::MAX)); + /// assert_eq!(iter.next(), None); + /// + /// let mut iter = bitmap().into_range((Bound::Excluded(0), Bound::Included(10))); + /// assert_eq!(iter.next(), Some(1)); + /// assert_eq!(iter.next(), Some(2)); + /// assert_eq!(iter.next(), Some(3)); + /// assert_eq!(iter.next(), Some(4)); + /// assert_eq!(iter.next(), Some(5)); + /// assert_eq!(iter.next(), Some(10)); + /// assert_eq!(iter.next(), None); + /// ``` + pub fn into_range(self, range: R) -> IntoIter + where + R: RangeBounds, + { + let range = match util::convert_range_to_inclusive(range) { + Ok(range) => range, + Err(util::ConvertRangeError::Empty) => return IntoIter::empty(), + Err(util::ConvertRangeError::StartGreaterThanEnd) => { + panic!("range start is greater than range end") + } + Err(util::ConvertRangeError::StartAndEndEqualExcluded) => { + panic!("range start and end are equal and excluded") + } + }; + let (start, end) = (*range.start(), *range.end()); + let mut iter = self.into_iter(); + if start != 0 { + iter.advance_to(start); + } + if end != u32::MAX { + iter.advance_back_to(end); + } + iter + } } impl<'a> IntoIterator for &'a RoaringBitmap { diff --git a/roaring/src/bitmap/util.rs b/roaring/src/bitmap/util.rs index 3565c34a..80a4896e 100644 --- a/roaring/src/bitmap/util.rs +++ b/roaring/src/bitmap/util.rs @@ -14,30 +14,56 @@ pub fn join(high: u16, low: u16) -> u32 { (u32::from(high) << 16) + u32::from(low) } +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum ConvertRangeError { + Empty, + StartGreaterThanEnd, + StartAndEndEqualExcluded, +} + /// Convert a `RangeBounds` object to `RangeInclusive`, -pub fn convert_range_to_inclusive(range: R) -> Option> +pub fn convert_range_to_inclusive(range: R) -> Result, ConvertRangeError> where R: RangeBounds, { - let start: u32 = match range.start_bound() { - Bound::Included(&i) => i, - Bound::Excluded(&i) => i.checked_add(1)?, - Bound::Unbounded => 0, - }; - let end: u32 = match range.end_bound() { - Bound::Included(&i) => i, - Bound::Excluded(&i) => i.checked_sub(1)?, - Bound::Unbounded => u32::MAX, - }; - if end < start { - return None; + let start_bound = range.start_bound().cloned(); + let end_bound = range.end_bound().cloned(); + match (start_bound, end_bound) { + (Bound::Excluded(s), Bound::Excluded(e)) if s == e => { + Err(ConvertRangeError::StartAndEndEqualExcluded) + } + (Bound::Included(s) | Bound::Excluded(s), Bound::Included(e) | Bound::Excluded(e)) + if s > e => + { + Err(ConvertRangeError::StartGreaterThanEnd) + } + _ => { + let start = match start_bound { + Bound::Included(s) => s, + Bound::Excluded(s) => s.checked_add(1).ok_or(ConvertRangeError::Empty)?, + Bound::Unbounded => 0, + }; + + let end = match end_bound { + Bound::Included(e) => e, + Bound::Excluded(e) => e.checked_sub(1).ok_or(ConvertRangeError::Empty)?, + Bound::Unbounded => u32::MAX, + }; + + if start > end { + // This handles e.g. `x..x`: we've ruled out `start > end` overall, so a value must + // have been changed via exclusion. + Err(ConvertRangeError::Empty) + } else { + Ok(start..=end) + } + } } - Some(start..=end) } #[cfg(test)] mod test { - use super::{convert_range_to_inclusive, join, split}; + use super::{convert_range_to_inclusive, join, split, ConvertRangeError}; use core::ops::Bound; #[test] @@ -67,27 +93,30 @@ mod test { #[test] #[allow(clippy::reversed_empty_ranges)] fn test_convert_range_to_inclusive() { - assert_eq!(Some(1..=5), convert_range_to_inclusive(1..6)); - assert_eq!(Some(1..=u32::MAX), convert_range_to_inclusive(1..)); - assert_eq!(Some(0..=u32::MAX), convert_range_to_inclusive(..)); - assert_eq!(Some(16..=16), convert_range_to_inclusive(16..=16)); + assert_eq!(Ok(1..=5), convert_range_to_inclusive(1..6)); + assert_eq!(Ok(1..=u32::MAX), convert_range_to_inclusive(1..)); + assert_eq!(Ok(0..=u32::MAX), convert_range_to_inclusive(..)); + assert_eq!(Ok(16..=16), convert_range_to_inclusive(16..=16)); assert_eq!( - Some(11..=19), + Ok(11..=19), convert_range_to_inclusive((Bound::Excluded(10), Bound::Excluded(20))) ); - assert_eq!(None, convert_range_to_inclusive(0..0)); - assert_eq!(None, convert_range_to_inclusive(5..5)); - assert_eq!(None, convert_range_to_inclusive(1..0)); - assert_eq!(None, convert_range_to_inclusive(10..5)); + assert_eq!(Err(ConvertRangeError::Empty), convert_range_to_inclusive(0..0)); + assert_eq!(Err(ConvertRangeError::Empty), convert_range_to_inclusive(5..5)); + assert_eq!(Err(ConvertRangeError::StartGreaterThanEnd), convert_range_to_inclusive(1..0)); + assert_eq!(Err(ConvertRangeError::StartGreaterThanEnd), convert_range_to_inclusive(10..5)); assert_eq!( - None, + Err(ConvertRangeError::Empty), convert_range_to_inclusive((Bound::Excluded(u32::MAX), Bound::Included(u32::MAX))) ); assert_eq!( - None, - convert_range_to_inclusive((Bound::Excluded(u32::MAX), Bound::Included(u32::MAX))) + Err(ConvertRangeError::StartAndEndEqualExcluded), + convert_range_to_inclusive((Bound::Excluded(u32::MAX), Bound::Excluded(u32::MAX))) + ); + assert_eq!( + Err(ConvertRangeError::Empty), + convert_range_to_inclusive((Bound::Excluded(0), Bound::Included(0))) ); - assert_eq!(None, convert_range_to_inclusive((Bound::Excluded(0), Bound::Included(0)))); } } diff --git a/roaring/tests/iter_range.rs b/roaring/tests/iter_range.rs new file mode 100644 index 00000000..7953c396 --- /dev/null +++ b/roaring/tests/iter_range.rs @@ -0,0 +1,95 @@ +use proptest::collection::btree_set; +use proptest::prelude::*; +use roaring::RoaringBitmap; +use std::ops::Bound; + +#[test] +fn range_array() { + let mut rb = RoaringBitmap::new(); + rb.insert(0); + rb.insert(1); + rb.insert(10); + rb.insert(100_000); + rb.insert(999_999); + rb.insert(1_000_000); + + let expected = vec![1, 10, 100_000, 999_999]; + let actual: Vec = rb.range(1..=999_999).collect(); + assert_eq!(expected, actual); +} + +#[test] +fn range_bitmap() { + let rb = RoaringBitmap::from_sorted_iter(10..5000).unwrap(); + + let expected = vec![10, 11, 12]; + let actual: Vec = rb.range(0..13).collect(); + assert_eq!(expected, actual); +} + +#[test] +fn empty_range() { + let rb = RoaringBitmap::from_sorted_iter(10..5000).unwrap(); + + let mut it = rb.range(0..0); + assert_eq!(it.next(), None); + let mut it = rb.range(..0); + assert_eq!(it.next(), None); + it = rb.range(13..13); + assert_eq!(it.next(), None); + it = rb.range((Bound::Excluded(1), Bound::Included(1))); + assert_eq!(it.next(), None); + it = rb.range(u32::MAX..u32::MAX); + assert_eq!(it.next(), None); + it = rb.range((Bound::Excluded(u32::MAX), Bound::Included(u32::MAX))); + assert_eq!(it.next(), None); + it = rb.range((Bound::Excluded(u32::MAX), Bound::Unbounded)); + assert_eq!(it.next(), None); +} + +#[test] +#[should_panic(expected = "range start is greater than range end")] +fn invalid_range() { + let rb = RoaringBitmap::from_sorted_iter(10..5000).unwrap(); + #[allow(clippy::reversed_empty_ranges)] + let _ = rb.range(13..0); +} + +#[test] +#[should_panic(expected = "range start and end are equal and excluded")] +fn invalid_range_equal_excluded() { + let rb = RoaringBitmap::from_sorted_iter(10..5000).unwrap(); + let _ = rb.range((Bound::Excluded(13), Bound::Excluded(13))); +} + +#[test] +#[should_panic(expected = "range start is greater than range end")] +fn into_invalid_range() { + let rb = RoaringBitmap::from_sorted_iter(10..5000).unwrap(); + #[allow(clippy::reversed_empty_ranges)] + let _ = rb.into_range(13..0); +} + +#[test] +#[should_panic(expected = "range start and end are equal and excluded")] +fn into_invalid_range_equal_excluded() { + let rb = RoaringBitmap::from_sorted_iter(10..5000).unwrap(); + let _ = rb.into_range((Bound::Excluded(13), Bound::Excluded(13))); +} + +proptest! { + #[test] + fn proptest_range( + values in btree_set(..=262_143_u32, ..=1000), + range_a in 0u32..262_143, + range_b in 0u32..262_143, + ){ + let range = range_a.min(range_b)..=range_a.max(range_b); + + let bitmap = RoaringBitmap::from_sorted_iter(values.iter().cloned()).unwrap(); + let expected: Vec = values.range(range.clone()).copied().collect(); + let actual: Vec = bitmap.range(range.clone()).collect(); + + assert_eq!(expected, actual); + } +}