Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce insert_range for the Treemap #240

Merged
merged 6 commits into from
Sep 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion benchmarks/benches/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use criterion::{
Throughput,
};

use roaring::{MultiOps, RoaringBitmap};
use roaring::{MultiOps, RoaringBitmap, RoaringTreemap};

use crate::datasets::Datasets;

Expand Down Expand Up @@ -674,6 +674,30 @@ fn insert_range_bitmap(c: &mut Criterion) {
}
}

/// Benchmarks `RoaringTreemap::insert_range` over `0..size` for several sizes,
/// once starting from an empty treemap and once from a treemap that already
/// holds the whole range.
fn insert_range_treemap(c: &mut Criterion) {
    let sizes = [1_000_u64, 10_000_u64, 2 * (u32::MAX as u64)];

    for size in sizes.iter().copied() {
        let mut group = c.benchmark_group("insert_range_treemap");
        group.throughput(criterion::Throughput::Elements(size));

        // Every measured run receives its own clone of an empty treemap.
        group.bench_function(format!("from_empty_{}", size), |b| {
            let empty = RoaringTreemap::new();
            b.iter_batched(
                || empty.clone(),
                |mut treemap| black_box(treemap.insert_range(0..size)),
                criterion::BatchSize::SmallInput,
            )
        });

        // Every measured run receives a clone that already contains `0..size`.
        group.bench_function(format!("pre_populated_{}", size), |b| {
            let mut populated = RoaringTreemap::new();
            populated.insert_range(0..size);
            b.iter_batched(
                || populated.clone(),
                |mut treemap| black_box(treemap.insert_range(0..size)),
                criterion::BatchSize::SmallInput,
            )
        });
    }
}

criterion_group!(
benches,
creation,
Expand All @@ -691,6 +715,7 @@ criterion_group!(
remove,
remove_range_bitmap,
insert_range_bitmap,
insert_range_treemap,
iteration,
is_empty,
serialization,
Expand Down
4 changes: 4 additions & 0 deletions src/bitmap/container.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ impl Container {
/// Creates a container for `key` backed by a freshly created store
/// (`Store::new()`).
pub fn new(key: u16) -> Container {
    let store = Store::new();
    Container { key, store }
}

/// Creates a container for `key` backed by a full store (`Store::full()`).
pub fn full(key: u16) -> Container {
    let store = Store::full();
    Container { key, store }
}
}

impl Container {
Expand Down
30 changes: 29 additions & 1 deletion src/bitmap/inherent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,24 @@ impl RoaringBitmap {
///
/// ```rust
/// use roaring::RoaringBitmap;
/// let mut rb = RoaringBitmap::new();
/// let rb = RoaringBitmap::new();
/// ```
pub fn new() -> RoaringBitmap {
RoaringBitmap { containers: Vec::new() }
}

/// Creates a `RoaringBitmap` made of one full container for every `u16` key.
///
/// # Examples
///
/// ```rust
/// use roaring::RoaringBitmap;
/// let rb = RoaringBitmap::full();
/// ```
pub fn full() -> RoaringBitmap {
    // One full container per key, in ascending key order.
    let containers = (0..=u16::MAX).map(Container::full).collect::<Vec<_>>();
    RoaringBitmap { containers }
}

/// Adds a value to the set.
///
/// Returns whether the value was absent from the set.
Expand Down Expand Up @@ -428,6 +440,22 @@ impl RoaringBitmap {
self.containers.is_empty()
}

/// Returns `true` if there are every possible integers in this set.
///
/// # Examples
///
/// ```rust
/// use roaring::RoaringBitmap;
///
/// let mut rb = RoaringBitmap::full();
/// assert!(!rb.is_empty());
/// assert!(rb.is_full());
/// ```
pub fn is_full(&self) -> bool {
self.containers.len() == (u16::MAX as usize + 1)
&& self.containers.iter().all(Container::is_full)
}

/// Returns the number of distinct integers added to the set.
///
/// # Examples
Expand Down
4 changes: 4 additions & 0 deletions src/bitmap/store/bitmap_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ impl BitmapStore {
BitmapStore { len: 0, bits: Box::new([0; BITMAP_LENGTH]) }
}

/// Creates a store with every bit of every word set.
///
/// `len` is the total number of bits: `BITMAP_LENGTH` words of 64 bits each.
pub fn full() -> BitmapStore {
    let bits = Box::new([u64::MAX; BITMAP_LENGTH]);
    let len = (BITMAP_LENGTH as u64) * 64;
    BitmapStore { len, bits }
}

pub fn try_from(len: u64, bits: Box<[u64; BITMAP_LENGTH]>) -> Result<BitmapStore, Error> {
let actual_len = bits.iter().map(|v| v.count_ones() as u64).sum();
if len != actual_len {
Expand Down
4 changes: 4 additions & 0 deletions src/bitmap/store/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ impl Store {
Store::Array(ArrayStore::new())
}

/// Creates a full store, represented as a `Bitmap` variant wrapping
/// `BitmapStore::full()`.
pub fn full() -> Store {
    let bitmap = BitmapStore::full();
    Store::Bitmap(bitmap)
}

pub fn insert(&mut self, index: u16) -> bool {
match self {
Array(vec) => vec.insert(index),
Expand Down
85 changes: 83 additions & 2 deletions src/treemap/inherent.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::collections::btree_map::{BTreeMap, Entry};
use std::iter;
use std::ops::RangeBounds;

use crate::RoaringBitmap;
Expand All @@ -13,12 +14,24 @@ impl RoaringTreemap {
///
/// ```rust
/// use roaring::RoaringTreemap;
/// let mut rb = RoaringTreemap::new();
/// let rb = RoaringTreemap::new();
/// ```
pub fn new() -> RoaringTreemap {
RoaringTreemap { map: BTreeMap::new() }
}

/// Creates a full `RoaringTreemap`.
///
/// # Examples
///
/// ```rust,ignore
/// use roaring::RoaringTreemap;
/// let rb = RoaringTreemap::full();
/// ```
pub fn full() -> RoaringTreemap {
RoaringTreemap { map: (0..=u32::MAX).zip(iter::repeat(RoaringBitmap::full())).collect() }
}

/// Adds a value to the set. Returns `true` if the value was not already present in the set.
///
/// # Examples
Expand All @@ -36,6 +49,60 @@ impl RoaringTreemap {
self.map.entry(hi).or_insert_with(RoaringBitmap::new).insert(lo)
}

/// Inserts every value of `range` into the set.
///
/// Returns how many of those values were not already present.
///
/// # Examples
///
/// ```rust
/// use roaring::RoaringTreemap;
///
/// let mut rb = RoaringTreemap::new();
/// rb.insert_range(2..4);
/// assert!(rb.contains(2));
/// assert!(rb.contains(3));
/// assert!(!rb.contains(4));
/// ```
pub fn insert_range<R: RangeBounds<u64>>(&mut self, range: R) -> u64 {
    // Nothing to insert when the range cannot be expressed as an inclusive one.
    let bounds = match util::convert_range_to_inclusive(range) {
        Some(bounds) => bounds,
        None => return 0,
    };

    let (first_hi, first_lo) = util::split(*bounds.start());
    let (last_hi, last_lo) = util::split(*bounds.end());

    let mut inserted = 0u64;

    // Walk the range one high-32-bit key at a time.
    for hi in first_hi..=last_hi {
        let slot = self.map.entry(hi);
        let is_first = hi == first_hi;
        let is_last = hi == last_hi;

        inserted += if is_first || is_last {
            // Boundary keys only receive part of the low 32-bit space: clamp the
            // low range on whichever side(s) coincide with the requested bounds.
            let lo_from = if is_first { first_lo } else { 0 };
            let lo_to = if is_last { last_lo } else { u32::MAX };
            slot.or_insert_with(RoaringBitmap::new).insert_range(lo_from..=lo_to)
        } else {
            // Interior keys are covered entirely, so their bitmap becomes a full one.
            // When a bitmap was already stored here, the values it held must not be
            // counted as newly inserted.
            let full_bitmap = RoaringBitmap::full();
            let full_len = full_bitmap.len();
            match slot {
                Entry::Vacant(vacant) => {
                    vacant.insert(full_bitmap);
                    full_len
                }
                Entry::Occupied(mut occupied) => full_len - occupied.insert(full_bitmap).len(),
            }
        };
    }

    inserted
}

/// Pushes `value` in the treemap only if it is greater than the current maximum value.
///
/// Returns whether the value was inserted.
Expand All @@ -58,7 +125,6 @@ impl RoaringTreemap {
self.map.entry(hi).or_insert_with(RoaringBitmap::new).push(lo)
}

///
/// Pushes `value` in the treemap only if it is greater than the current maximum value.
/// It is up to the caller to have validated index > self.max()
///
Expand Down Expand Up @@ -213,6 +279,21 @@ impl RoaringTreemap {
self.map.values().all(RoaringBitmap::is_empty)
}

/// Returns `true` if this set contains every possible `u64` value.
///
/// That requires one entry for each `u32` key, with every stored bitmap
/// being full itself.
///
/// # Examples
///
/// ```rust,ignore
/// use roaring::RoaringTreemap;
///
/// let rb = RoaringTreemap::full();
/// assert!(!rb.is_empty());
/// assert!(rb.is_full());
/// ```
pub fn is_full(&self) -> bool {
    // The key count is compared as `u64` because `u32::MAX as usize + 1`
    // overflows on targets where `usize` is 32 bits wide; a wrapped value of 0
    // would make an empty treemap report itself as full.
    const KEY_COUNT: u64 = u32::MAX as u64 + 1;

    self.map.len() as u64 == KEY_COUNT && self.map.values().all(RoaringBitmap::is_full)
}

/// Returns the number of distinct integers added to the set.
///
/// # Examples
Expand Down
46 changes: 35 additions & 11 deletions tests/treemap_lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ fn smoke() {
assert!(bitmap.contains(1));
assert_eq!(bitmap.len(), 1);
assert!(!bitmap.is_empty());
bitmap.insert(u64::max_value() - 2);
assert!(bitmap.contains(u64::max_value() - 2));
bitmap.insert(u64::MAX - 2);
assert!(bitmap.contains(u64::MAX - 2));
assert_eq!(bitmap.len(), 2);
bitmap.insert(u64::max_value());
assert!(bitmap.contains(u64::max_value()));
bitmap.insert(u64::MAX);
assert!(bitmap.contains(u64::MAX));
assert_eq!(bitmap.len(), 3);
bitmap.insert(2);
assert!(bitmap.contains(2));
Expand All @@ -28,9 +28,33 @@ fn smoke() {
assert!(!bitmap.contains(0));
assert!(bitmap.contains(1));
assert!(!bitmap.contains(100));
assert!(bitmap.contains(u64::max_value() - 2));
assert!(!bitmap.contains(u64::max_value() - 1));
assert!(bitmap.contains(u64::max_value()));
assert!(bitmap.contains(u64::MAX - 2));
assert!(!bitmap.contains(u64::MAX - 1));
assert!(bitmap.contains(u64::MAX));
}

#[test]
fn insert_range() {
    const SIGMA: u64 = u32::MAX as u64;

    let mut bitmap = RoaringTreemap::new();

    // A small half-open range: the count, length and maximum must all agree.
    let inserted = bitmap.insert_range(0..0x1000);
    assert_eq!(inserted, 0x1000);
    assert_eq!(bitmap.len(), 0x1000);
    assert_eq!(bitmap.max(), Some(0xFFF));

    // The two largest values representable in 32 bits.
    let inserted = bitmap.insert_range(SIGMA - 1..SIGMA + 1);
    assert_eq!(inserted, 2);
    assert!(bitmap.contains(2));
    assert!(bitmap.contains(0xFFF));
    assert!(!bitmap.contains(0x1000));

    // A wide inclusive range spanning several multiples of `u32::MAX`.
    bitmap.clear();
    bitmap.insert_range(2 * SIGMA..=4 * SIGMA);

    assert_eq!(bitmap.min(), Some(2 * SIGMA));
    assert_eq!(bitmap.max(), Some(4 * SIGMA));

    assert!(bitmap.contains(3 * SIGMA));
}

#[test]
Expand All @@ -53,16 +77,16 @@ fn test_max() {
assert_eq!(bitmap.max(), Some(0));
bitmap.insert(1);
assert_eq!(bitmap.max(), Some(1));
bitmap.insert(u64::max_value());
assert_eq!(bitmap.max(), Some(u64::max_value()));
bitmap.insert(u64::MAX);
assert_eq!(bitmap.max(), Some(u64::MAX));
}

#[test]
fn test_min() {
let mut bitmap = RoaringTreemap::new();
assert_eq!(bitmap.min(), None);
bitmap.insert(u64::max_value());
assert_eq!(bitmap.min(), Some(u64::max_value()));
bitmap.insert(u64::MAX);
assert_eq!(bitmap.min(), Some(u64::MAX));
bitmap.insert(1);
assert_eq!(bitmap.min(), Some(1));
bitmap.insert(0);
Expand Down