From 803a1cd117d9a4d4e38e2c7ba752c3165749048a Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Fri, 22 Dec 2023 17:13:18 +0100 Subject: [PATCH] Primary caching 1: more `VecDeque` extensions (#4592) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding some more `VecDeque` extensions that are used by the upcoming cache implementation. Also made sure to _not_ run these benchmarks on CI since they are only useful when iterating specifically on the implementation of these extensions and are not worth the CI compute time otherwise. Some numbers for posterity / git log (5950X, Arch): ``` vec_deque/insert_range/prefilled/back 1.00 5.6±0.07µs 170.8 MElem/sec vec_deque/insert_range/prefilled/front 1.00 5.5±0.12µs 172.4 MElem/sec vec_deque/insert_range/prefilled/middle 1.00 7.2±0.28µs 131.7 MElem/sec vec_deque/remove/prefilled/back 1.00 2.5±0.04µs 384.2 KElem/sec vec_deque/remove/prefilled/front 1.00 2.5±0.03µs 390.7 KElem/sec vec_deque/remove/prefilled/middle 1.00 3.3±0.13µs 292.1 KElem/sec vec_deque/remove_range/prefilled/back 1.00 2.5±0.12µs 375.8 MElem/sec vec_deque/remove_range/prefilled/front 1.00 2.5±0.11µs 378.4 MElem/sec vec_deque/remove_range/prefilled/middle 1.00 5.2±0.25µs 182.7 MElem/sec vec_deque/swap_remove/prefilled/back 1.00 2.6±0.02µs 381.6 KElem/sec vec_deque/swap_remove/prefilled/front 1.00 2.6±0.05µs 380.8 KElem/sec vec_deque/swap_remove/prefilled/middle 1.00 2.5±0.08µs 383.3 KElem/sec vec_deque/swap_remove_front/prefilled/back 1.00 2.5±0.05µs 392.0 KElem/sec vec_deque/swap_remove_front/prefilled/front 1.00 2.5±0.10µs 394.2 KElem/sec vec_deque/swap_remove_front/prefilled/middle 1.00 2.6±0.02µs 378.8 KElem/sec ``` --- Part of the primary caching series of PRs (index search, joins, deserialization): - #4592 - #4593 --- crates/re_log_types/benches/vec_deque_ext.rs | 262 +++++++++++++------ crates/re_log_types/src/lib.rs | 2 +- crates/re_log_types/src/vec_deque_ext.rs | 153 ++++++++++- 3 files changed, 331 
insertions(+), 86 deletions(-) diff --git a/crates/re_log_types/benches/vec_deque_ext.rs b/crates/re_log_types/benches/vec_deque_ext.rs index 0cce9e2617f4..4fe00e89f5c1 100644 --- a/crates/re_log_types/benches/vec_deque_ext.rs +++ b/crates/re_log_types/benches/vec_deque_ext.rs @@ -1,18 +1,26 @@ //! Simple benchmark suite to keep track of how the different removal methods for [`VecDeque`] //! behave in practice. -#[global_allocator] -static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; - use std::collections::VecDeque; use criterion::{criterion_group, criterion_main, Criterion}; +use itertools::Itertools; -use re_log_types::VecDequeRemovalExt as _; +use re_log_types::{VecDequeInsertionExt as _, VecDequeRemovalExt as _}; // --- -criterion_group!(benches, remove, swap_remove, swap_remove_front); +#[global_allocator] +static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; + +criterion_group!( + benches, + insert_range, + remove_range, + remove, + swap_remove, + swap_remove_front, +); criterion_main!(benches); // --- @@ -21,11 +29,13 @@ criterion_main!(benches); #[cfg(debug_assertions)] mod constants { pub const INITIAL_NUM_ENTRIES: usize = 1; + pub const NUM_MODIFIED_ELEMENTS: usize = 1; } #[cfg(not(debug_assertions))] mod constants { pub const INITIAL_NUM_ENTRIES: usize = 20_000; + pub const NUM_MODIFIED_ELEMENTS: usize = 1_000; } #[allow(clippy::wildcard_imports)] @@ -33,97 +43,189 @@ use self::constants::*; // --- -fn remove(c: &mut Criterion) { - { - let mut group = c.benchmark_group("flat_vec_deque"); - group.throughput(criterion::Throughput::Elements(1)); - group.bench_function("remove/prefilled/front", |b| { - let base = create_prefilled(); - b.iter(|| { - let mut v: VecDeque = base.clone(); - v.remove(0); - v - }); +fn insert_range(c: &mut Criterion) { + if std::env::var("CI").is_ok() { + return; + } + + let inserted = (0..NUM_MODIFIED_ELEMENTS as i64).collect_vec(); + + let mut group = c.benchmark_group("vec_deque"); + 
group.throughput(criterion::Throughput::Elements(inserted.len() as _)); + + group.bench_function("insert_range/prefilled/front", |b| { + let base = create_prefilled(); + b.iter(|| { + let mut v: VecDeque = base.clone(); + v.insert_range(0, inserted.clone().into_iter()); + v }); - group.bench_function("remove/prefilled/middle", |b| { - let base = create_prefilled(); - b.iter(|| { - let mut v: VecDeque = base.clone(); - v.remove(INITIAL_NUM_ENTRIES / 2); - v - }); + }); + + group.bench_function("insert_range/prefilled/middle", |b| { + let base = create_prefilled(); + b.iter(|| { + let mut v: VecDeque = base.clone(); + v.insert_range(INITIAL_NUM_ENTRIES / 2, inserted.clone().into_iter()); + v }); - group.bench_function("remove/prefilled/back", |b| { - let base = create_prefilled(); - b.iter(|| { - let mut v: VecDeque = base.clone(); - v.remove(INITIAL_NUM_ENTRIES - 1); - v - }); + }); + + group.bench_function("insert_range/prefilled/back", |b| { + let base = create_prefilled(); + b.iter(|| { + let mut v: VecDeque = base.clone(); + v.insert_range(INITIAL_NUM_ENTRIES, inserted.clone().into_iter()); + v }); + }); +} + +fn remove_range(c: &mut Criterion) { + if std::env::var("CI").is_ok() { + return; } + + let mut group = c.benchmark_group("vec_deque"); + group.throughput(criterion::Throughput::Elements(NUM_MODIFIED_ELEMENTS as _)); + + group.bench_function("remove_range/prefilled/front", |b| { + let base = create_prefilled(); + b.iter(|| { + let mut v: VecDeque = base.clone(); + v.remove_range(0..NUM_MODIFIED_ELEMENTS); + v + }); + }); + + group.bench_function("remove_range/prefilled/middle", |b| { + let base = create_prefilled(); + b.iter(|| { + let mut v: VecDeque = base.clone(); + v.remove_range( + INITIAL_NUM_ENTRIES / 2 - NUM_MODIFIED_ELEMENTS / 2 + ..INITIAL_NUM_ENTRIES / 2 + NUM_MODIFIED_ELEMENTS / 2, + ); + v + }); + }); + + group.bench_function("remove_range/prefilled/back", |b| { + let base = create_prefilled(); + b.iter(|| { + let mut v: VecDeque = 
base.clone(); + v.remove_range(INITIAL_NUM_ENTRIES - NUM_MODIFIED_ELEMENTS..INITIAL_NUM_ENTRIES); + v + }); + }); } -fn swap_remove(c: &mut Criterion) { - { - let mut group = c.benchmark_group("flat_vec_deque"); - group.throughput(criterion::Throughput::Elements(1)); - group.bench_function("swap_remove/prefilled/front", |b| { - let base = create_prefilled(); - b.iter(|| { - let mut v: VecDeque = base.clone(); - v.swap_remove(0); - v - }); +fn remove(c: &mut Criterion) { + if std::env::var("CI").is_ok() { + return; + } + + let mut group = c.benchmark_group("vec_deque"); + group.throughput(criterion::Throughput::Elements(1)); + + group.bench_function("remove/prefilled/front", |b| { + let base = create_prefilled(); + b.iter(|| { + let mut v: VecDeque = base.clone(); + v.remove(0); + v }); - group.bench_function("swap_remove/prefilled/middle", |b| { - let base = create_prefilled(); - b.iter(|| { - let mut v: VecDeque = base.clone(); - v.swap_remove(INITIAL_NUM_ENTRIES / 2); - v - }); + }); + + group.bench_function("remove/prefilled/middle", |b| { + let base = create_prefilled(); + b.iter(|| { + let mut v: VecDeque = base.clone(); + v.remove(INITIAL_NUM_ENTRIES / 2); + v }); - group.bench_function("swap_remove/prefilled/back", |b| { - let base = create_prefilled(); - b.iter(|| { - let mut v: VecDeque = base.clone(); - v.swap_remove(INITIAL_NUM_ENTRIES - 1); - v - }); + }); + + group.bench_function("remove/prefilled/back", |b| { + let base = create_prefilled(); + b.iter(|| { + let mut v: VecDeque = base.clone(); + v.remove(INITIAL_NUM_ENTRIES - 1); + v }); + }); +} + +fn swap_remove(c: &mut Criterion) { + if std::env::var("CI").is_ok() { + return; } + + let mut group = c.benchmark_group("vec_deque"); + group.throughput(criterion::Throughput::Elements(1)); + + group.bench_function("swap_remove/prefilled/front", |b| { + let base = create_prefilled(); + b.iter(|| { + let mut v: VecDeque = base.clone(); + v.swap_remove(0); + v + }); + }); + + 
group.bench_function("swap_remove/prefilled/middle", |b| { + let base = create_prefilled(); + b.iter(|| { + let mut v: VecDeque = base.clone(); + v.swap_remove(INITIAL_NUM_ENTRIES / 2); + v + }); + }); + + group.bench_function("swap_remove/prefilled/back", |b| { + let base = create_prefilled(); + b.iter(|| { + let mut v: VecDeque = base.clone(); + v.swap_remove(INITIAL_NUM_ENTRIES - 1); + v + }); + }); } fn swap_remove_front(c: &mut Criterion) { - { - let mut group = c.benchmark_group("flat_vec_deque"); - group.throughput(criterion::Throughput::Elements(1)); - group.bench_function("swap_remove_front/prefilled/front", |b| { - let base = create_prefilled(); - b.iter(|| { - let mut v: VecDeque = base.clone(); - v.swap_remove_front(0); - v - }); + if std::env::var("CI").is_ok() { + return; + } + + let mut group = c.benchmark_group("vec_deque"); + group.throughput(criterion::Throughput::Elements(1)); + + group.bench_function("swap_remove_front/prefilled/front", |b| { + let base = create_prefilled(); + b.iter(|| { + let mut v: VecDeque = base.clone(); + v.swap_remove_front(0); + v }); - group.bench_function("swap_remove_front/prefilled/middle", |b| { - let base = create_prefilled(); - b.iter(|| { - let mut v: VecDeque = base.clone(); - v.swap_remove_front(INITIAL_NUM_ENTRIES / 2); - v - }); + }); + + group.bench_function("swap_remove_front/prefilled/middle", |b| { + let base = create_prefilled(); + b.iter(|| { + let mut v: VecDeque = base.clone(); + v.swap_remove_front(INITIAL_NUM_ENTRIES / 2); + v }); - group.bench_function("swap_remove_front/prefilled/back", |b| { - let base = create_prefilled(); - b.iter(|| { - let mut v: VecDeque = base.clone(); - v.swap_remove_front(INITIAL_NUM_ENTRIES - 1); - v - }); + }); + + group.bench_function("swap_remove_front/prefilled/back", |b| { + let base = create_prefilled(); + b.iter(|| { + let mut v: VecDeque = base.clone(); + v.swap_remove_front(INITIAL_NUM_ENTRIES - 1); + v }); - } + }); } // --- diff --git 
a/crates/re_log_types/src/lib.rs b/crates/re_log_types/src/lib.rs index 329a4683fd2d..52fe26104a48 100644 --- a/crates/re_log_types/src/lib.rs +++ b/crates/re_log_types/src/lib.rs @@ -54,7 +54,7 @@ pub use self::time::{Duration, Time, TimeZone}; pub use self::time_point::{TimeInt, TimePoint, TimeType, Timeline, TimelineName}; pub use self::time_range::{TimeRange, TimeRangeF}; pub use self::time_real::TimeReal; -pub use self::vec_deque_ext::{VecDequeRemovalExt, VecDequeSortingExt}; +pub use self::vec_deque_ext::{VecDequeInsertionExt, VecDequeRemovalExt, VecDequeSortingExt}; #[cfg(not(target_arch = "wasm32"))] pub use self::data_table_batcher::{ diff --git a/crates/re_log_types/src/vec_deque_ext.rs b/crates/re_log_types/src/vec_deque_ext.rs index 5b0ef51ab115..54ef233253b6 100644 --- a/crates/re_log_types/src/vec_deque_ext.rs +++ b/crates/re_log_types/src/vec_deque_ext.rs @@ -1,5 +1,6 @@ -use std::collections::VecDeque; +use std::{collections::VecDeque, ops::Range}; +// -- // --- /// Extends [`VecDeque`] with extra sorting routines. @@ -71,6 +72,67 @@ fn is_sorted() { // --- +/// Extends [`VecDeque`] with extra insertion routines. +pub trait VecDequeInsertionExt { + /// Inserts multiple elements at `index` within the deque, shifting all elements + /// with indices greater than or equal to `index` towards the back. + /// + /// This is O(1) if `index` corresponds to either the start or the end of the deque. + /// Otherwise, this means splitting the deque into two pieces then stitching them back together + /// at both ends of the added data. + /// + /// Panics if `index` is out of bounds. 
+ fn insert_range(&mut self, index: usize, values: impl ExactSizeIterator); +} + +impl VecDequeInsertionExt for VecDeque { + fn insert_range(&mut self, index: usize, values: impl ExactSizeIterator) { + if index == self.len() { + self.extend(values); // has a specialization fast-path builtin + } else if index == 0 { + let n = values.len(); + self.extend(values); + self.rotate_right(n); + } else { + let right = self.split_off(index); + + // NOTE: definitely more elegant, but _much_ slower :( + // self.extend(values); + // self.extend(right); + + *self = std::mem::take(self) + .into_iter() + .chain(values) + .chain(right) + .collect(); + } + } +} + +#[test] +fn insert_range() { + let mut v: VecDeque = vec![].into(); + + assert!(v.is_empty()); + + v.insert_range(0, [1, 2, 3].into_iter()); + assert_deque_eq([1, 2, 3], v.clone()); + + v.insert_range(0, [4, 5].into_iter()); + assert_deque_eq([4, 5, 1, 2, 3], v.clone()); + + v.insert_range(2, std::iter::once(6)); + assert_deque_eq([4, 5, 6, 1, 2, 3], v.clone()); + + v.insert_range(v.len(), [7, 8, 9, 10].into_iter()); + assert_deque_eq([4, 5, 6, 1, 2, 3, 7, 8, 9, 10], v.clone()); + + v.insert_range(5, [11, 12].into_iter()); + assert_deque_eq([4, 5, 6, 1, 2, 11, 12, 3, 7, 8, 9, 10], v.clone()); +} + +// --- + /// Extends [`VecDeque`] with extra removal routines. pub trait VecDequeRemovalExt { /// Removes an element from anywhere in the deque and returns it, replacing it with @@ -81,8 +143,6 @@ pub trait VecDequeRemovalExt { /// In either case, this is *O*(1). /// /// Returns `None` if `index` is out of bounds. - /// - /// Element at index 0 is the front of the queue. fn swap_remove(&mut self, index: usize) -> Option; /// Splits the deque into two at the given index. @@ -93,9 +153,23 @@ pub trait VecDequeRemovalExt { /// If `at` is equal or greater than the length, the returned `VecDeque` is empty. /// /// Note that the capacity of `self` does not change. - /// - /// Element at index 0 is the front of the queue. 
fn split_off_or_default(&mut self, at: usize) -> Self; + + /// Removes the elements in the given `range` from the deque. + /// + /// This is O(1) if `range` either starts at the beginning of the deque, or ends at the end of + /// the deque, or both. + /// Otherwise, this means splitting the deque into three pieces, dropping the middle one, then + /// stitching back the remaining two. + /// + /// This doesn't do any kind of element re-ordering: if the deque was sorted before, it's + /// still sorted after. + /// + /// Panics if `range` is out of bounds. + // + // NOTE: We take a `Range` rather than a `impl RangeBounds` because we rely on the fact that + // `range` must be contiguous. + fn remove_range(&mut self, range: Range); } impl VecDequeRemovalExt for VecDeque { @@ -128,6 +202,27 @@ impl VecDequeRemovalExt for VecDeque { } self.split_off(at) } + + #[inline] + fn remove_range(&mut self, range: Range) { + if range.start == 0 && range.end == self.len() { + self.clear(); + } else if range.start == 0 { + self.rotate_left(range.len()); + self.truncate(self.len() - range.len()); + } else if range.end == self.len() { + self.truncate(self.len() - range.len()); + } else { + // NOTE: More elegant, but also 70% slower (!) 
+ // let mid_and_right = self.split_off(range.start); + // self.extend(mid_and_right.into_iter().skip(range.len())); + + let mut mid_and_right = self.split_off(range.start); + mid_and_right.rotate_left(range.len()); + mid_and_right.truncate(mid_and_right.len() - range.len()); + self.extend(mid_and_right); + } + } } #[test] @@ -159,3 +254,51 @@ fn swap_remove() { assert_eq!(Some(5), v.swap_remove(0)); assert!(v.is_sorted()); } + +#[test] +fn remove_range() { + let mut v: VecDeque = vec![].into(); + + assert!(v.is_empty()); + assert!(v.is_sorted()); + + v.insert_range(0, [1, 2, 3, 4, 5, 6, 7, 8, 9].into_iter()); + assert_deque_eq([1, 2, 3, 4, 5, 6, 7, 8, 9], v.clone()); + assert!(v.is_sorted()); + + { + let mut v = v.clone(); + v.remove_range(0..v.len()); + assert!(v.is_empty()); + assert!(v.is_sorted()); + } + + v.remove_range(0..2); + assert_deque_eq([3, 4, 5, 6, 7, 8, 9], v.clone()); + assert!(v.is_sorted()); + + v.remove_range(v.len() - 2..v.len()); + assert_deque_eq([3, 4, 5, 6, 7], v.clone()); + assert!(v.is_sorted()); + + v.remove_range(1..v.len() - 1); + assert_deque_eq([3, 7], v.clone()); + assert!(v.is_sorted()); + + v.remove_range(0..1); + assert_deque_eq([7], v.clone()); + assert!(v.is_sorted()); + + v.remove_range(0..1); + assert_deque_eq([], v.clone()); + assert!(v.is_sorted()); +} + +// --- + +#[cfg(test)] +fn assert_deque_eq(expected: impl IntoIterator, got: impl IntoIterator) { + let expected = expected.into_iter().collect::>(); + let got = got.into_iter().collect::>(); + similar_asserts::assert_eq!(expected, got); +}