From ef62affc6e638882f428b69f7857d3b37a89ae18 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Wed, 29 Mar 2023 20:32:25 +0200 Subject: [PATCH 1/5] revamp bench suite --- crates/re_arrow_store/benches/data_store.rs | 237 ++++++++++++-------- 1 file changed, 146 insertions(+), 91 deletions(-) diff --git a/crates/re_arrow_store/benches/data_store.rs b/crates/re_arrow_store/benches/data_store.rs index cd055537b0a9..056f25464de8 100644 --- a/crates/re_arrow_store/benches/data_store.rs +++ b/crates/re_arrow_store/benches/data_store.rs @@ -8,88 +8,113 @@ use re_arrow_store::{DataStore, DataStoreConfig, LatestAtQuery, RangeQuery, Time use re_log_types::{ component_types::{InstanceKey, Rect2D}, datagen::{build_frame_nr, build_some_instances, build_some_rects}, - Component as _, ComponentName, DataRow, EntityPath, MsgId, TimeType, Timeline, + Component as _, ComponentName, DataRow, DataTable, EntityPath, MsgId, TimeType, Timeline, }; // --- #[cfg(not(debug_assertions))] -const NUM_FRAMES: i64 = 100; +const NUM_ROWS: i64 = 1_000; #[cfg(not(debug_assertions))] -const NUM_RECTS: i64 = 100; +const NUM_INSTANCES: i64 = 1_000; // `cargo test` also runs the benchmark setup code, so make sure they run quickly: #[cfg(debug_assertions)] -const NUM_FRAMES: i64 = 1; +const NUM_ROWS: i64 = 1; #[cfg(debug_assertions)] -const NUM_RECTS: i64 = 1; +const NUM_INSTANCES: i64 = 1; // --- Benchmarks --- -// TODO(cmc): need additional benches for full tables - fn insert(c: &mut Criterion) { - { - let rows = build_rows(NUM_RECTS as usize); - let mut group = c.benchmark_group("datastore/insert/batch/rects"); + for packed in [false, true] { + let mut group = c.benchmark_group(format!( + "datastore/num_rows={NUM_ROWS}/num_instances={NUM_INSTANCES}/packed={packed}/insert" + )); group.throughput(criterion::Throughput::Elements( - (NUM_RECTS * NUM_FRAMES) as _, + (NUM_INSTANCES * NUM_ROWS) as _, )); - group.bench_function("insert", |b| { - b.iter(|| insert_rows(Default::default(), InstanceKey::name(), rows.iter())); + + let table = build_table(NUM_INSTANCES as usize, packed); + + // Default config + group.bench_function("default", |b| { + b.iter(|| insert_table(Default::default(), InstanceKey::name(), &table)); }); + + // Emulate more or less buckets + let num_rows_per_bucket = [0, 2, 32, 2048]; + for num_rows_per_bucket in num_rows_per_bucket { + group.bench_function(format!("bucketsz={num_rows_per_bucket}"), |b| { + b.iter(|| { + insert_table( + DataStoreConfig { + index_bucket_nb_rows: num_rows_per_bucket, + component_bucket_nb_rows: num_rows_per_bucket, + ..Default::default() + }, + InstanceKey::name(), + &table, + ) + }); + }); + } } } -fn latest_at_batch(c: &mut Criterion) { - { - let rows = build_rows(NUM_RECTS as usize); - let store = insert_rows(Default::default(), InstanceKey::name(), rows.iter()); - let mut group = c.benchmark_group("datastore/latest_at/batch/rects"); - group.throughput(criterion::Throughput::Elements(NUM_RECTS as _)); - group.bench_function("query", |b| { - b.iter(|| { - let results = latest_data_at(&store, Rect2D::name(), &[Rect2D::name()]); - let rects = results[0] - .as_ref() - .unwrap() - .as_any() - .downcast_ref::() - .unwrap(); - assert_eq!(NUM_RECTS as usize, rects.len()); - }); +fn latest_at(c: &mut Criterion) { + for packed in [false, true] { + let mut group = c.benchmark_group(format!( + "datastore/num_rows={NUM_ROWS}/num_instances={NUM_INSTANCES}/packed={packed}/latest_at" + )); + group.throughput(criterion::Throughput::Elements(NUM_INSTANCES as _)); + + let table = build_table(NUM_INSTANCES as usize, packed); + let store = insert_table(Default::default(), InstanceKey::name(), &table); + + // Default config + group.bench_function("default", |b| { + b.iter(|| insert_table(Default::default(), InstanceKey::name(), &table)); }); + + // Emulate more or less buckets + let num_rows_per_bucket = [0, 2, 32, 2048]; + for num_rows_per_bucket in num_rows_per_bucket { + group.bench_function(format!("bucketsz={num_rows_per_bucket}"), |b| { + b.iter(|| { + let results = latest_data_at(&store, Rect2D::name(), &[Rect2D::name()]); + let rects = results[0] + .as_ref() + .unwrap() + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(NUM_INSTANCES as usize, rects.len()); + }); + }); + } } } -fn latest_at_missing_components(c: &mut Criterion) { - // Simulate the worst possible case: many many buckets. - let config = DataStoreConfig { - index_bucket_size_bytes: 0, - index_bucket_nb_rows: 0, - ..Default::default() - }; - - { - let msgs = build_rows(NUM_RECTS as usize); - let store = insert_rows(config.clone(), InstanceKey::name(), msgs.iter()); - let mut group = c.benchmark_group("datastore/latest_at/missing_components"); - group.throughput(criterion::Throughput::Elements(NUM_RECTS as _)); - group.bench_function("primary", |b| { +fn latest_at_missing(c: &mut Criterion) { + for packed in [false, true] { + let mut group = c.benchmark_group(format!( + "datastore/num_rows={NUM_ROWS}/num_instances={NUM_INSTANCES}/packed={packed}/latest_at_missing" + )); + group.throughput(criterion::Throughput::Elements(NUM_INSTANCES as _)); + + let table = build_table(NUM_INSTANCES as usize, packed); + let store = insert_table(Default::default(), InstanceKey::name(), &table); + + // Default config + group.bench_function("primary/default", |b| { b.iter(|| { let results = latest_data_at(&store, "non_existing_component".into(), &[Rect2D::name()]); assert!(results[0].is_none()); }); }); - } - - { - let msgs = build_rows(NUM_RECTS as usize); - let store = insert_rows(config, InstanceKey::name(), msgs.iter()); - let mut group = c.benchmark_group("datastore/latest_at/missing_components"); - group.throughput(criterion::Throughput::Elements(NUM_RECTS as _)); - group.bench_function("secondaries", |b| { + group.bench_function("secondaries/default", |b| { b.iter(|| { let results = latest_data_at( &store, @@ -105,51 +130,76 @@ fn latest_at_missing_components(c: &mut Criterion) { assert!(results[2].is_none()); }); }); - } -} - -fn range_batch(c: &mut Criterion) { - { - let msgs = build_rows(NUM_RECTS as usize); - let store = insert_rows(Default::default(), InstanceKey::name(), msgs.iter()); - let mut group = c.benchmark_group("datastore/range/batch/rects"); - group.throughput(criterion::Throughput::Elements( - (NUM_RECTS * NUM_FRAMES) as _, - )); - group.bench_function("query", |b| { - b.iter(|| { - let msgs = range_data(&store, [Rect2D::name()]); - for (cur_time, (time, results)) in msgs.enumerate() { - let time = time.unwrap(); - assert_eq!(cur_time as i64, time.as_i64()); + // Emulate more or less buckets + let num_rows_per_bucket = [0, 2, 32, 2048]; + for num_rows_per_bucket in num_rows_per_bucket { + group.bench_function(format!("bucketsz={num_rows_per_bucket}"), |b| { + b.iter(|| { + let results = latest_data_at(&store, Rect2D::name(), &[Rect2D::name()]); let rects = results[0] .as_ref() .unwrap() .as_any() .downcast_ref::() .unwrap(); - assert_eq!(NUM_RECTS as usize, rects.len()); - } + assert_eq!(NUM_INSTANCES as usize, rects.len()); + }); }); + } + } +} + +fn range(c: &mut Criterion) { + for packed in [false, true] { + let mut group = c.benchmark_group(format!( + "datastore/num_rows={NUM_ROWS}/num_instances={NUM_INSTANCES}/packed={packed}/range" + )); + group.throughput(criterion::Throughput::Elements( + (NUM_INSTANCES * NUM_ROWS) as _, + )); + + let table = build_table(NUM_INSTANCES as usize, packed); + let store = insert_table(Default::default(), InstanceKey::name(), &table); + + // Default config + group.bench_function("default", |b| { + b.iter(|| insert_table(Default::default(), InstanceKey::name(), &table)); }); + + // Emulate more or less buckets + let num_rows_per_bucket = [0, 2, 32, 2048]; + for num_rows_per_bucket in num_rows_per_bucket { + group.bench_function(format!("bucketsz={num_rows_per_bucket}"), |b| { + b.iter(|| { + let msgs = range_data(&store, [Rect2D::name()]); + for (cur_time, (time, results)) in msgs.enumerate() { + let time = time.unwrap(); + assert_eq!(cur_time as i64, time.as_i64()); + + let rects = results[0] + .as_ref() + .unwrap() + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(NUM_INSTANCES as usize, rects.len()); + } + }); + }); + } } } -criterion_group!( - benches, - insert, - latest_at_batch, - latest_at_missing_components, - range_batch, -); +criterion_group!(benches, insert, latest_at, latest_at_missing, range); criterion_main!(benches); // --- Helpers --- -fn build_rows(n: usize) -> Vec { - (0..NUM_FRAMES) - .map(move |frame_idx| { +fn build_table(n: usize, packed: bool) -> DataTable { + let mut table = DataTable::from_rows( + MsgId::ZERO, + (0..NUM_ROWS).map(move |frame_idx| { DataRow::from_cells2( MsgId::random(), "rects", @@ -157,17 +207,25 @@ fn build_rows(n: usize) -> Vec { n as _, (build_some_instances(n), build_some_rects(n)), ) - }) - .collect() + }), + ); + + // Do a serialization roundtrip to pack everything in contiguous memory. + if packed { + let (schema, columns) = table.serialize().unwrap(); + table = DataTable::deserialize(MsgId::ZERO, &schema, &columns).unwrap(); + } + + table } -fn insert_rows<'a>( +fn insert_table( config: DataStoreConfig, cluster_key: ComponentName, - rows: impl Iterator, + table: &DataTable, ) -> DataStore { let mut store = DataStore::new(cluster_key, config); - rows.for_each(|row| store.insert_row(row).unwrap()); + store.insert_table(table).unwrap(); store } @@ -177,7 +235,7 @@ fn latest_data_at( secondaries: &[ComponentName; N], ) -> [Option>; N] { let timeline_frame_nr = Timeline::new("frame_nr", TimeType::Sequence); - let timeline_query = LatestAtQuery::new(timeline_frame_nr, (NUM_FRAMES / 2).into()); + let timeline_query = LatestAtQuery::new(timeline_frame_nr, (NUM_ROWS / 2).into()); let ent_path = EntityPath::from("rects"); let row_indices = store @@ -191,10 +249,7 @@ fn range_data( components: [ComponentName; N], ) -> impl Iterator, [Option>; N])> + '_ { let timeline_frame_nr = Timeline::new("frame_nr", TimeType::Sequence); - let query = RangeQuery::new( - timeline_frame_nr, - TimeRange::new(0.into(), NUM_FRAMES.into()), - ); + let query = RangeQuery::new(timeline_frame_nr, TimeRange::new(0.into(), NUM_ROWS.into())); let ent_path = EntityPath::from("rects"); store From 9dad206ab640abd2b672050f45d0a9416464078b Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Wed, 29 Mar 2023 20:57:53 +0200 Subject: [PATCH 2/5] i kinda forgot to, you know, put the code in --- crates/re_arrow_store/benches/data_store.rs | 31 +++++++++++++++++++-- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/crates/re_arrow_store/benches/data_store.rs b/crates/re_arrow_store/benches/data_store.rs index 056f25464de8..73a0e064e469 100644 --- a/crates/re_arrow_store/benches/data_store.rs +++ b/crates/re_arrow_store/benches/data_store.rs @@ -70,7 +70,6 @@ fn latest_at(c: &mut Criterion) { group.throughput(criterion::Throughput::Elements(NUM_INSTANCES as _)); let table = build_table(NUM_INSTANCES as usize, packed); - let store = insert_table(Default::default(), InstanceKey::name(), &table); // Default config group.bench_function("default", |b| { @@ -80,6 +79,15 @@ fn latest_at(c: &mut Criterion) { // Emulate more or less buckets let num_rows_per_bucket = [0, 2, 32, 2048]; for num_rows_per_bucket in num_rows_per_bucket { + let store = insert_table( + DataStoreConfig { + index_bucket_nb_rows: num_rows_per_bucket, + component_bucket_nb_rows: num_rows_per_bucket, + ..Default::default() + }, + InstanceKey::name(), + &table, + ); group.bench_function(format!("bucketsz={num_rows_per_bucket}"), |b| { b.iter(|| { let results = latest_data_at(&store, Rect2D::name(), &[Rect2D::name()]); @@ -104,9 +112,9 @@ fn latest_at_missing(c: &mut Criterion) { group.throughput(criterion::Throughput::Elements(NUM_INSTANCES as _)); let table = build_table(NUM_INSTANCES as usize, packed); - let store = insert_table(Default::default(), InstanceKey::name(), &table); // Default config + let store = insert_table(Default::default(), InstanceKey::name(), &table); group.bench_function("primary/default", |b| { b.iter(|| { let results = @@ -134,6 +142,15 @@ fn latest_at_missing(c: &mut Criterion) { // Emulate more or less buckets let num_rows_per_bucket = [0, 2, 32, 2048]; for num_rows_per_bucket in num_rows_per_bucket { + let store = insert_table( + DataStoreConfig { + index_bucket_nb_rows: num_rows_per_bucket, + component_bucket_nb_rows: num_rows_per_bucket, + ..Default::default() + }, + InstanceKey::name(), + &table, + ); group.bench_function(format!("bucketsz={num_rows_per_bucket}"), |b| { b.iter(|| { let results = latest_data_at(&store, Rect2D::name(), &[Rect2D::name()]); @@ -160,7 +177,6 @@ fn range(c: &mut Criterion) { )); let table = build_table(NUM_INSTANCES as usize, packed); - let store = insert_table(Default::default(), InstanceKey::name(), &table); // Default config group.bench_function("default", |b| { @@ -170,6 +186,15 @@ fn range(c: &mut Criterion) { // Emulate more or less buckets let num_rows_per_bucket = [0, 2, 32, 2048]; for num_rows_per_bucket in num_rows_per_bucket { + let store = insert_table( + DataStoreConfig { + index_bucket_nb_rows: num_rows_per_bucket, + component_bucket_nb_rows: num_rows_per_bucket, + ..Default::default() + }, + InstanceKey::name(), + &table, + ); group.bench_function(format!("bucketsz={num_rows_per_bucket}"), |b| { b.iter(|| { let msgs = range_data(&store, [Rect2D::name()]); From a574423234d03f234703d1d214daaaa599555d1f Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Wed, 29 Mar 2023 21:04:24 +0200 Subject: [PATCH 3/5] make sure nothing we dont get surprised by size limits --- crates/re_arrow_store/benches/data_store.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/crates/re_arrow_store/benches/data_store.rs b/crates/re_arrow_store/benches/data_store.rs index 73a0e064e469..aaf4684d74a7 100644 --- a/crates/re_arrow_store/benches/data_store.rs +++ b/crates/re_arrow_store/benches/data_store.rs @@ -51,6 +51,8 @@ fn insert(c: &mut Criterion) { DataStoreConfig { index_bucket_nb_rows: num_rows_per_bucket, component_bucket_nb_rows: num_rows_per_bucket, + index_bucket_size_bytes: u64::MAX, + component_bucket_size_bytes: u64::MAX, ..Default::default() }, InstanceKey::name(), @@ -83,6 +85,8 @@ fn latest_at(c: &mut Criterion) { DataStoreConfig { index_bucket_nb_rows: num_rows_per_bucket, component_bucket_nb_rows: num_rows_per_bucket, + index_bucket_size_bytes: u64::MAX, + component_bucket_size_bytes: u64::MAX, ..Default::default() }, InstanceKey::name(), @@ -146,6 +150,8 @@ fn latest_at_missing(c: &mut Criterion) { DataStoreConfig { index_bucket_nb_rows: num_rows_per_bucket, component_bucket_nb_rows: num_rows_per_bucket, + index_bucket_size_bytes: u64::MAX, + component_bucket_size_bytes: u64::MAX, ..Default::default() }, InstanceKey::name(), @@ -190,6 +196,8 @@ fn range(c: &mut Criterion) { DataStoreConfig { index_bucket_nb_rows: num_rows_per_bucket, component_bucket_nb_rows: num_rows_per_bucket, + index_bucket_size_bytes: u64::MAX, + component_bucket_size_bytes: u64::MAX, ..Default::default() }, InstanceKey::name(), From b624d41d2a4d825e05f742cc73c1c61b907f7818 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Wed, 29 Mar 2023 21:12:51 +0200 Subject: [PATCH 4/5] tired copy pasting is dangerous copy pasting --- crates/re_arrow_store/benches/data_store.rs | 47 +++++++++++++++------ 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/crates/re_arrow_store/benches/data_store.rs b/crates/re_arrow_store/benches/data_store.rs index aaf4684d74a7..c0e5792e33b8 100644 --- a/crates/re_arrow_store/benches/data_store.rs +++ b/crates/re_arrow_store/benches/data_store.rs @@ -11,6 +11,9 @@ use re_log_types::{ Component as _, ComponentName, DataRow, DataTable, EntityPath, MsgId, TimeType, Timeline, }; +criterion_group!(benches, insert, latest_at, latest_at_missing, range); +criterion_main!(benches); + // --- #[cfg(not(debug_assertions))] @@ -75,7 +78,17 @@ fn latest_at(c: &mut Criterion) { // Default config group.bench_function("default", |b| { - b.iter(|| insert_table(Default::default(), InstanceKey::name(), &table)); + let store = insert_table(Default::default(), InstanceKey::name(), &table); + b.iter(|| { + let results = latest_data_at(&store, Rect2D::name(), &[Rect2D::name()]); + let rects = results[0] + .as_ref() + .unwrap() + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(NUM_INSTANCES as usize, rects.len()); + }); }); // Emulate more or less buckets @@ -157,16 +170,27 @@ fn latest_at_missing(c: &mut Criterion) { InstanceKey::name(), &table, ); - group.bench_function(format!("bucketsz={num_rows_per_bucket}"), |b| { + group.bench_function(format!("primary/bucketsz={num_rows_per_bucket}"), |b| { b.iter(|| { - let results = latest_data_at(&store, Rect2D::name(), &[Rect2D::name()]); - let rects = results[0] - .as_ref() - .unwrap() - .as_any() - .downcast_ref::() - .unwrap(); - assert_eq!(NUM_INSTANCES as usize, rects.len()); + let results = + latest_data_at(&store, "non_existing_component".into(), &[Rect2D::name()]); + assert!(results[0].is_none()); + }); + }); + group.bench_function("secondaries/bucketsz={num_rows_per_bucket}", |b| { + b.iter(|| { + let results = latest_data_at( + &store, + Rect2D::name(), + &[ + "non_existing_component1".into(), + "non_existing_component2".into(), + "non_existing_component3".into(), + ], + ); + assert!(results[0].is_none()); + assert!(results[1].is_none()); + assert!(results[2].is_none()); }); }); } @@ -224,9 +248,6 @@ fn range(c: &mut Criterion) { } } -criterion_group!(benches, insert, latest_at, latest_at_missing, range); -criterion_main!(benches); - // --- Helpers --- fn build_table(n: usize, packed: bool) -> DataTable { From 29626cd4595c55fcda792a46ec16a1352b773c32 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Wed, 29 Mar 2023 21:19:25 +0200 Subject: [PATCH 5/5] bruh --- crates/re_arrow_store/benches/data_store.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/re_arrow_store/benches/data_store.rs b/crates/re_arrow_store/benches/data_store.rs index c0e5792e33b8..cb8517114743 100644 --- a/crates/re_arrow_store/benches/data_store.rs +++ b/crates/re_arrow_store/benches/data_store.rs @@ -177,7 +177,7 @@ fn latest_at_missing(c: &mut Criterion) { assert!(results[0].is_none()); }); }); - group.bench_function("secondaries/bucketsz={num_rows_per_bucket}", |b| { + group.bench_function(format!("secondaries/bucketsz={num_rows_per_bucket}"), |b| { b.iter(|| { let results = latest_data_at( &store,