From 1786633a88ffba4fd33847b0046b465e4fdf8a85 Mon Sep 17 00:00:00 2001 From: Jeremy Leibs Date: Sun, 18 Dec 2022 15:28:55 +0100 Subject: [PATCH] Create matching benchmarks for the arrow datastore (#582) * Create obj_query_benchmark for re_query * Add bench for batch points * Add `bench = false` to re_query/Cargo.toml * Add re_query to list of benchmarked crates Co-authored-by: John Hughes --- .github/workflows/rust.yml | 1 + Cargo.lock | 3 + crates/re_query/Cargo.toml | 13 ++ .../re_query/benches/obj_query_benchmark.rs | 150 ++++++++++++++++++ 4 files changed, 167 insertions(+) create mode 100644 crates/re_query/benches/obj_query_benchmark.rs diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 1253db64bad0..63fc84fbb06e 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -52,6 +52,7 @@ jobs: cargo bench \ -p re_arrow_store \ -p re_data_store \ + -p re_query \ -p re_tuid \ -- --output-format=bencher | tee output.txt diff --git a/Cargo.lock b/Cargo.lock index daec2c58ed83..15798d2e3e83 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3730,9 +3730,11 @@ dependencies = [ "anyhow", "arrow2", "chrono", + "criterion", "document-features", "indent", "itertools", + "mimalloc", "nohash-hasher", "polars-core", "polars-lazy", @@ -3741,6 +3743,7 @@ dependencies = [ "re_log", "re_log_types", "thiserror", + "tracing-subscriber", ] [[package]] diff --git a/crates/re_query/Cargo.toml b/crates/re_query/Cargo.toml index 8a70f7c9dbad..41dd31f7a9df 100644 --- a/crates/re_query/Cargo.toml +++ b/crates/re_query/Cargo.toml @@ -34,3 +34,16 @@ polars-core = { workspace = true, features = [ "dtype-struct", ] } polars-lazy = { workspace = true } + +[dev-dependencies] +criterion = "0.4" +itertools = "0.10" +mimalloc = "0.1" +tracing-subscriber = "0.3" + +[lib] +bench = false + +[[bench]] +name = "obj_query_benchmark" +harness = false diff --git a/crates/re_query/benches/obj_query_benchmark.rs b/crates/re_query/benches/obj_query_benchmark.rs new file 
mode 100644
index 000000000000..763a9944ab7d
--- /dev/null
+++ b/crates/re_query/benches/obj_query_benchmark.rs
@@ -0,0 +1,155 @@
+//! Criterion benchmarks for inserting into and querying the arrow datastore via `re_query`.
+
+#[global_allocator]
+static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
+
+use criterion::{criterion_group, criterion_main, Criterion};
+
+use itertools::Itertools;
+use re_arrow_store::{DataStore, TimeQuery, TimelineQuery};
+use re_log_types::{
+    datagen::{build_frame_nr, build_some_colors, build_some_point2d},
+    field_types::{ColorRGBA, Instance, Point2D},
+    msg_bundle::{try_build_msg_bundle2, Component, MsgBundle},
+    obj_path, Index, MsgId, ObjPath, ObjPathComp, TimeType, Timeline,
+};
+use re_query::{query_entity_with_primary, visit_components3};
+
+// ---
+
+#[cfg(not(debug_assertions))]
+const NUM_FRAMES: u32 = 100;
+#[cfg(not(debug_assertions))]
+const NUM_POINTS: u32 = 100;
+
+// `cargo test` also runs the benchmark setup code, so make sure they run quickly:
+#[cfg(debug_assertions)]
+const NUM_FRAMES: u32 = 1;
+#[cfg(debug_assertions)]
+const NUM_POINTS: u32 = 1;
+
+// --- Benchmarks ---
+
+/// Benchmarks insert and query when every point is logged at its own object path.
+fn obj_mono_points(c: &mut Criterion) {
+    // Each mono point gets logged at a different path
+    let paths = (0..NUM_POINTS)
+        .map(|point_idx| obj_path!("points", Index::Sequence(point_idx as _)))
+        .collect_vec();
+    let msgs = build_messages(&paths, 1);
+
+    bench_insert_and_query(c, "arrow_mono_points", &paths, &msgs);
+}
+
+/// Benchmarks insert and query when all points are logged together at one object path.
+fn obj_batch_points(c: &mut Criterion) {
+    // Batch points are logged together at a single path
+    let paths = [ObjPath::from("points")];
+    let msgs = build_messages(&paths, NUM_POINTS as _);
+
+    bench_insert_and_query(c, "arrow_batch_points", &paths, &msgs);
+}
+
+criterion_group!(benches, obj_mono_points, obj_batch_points);
+criterion_main!(benches);
+
+// --- Helpers ---
+
+/// Registers the `insert` and `query` benchmarks under `group_name`.
+///
+/// `insert` throughput counts every point of every frame, `query` throughput the
+/// points of a single frame.
+fn bench_insert_and_query(
+    c: &mut Criterion,
+    group_name: &str,
+    paths: &[ObjPath],
+    msgs: &[MsgBundle],
+) {
+    let mut group = c.benchmark_group(group_name);
+    // Widen before multiplying so larger constants cannot overflow `u32`.
+    group.throughput(criterion::Throughput::Elements(
+        u64::from(NUM_POINTS) * u64::from(NUM_FRAMES),
+    ));
+    group.bench_function("insert", |b| {
+        b.iter(|| insert_messages(msgs.iter()));
+    });
+    group.finish();
+
+    let mut group = c.benchmark_group(group_name);
+    group.throughput(criterion::Throughput::Elements(u64::from(NUM_POINTS)));
+    // Fill the store once, outside the timed closure.
+    let mut store = insert_messages(msgs.iter());
+    group.bench_function("query", |b| {
+        b.iter(|| query_and_visit(&mut store, paths));
+    });
+    group.finish();
+}
+
+/// Builds one message bundle per frame and path, each holding `pts` points and colors.
+///
+/// Panics if a bundle cannot be built.
+fn build_messages(paths: &[ObjPath], pts: usize) -> Vec<MsgBundle> {
+    (0..NUM_FRAMES)
+        .flat_map(move |frame_idx| {
+            paths.iter().map(move |path| {
+                try_build_msg_bundle2(
+                    MsgId::ZERO,
+                    path.clone(),
+                    [build_frame_nr(frame_idx as _)],
+                    (build_some_point2d(pts), build_some_colors(pts)),
+                )
+                .unwrap()
+            })
+        })
+        .collect()
+}
+
+/// Inserts every message into a fresh `DataStore` and returns it.
+///
+/// Panics if an insertion fails.
+fn insert_messages<'a>(msgs: impl Iterator<Item = &'a MsgBundle>) -> DataStore {
+    let mut store = DataStore::default();
+    msgs.for_each(|msg_bundle| store.insert(msg_bundle).unwrap());
+    store
+}
+
+/// Owned copy of the components visited for one point; nothing in this file reads the fields.
+struct Point {
+    _pos: Point2D,
+    _color: Option<ColorRGBA>,
+}
+
+/// Runs a latest-at query at `NUM_FRAMES / 2` against every path and collects the visited points.
+///
+/// Panics if the number of collected points differs from `NUM_POINTS`.
+fn query_and_visit(store: &mut DataStore, paths: &[ObjPath]) -> Vec<Point> {
+    let time_query = TimeQuery::LatestAt(i64::from(NUM_FRAMES) / 2);
+    let timeline_frame_nr = Timeline::new("frame_nr", TimeType::Sequence);
+    let timeline_query = TimelineQuery::new(timeline_frame_nr, time_query);
+
+    let mut points = Vec::with_capacity(NUM_POINTS as _);
+
+    // TODO(jleibs): Add Radius once we have support for it in field_types
+    for path in paths {
+        // A failed query contributes no points; the count check below then trips.
+        if let Ok(df) = query_entity_with_primary(
+            store,
+            &timeline_query,
+            path,
+            Point2D::NAME,
+            &[ColorRGBA::NAME],
+        ) {
+            visit_components3(
+                &df,
+                |pos: &Point2D, _instance: Option<&Instance>, color: Option<&ColorRGBA>| {
+                    points.push(Point {
+                        _pos: pos.clone(),
+                        _color: color.cloned(),
+                    });
+                },
+            );
+        }
+    }
+    assert_eq!(NUM_POINTS as usize, points.len());
+    points
+}