Create matching benchmarks for the arrow datastore (#582)

* Create obj_query_benchmark for re_query
* Add bench for batch points
* Add `bench = false` to re_query/Cargo.toml
* Add re_query to list of benchmarked crates

Co-authored-by: John Hughes <john@rerun.io>
rerun-io · Dec 18, 2022 · 1786633 · 1786633 · github-actions · Dec 18, 2022
1 parent d2cf10b
commit 1786633
Show file tree

Hide file tree

Showing 4 changed files with 167 additions and 0 deletions.
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
@@ -52,6 +52,7 @@ jobs:
           cargo bench \
             -p re_arrow_store \
             -p re_data_store \
+            -p re_query \
             -p re_tuid \
             -- --output-format=bencher | tee output.txt
 

diff --git a/Cargo.lock b/Cargo.lock
diff --git a/crates/re_query/Cargo.toml b/crates/re_query/Cargo.toml
@@ -34,3 +34,16 @@ polars-core = { workspace = true, features = [
   "dtype-struct",
 ] }
 polars-lazy = { workspace = true }
+
+[dev-dependencies]
+criterion = "0.4"
+itertools = "0.10"
+mimalloc = "0.1"
+tracing-subscriber = "0.3"
+
+[lib]
+bench = false
+
+[[bench]]
+name = "obj_query_benchmark"
+harness = false
diff --git a/crates/re_query/benches/obj_query_benchmark.rs b/crates/re_query/benches/obj_query_benchmark.rs
@@ -0,0 +1,150 @@
+// Use mimalloc as the global allocator for this benchmark binary.
+#[global_allocator]
+static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
+
+use criterion::{criterion_group, criterion_main, Criterion};
+
+use itertools::Itertools;
+use re_arrow_store::{DataStore, TimeQuery, TimelineQuery};
+use re_log_types::{
+    datagen::{build_frame_nr, build_some_colors, build_some_point2d},
+    field_types::{ColorRGBA, Instance, Point2D},
+    msg_bundle::{try_build_msg_bundle2, Component, MsgBundle},
+    obj_path, Index, MsgId, ObjPath, ObjPathComp, TimeType, Timeline,
+};
+use re_query::{query_entity_with_primary, visit_components3};
+
+// ---
+
+// Full-size workload, used when debug assertions are disabled:
+/// Number of frames logged for every object path.
+#[cfg(not(debug_assertions))]
+const NUM_FRAMES: u32 = 100;
+/// Number of points per frame, and the point count each query is expected to return.
+#[cfg(not(debug_assertions))]
+const NUM_POINTS: u32 = 100;
+
+// `cargo test` also runs the benchmark setup code, so make sure they run quickly:
+/// Number of frames logged for every object path.
+#[cfg(debug_assertions)]
+const NUM_FRAMES: u32 = 1;
+/// Number of points per frame, and the point count each query is expected to return.
+#[cfg(debug_assertions)]
+const NUM_POINTS: u32 = 1;
+
+// --- Benchmarks ---
+
+/// Benchmarks the "mono" case, where every point is logged at its own object path
+/// (`points/<index>`) with a single instance per message.
+///
+/// Registers two benchmarks in the `arrow_mono_points` group:
+/// * `insert`: builds a fresh `DataStore` from all pre-built messages; throughput is reported as
+///   `NUM_POINTS * NUM_FRAMES` elements.
+/// * `query`: runs `query_and_visit` over every path against a pre-populated store; throughput
+///   is reported as `NUM_POINTS` elements.
+fn obj_mono_points(c: &mut Criterion) {
+    // Each mono point gets logged at a different path
+    let paths = (0..NUM_POINTS)
+        .map(|point_idx| obj_path!("points", Index::Sequence(point_idx as _)))
+        .collect_vec();
+    let msgs = build_messages(&paths, 1);
+
+    let mut group = c.benchmark_group("arrow_mono_points");
+    group.throughput(criterion::Throughput::Elements(
+        (NUM_POINTS * NUM_FRAMES) as _,
+    ));
+    group.bench_function("insert", |b| {
+        b.iter(|| insert_messages(msgs.iter()));
+    });
+    // Finish the group explicitly so the mutable borrow of `c` ends before the next group.
+    group.finish();
+
+    let mut group = c.benchmark_group("arrow_mono_points");
+    group.throughput(criterion::Throughput::Elements(NUM_POINTS as _));
+    // Populate the store once, outside the measured closure, so only the query is timed.
+    let mut store = insert_messages(msgs.iter());
+    group.bench_function("query", |b| {
+        b.iter(|| query_and_visit(&mut store, &paths));
+    });
+    group.finish();
+}
+
+/// Benchmarks the "batch" case, where all `NUM_POINTS` points of a frame are logged together in
+/// one message at the single path `points`.
+///
+/// Registers an `insert` and a `query` benchmark in the `arrow_batch_points` group, reporting
+/// throughput as `NUM_POINTS * NUM_FRAMES` and `NUM_POINTS` elements respectively.
+fn obj_batch_points(c: &mut Criterion) {
+    // Batch points are logged together at a single path
+    let batch_paths = [ObjPath::from("points")];
+    let bundles = build_messages(&batch_paths, NUM_POINTS as usize);
+
+    let mut insert_group = c.benchmark_group("arrow_batch_points");
+    insert_group.throughput(criterion::Throughput::Elements(
+        (NUM_POINTS * NUM_FRAMES) as _,
+    ));
+    insert_group.bench_function("insert", |bencher| {
+        bencher.iter(|| insert_messages(bundles.iter()));
+    });
+    insert_group.finish();
+
+    let mut query_group = c.benchmark_group("arrow_batch_points");
+    query_group.throughput(criterion::Throughput::Elements(NUM_POINTS as _));
+    // The store is filled once up front; only the query itself runs inside the timed closure.
+    let mut populated = insert_messages(bundles.iter());
+    query_group.bench_function("query", |bencher| {
+        bencher.iter(|| query_and_visit(&mut populated, &batch_paths));
+    });
+    query_group.finish();
+}
+
+// Register both benchmark functions under the `benches` group and pass that group to
+// `criterion_main!`. NOTE(review): presumably this supplies the binary's entry point, since the
+// bench target is declared with `harness = false` in Cargo.toml — confirm against criterion docs.
+criterion_group!(benches, obj_mono_points, obj_batch_points);
+criterion_main!(benches);
+
+// --- Helpers ---
+
+/// Builds one message bundle per `(frame, path)` pair, in frame-major order.
+///
+/// For every frame index in `0..NUM_FRAMES` and every path in `paths`, a bundle is created that
+/// is timestamped with that frame number and carries `pts` generated 2D points together with
+/// `pts` generated colors. The result therefore holds `NUM_FRAMES * paths.len()` bundles.
+///
+/// # Panics
+///
+/// Panics if `try_build_msg_bundle2` rejects the generated data.
+fn build_messages(paths: &[ObjPath], pts: usize) -> Vec<MsgBundle> {
+    (0..NUM_FRAMES)
+        .flat_map(move |frame_idx| {
+            paths.iter().map(move |path| {
+                try_build_msg_bundle2(
+                    MsgId::ZERO,
+                    path.clone(),
+                    [build_frame_nr(frame_idx as _)],
+                    (build_some_point2d(pts), build_some_colors(pts)),
+                )
+                .expect("generated benchmark data must form a valid message bundle")
+            })
+        })
+        .collect()
+}
+
+/// Creates a default `DataStore` and inserts every bundle yielded by `msgs`, in order.
+///
+/// # Panics
+///
+/// Panics on the first bundle the store fails to insert.
+fn insert_messages<'a>(msgs: impl Iterator<Item = &'a MsgBundle>) -> DataStore {
+    let mut populated = DataStore::default();
+    for bundle in msgs {
+        populated.insert(bundle).unwrap();
+    }
+    populated
+}
+
+/// One point collected by `query_and_visit`: a position and, when present, a color.
+///
+/// The fields are only ever written in this file, never read; the leading underscores keep the
+/// compiler from warning about that.
+struct Point {
+    /// Position taken from the `Point2D` component.
+    _pos: Point2D,
+    /// Color taken from the `ColorRGBA` component, or `None` if the visitor received no color.
+    _color: Option<ColorRGBA>,
+}
+
+/// Queries every path in `paths` and collects all visited points into a `Vec`.
+///
+/// Each path is queried on the `frame_nr` sequence timeline with a `LatestAt` query at
+/// `NUM_FRAMES / 2`, passing `Point2D::NAME` followed by `&[ColorRGBA::NAME]` to
+/// `query_entity_with_primary`. The resulting data frame is walked with `visit_components3`, and
+/// each visited position/color pair is stored as a `Point`.
+///
+/// # Panics
+///
+/// Panics if any query fails, or if the total number of collected points is not `NUM_POINTS`.
+fn query_and_visit(store: &mut DataStore, paths: &[ObjPath]) -> Vec<Point> {
+    let time_query = TimeQuery::LatestAt(i64::from(NUM_FRAMES) / 2);
+    let timeline_frame_nr = Timeline::new("frame_nr", TimeType::Sequence);
+    let timeline_query = TimelineQuery::new(timeline_frame_nr, time_query);
+
+    let mut points = Vec::with_capacity(NUM_POINTS as usize);
+
+    // TODO(jleibs): Add Radius once we have support for it in field_types
+    for path in paths {
+        // Fail at the query itself so the real error is reported, rather than skipping the path
+        // and only tripping the point-count assertion below.
+        let df = query_entity_with_primary(
+            store,
+            &timeline_query,
+            path,
+            Point2D::NAME,
+            &[ColorRGBA::NAME],
+        )
+        .expect("benchmark query must succeed for every logged path");
+
+        visit_components3(
+            &df,
+            |pos: &Point2D, _instance: Option<&Instance>, color: Option<&ColorRGBA>| {
+                points.push(Point {
+                    _pos: pos.clone(),
+                    _color: color.cloned(),
+                });
+            },
+        );
+    }
+    // Sanity check: across all paths the query must yield exactly one entry per point.
+    assert_eq!(NUM_POINTS as usize, points.len());
+    points
+}
Benchmark suite	Current: `1786633`	Previous: `e81937a`	Ratio
`datastore/batch/rects/insert`	`1681240` ns/iter (`± 9677`)	`1713541` ns/iter (`± 40868`)	`0.98`
`datastore/batch/rects/query`	`1374` ns/iter (`± 1`)	`1398` ns/iter (`± 7`)	`0.98`
`obj_mono_points/insert`	`964918508` ns/iter (`± 5783867`)	`866847444` ns/iter (`± 4808204`)	`1.11`
`obj_mono_points/query`	`339432` ns/iter (`± 2007`)	`328600` ns/iter (`± 1416`)	`1.03`
`obj_batch_points/insert`	`99228765` ns/iter (`± 513823`)	`90872184` ns/iter (`± 444529`)	`1.09`
`obj_batch_points/query`	`11212` ns/iter (`± 29`)	`11179` ns/iter (`± 52`)	`1.00`
`obj_batch_points_sequential/insert`	`23190247` ns/iter (`± 311693`)	`22590384` ns/iter (`± 297256`)	`1.03`
`obj_batch_points_sequential/query`	`5760` ns/iter (`± 12`)	`5731` ns/iter (`± 33`)	`1.01`
`arrow_mono_points/insert`	`287414576` ns/iter (`± 521604`)	`284741134` ns/iter (`± 1004541`)	`1.01`
`arrow_mono_points/query`	`61832450` ns/iter (`± 930251`)	`61234420` ns/iter (`± 1810774`)	`1.01`
`arrow_batch_points/insert`	`966896` ns/iter (`± 6516`)	`974860` ns/iter (`± 3083`)	`0.99`
`arrow_batch_points/query`	`634483` ns/iter (`± 12586`)	`625322` ns/iter (`± 21919`)	`1.01`
`obj_batch_points_sequential/Tuid::random`	`38` ns/iter (`± 0`)	`37` ns/iter (`± 0`)	`1.03`