From 1786633a88ffba4fd33847b0046b465e4fdf8a85 Mon Sep 17 00:00:00 2001
From: Jeremy Leibs <jeremy@rerun.io>
Date: Sun, 18 Dec 2022 15:28:55 +0100
Subject: [PATCH] Create matching benchmarks for the arrow datastore (#582)

* Create obj_query_benchmark for re_query
* Add bench for batch points
* Add `bench = false` to re_query/Cargo.toml
* Add re_query to list of benchmarked crates

Co-authored-by: John Hughes <john@rerun.io>
---
 .github/workflows/rust.yml                    |   1 +
 Cargo.lock                                    |   3 +
 crates/re_query/Cargo.toml                    |  13 ++
 .../re_query/benches/obj_query_benchmark.rs   | 150 ++++++++++++++++++
 4 files changed, 167 insertions(+)
 create mode 100644 crates/re_query/benches/obj_query_benchmark.rs

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 1253db64bad0..63fc84fbb06e 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -52,6 +52,7 @@ jobs:
           cargo bench \
             -p re_arrow_store \
             -p re_data_store \
+            -p re_query \
             -p re_tuid \
             -- --output-format=bencher | tee output.txt
 
diff --git a/Cargo.lock b/Cargo.lock
index daec2c58ed83..15798d2e3e83 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3730,9 +3730,11 @@ dependencies = [
  "anyhow",
  "arrow2",
  "chrono",
+ "criterion",
  "document-features",
  "indent",
  "itertools",
+ "mimalloc",
  "nohash-hasher",
  "polars-core",
  "polars-lazy",
@@ -3741,6 +3743,7 @@ dependencies = [
  "re_log",
  "re_log_types",
  "thiserror",
+ "tracing-subscriber",
 ]
 
 [[package]]
diff --git a/crates/re_query/Cargo.toml b/crates/re_query/Cargo.toml
index 8a70f7c9dbad..41dd31f7a9df 100644
--- a/crates/re_query/Cargo.toml
+++ b/crates/re_query/Cargo.toml
@@ -34,3 +34,16 @@ polars-core = { workspace = true, features = [
   "dtype-struct",
 ] }
 polars-lazy = { workspace = true }
+
+[dev-dependencies]
+criterion = "0.4"
+itertools = "0.10"
+mimalloc = "0.1"
+tracing-subscriber = "0.3"
+
+[lib]
+bench = false
+
+[[bench]]
+name = "obj_query_benchmark"
+harness = false
diff --git a/crates/re_query/benches/obj_query_benchmark.rs b/crates/re_query/benches/obj_query_benchmark.rs
new file mode 100644
index 000000000000..763a9944ab7d
--- /dev/null
+++ b/crates/re_query/benches/obj_query_benchmark.rs
@@ -0,0 +1,150 @@
+#[global_allocator]
+static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; // allocator used by this bench binary
+
+use criterion::{criterion_group, criterion_main, Criterion};
+
+use itertools::Itertools;
+use re_arrow_store::{DataStore, TimeQuery, TimelineQuery};
+use re_log_types::{
+    datagen::{build_frame_nr, build_some_colors, build_some_point2d},
+    field_types::{ColorRGBA, Instance, Point2D},
+    msg_bundle::{try_build_msg_bundle2, Component, MsgBundle},
+    obj_path, Index, MsgId, ObjPath, ObjPathComp, TimeType, Timeline,
+};
+use re_query::{query_entity_with_primary, visit_components3};
+
+// ---
+
+#[cfg(not(debug_assertions))]
+const NUM_FRAMES: u32 = 100;
+#[cfg(not(debug_assertions))]
+const NUM_POINTS: u32 = 100;
+
+// `cargo test` also runs the benchmark setup code, so keep the debug-build sizes tiny:
+#[cfg(debug_assertions)]
+const NUM_FRAMES: u32 = 1;
+#[cfg(debug_assertions)]
+const NUM_POINTS: u32 = 1;
+
+// --- Benchmarks ---
+
+/// Benchmarks insert and query when every point is logged at its own object path.
+fn obj_mono_points(c: &mut Criterion) {
+    // Each mono point gets logged at a different path
+    let paths = (0..NUM_POINTS)
+        .map(|point_idx| obj_path!("points", Index::Sequence(point_idx as _)))
+        .collect_vec();
+    let msgs = build_messages(&paths, 1);
+
+    {
+        // Insertion: each iteration builds a fresh store from all messages.
+        let mut group = c.benchmark_group("arrow_mono_points");
+        group.throughput(criterion::Throughput::Elements(
+            (NUM_POINTS * NUM_FRAMES) as _,
+        ));
+        group.bench_function("insert", |b| {
+            b.iter(|| insert_messages(msgs.iter()));
+        });
+    }
+
+    {
+        // Query: the store is populated once, outside the measured closure.
+        let mut group = c.benchmark_group("arrow_mono_points");
+        group.throughput(criterion::Throughput::Elements(NUM_POINTS as _));
+        let mut store = insert_messages(msgs.iter());
+        group.bench_function("query", |b| {
+            b.iter(|| query_and_visit(&mut store, &paths));
+        });
+    }
+}
+
+/// Benchmarks insert and query when all points are logged as one batch at a single path.
+fn obj_batch_points(c: &mut Criterion) {
+    // Batch points are logged together at a single path
+    let paths = [ObjPath::from("points")];
+    let msgs = build_messages(&paths, NUM_POINTS as _);
+
+    {
+        // Insertion: each iteration builds a fresh store from all messages.
+        let mut group = c.benchmark_group("arrow_batch_points");
+        group.throughput(criterion::Throughput::Elements(
+            (NUM_POINTS * NUM_FRAMES) as _,
+        ));
+        group.bench_function("insert", |b| {
+            b.iter(|| insert_messages(msgs.iter()));
+        });
+    }
+
+    {
+        let mut group = c.benchmark_group("arrow_batch_points");
+        group.throughput(criterion::Throughput::Elements(NUM_POINTS as _));
+        let mut store = insert_messages(msgs.iter());
+        group.bench_function("query", |b| {
+            b.iter(|| query_and_visit(&mut store, &paths));
+        });
+    }
+}
+
+criterion_group!(benches, obj_mono_points, obj_batch_points); // registers both benchmark fns
+criterion_main!(benches); // supplies the entry point; the bench target sets `harness = false`
+
+// --- Helpers ---
+
+/// Builds one bundle per (frame, path) pair, frame-major, each generated for `pts` points.
+fn build_messages(paths: &[ObjPath], pts: usize) -> Vec<MsgBundle> {
+    (0..NUM_FRAMES)
+        .flat_map(move |frame_idx| {
+            paths.iter().map(move |path| {
+                try_build_msg_bundle2(
+                    MsgId::ZERO,
+                    path.clone(),
+                    [build_frame_nr(frame_idx as _)],
+                    (build_some_point2d(pts), build_some_colors(pts)),
+                )
+                .unwrap()
+            })
+        })
+        .collect()
+}
+
+fn insert_messages<'a>(msgs: impl Iterator<Item = &'a MsgBundle>) -> DataStore {
+    let mut data_store = DataStore::default(); // every call starts from a default store
+    msgs.for_each(|bundle| data_store.insert(bundle).unwrap()); // panics if an insert fails
+    data_store
+}
+
+struct Point {
+    _pos: Point2D, // position handed to the visitor; stored but never read in this file
+    _color: Option<ColorRGBA>, // `None` when the visitor received no color for the point
+}
+
+/// Latest-at queries every path for points and colors; panics unless `NUM_POINTS` are visited.
+fn query_and_visit(store: &mut DataStore, paths: &[ObjPath]) -> Vec<Point> {
+    let latest_at_mid = TimeQuery::LatestAt((NUM_FRAMES as i64) / 2);
+    let frame_nr_timeline = Timeline::new("frame_nr", TimeType::Sequence);
+    let timeline_query = TimelineQuery::new(frame_nr_timeline, latest_at_mid);
+    let mut visited = Vec::with_capacity(NUM_POINTS as _);
+
+    // TODO(jleibs): Add Radius once we have support for it in field_types
+    for entity_path in paths {
+        if let Ok(df) = query_entity_with_primary(
+            store,
+            &timeline_query,
+            entity_path,
+            Point2D::NAME,
+            &[ColorRGBA::NAME],
+        ) {
+            visit_components3(
+                &df,
+                |pos: &Point2D, _instance: Option<&Instance>, color: Option<&ColorRGBA>| {
+                    visited.push(Point {
+                        _pos: pos.clone(),
+                        _color: color.cloned(),
+                    });
+                },
+            );
+        }
+    }
+    assert_eq!(NUM_POINTS as usize, visited.len());
+    visited
+}