chore: Single-shot benchmarking + continuous benchmarking (#183)
Sets up `iai-callgrind`-based benchmarks for single-shot benchmarking (by
using `callgrind` internally).

Cleans up the benchmark cases and adds some structure so they can be
executed both with `iai-callgrind` and `criterion`, depending on whether
we want to measure instruction count or time.

They can be run separately:
```bash
# Single-shot, requires some extra setup
cargo bench --bench iai_benches
# Time-based, takes longer to run
cargo bench --bench criterion_benches
```

See DEVELOPMENT.md for instructions.
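
The shared structure is a `SizedBenchmark` trait in the benches' `helpers` module. That module is not shown in this diff, so the following is only a minimal sketch of the idea, reconstructed from the names used in `benches/benchmarks/convex.rs` below (`name`/`setup`/`run` and the `::criterion` targets); the actual helper may differ:

```rust
use criterion::{black_box, BenchmarkId, Criterion};

/// Minimal sketch of the shared benchmark interface; see the caveats above.
pub trait SizedBenchmark: Sized {
    /// Name reported to both criterion and iai-callgrind.
    fn name() -> &'static str;
    /// Build the benchmark state for a given input size (not measured).
    fn setup(size: usize) -> Self;
    /// The operation being measured.
    fn run(&self) -> impl Sized;

    /// Criterion driver: wall-clock-times `run` over a few sizes.
    fn criterion(c: &mut Criterion) {
        let mut group = c.benchmark_group(Self::name());
        for size in [100, 1_000, 10_000] {
            let bench = Self::setup(size);
            group.bench_with_input(BenchmarkId::from_parameter(size), &size, |b, _| {
                b.iter(|| black_box(bench.run()))
            });
        }
        group.finish();
    }
}
```

Each case then implements the trait once and is picked up by both harnesses.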

---

The instruction count benchmarks are now uploaded to
[bencher.dev](https://bencher.dev/perf/portgraph), so we get a
historical comparison of performance, and a CI check can alert us to
regressions.
I believe the "No thresholds found" error in this PR will go away once
this gets run in `main`.

The service choice was mainly between bencher.dev and codspeed.io. I
chose the former since it supports single-shot benchmarks natively.
See this issue in `ratatui`, where the Bencher maintainer discusses some
of the differences:
[ratatui/ratatui#1092](https://www.github.com/ratatui/ratatui/issues/1092#issuecomment-2415565274).
aborgna-q authored Feb 3, 2025
1 parent cb11a97 commit 03368af
Showing 17 changed files with 781 additions and 309 deletions.
20 changes: 20 additions & 0 deletions .github/workflows/archive-bencher.yml
@@ -0,0 +1,20 @@
+name: Archive Bencher.dev PR benchmarks
+on:
+  pull_request:
+    types:
+      - closed
+
+jobs:
+  archive_pr_branch:
+    name: Archive closed PR branch with Bencher
+    if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: bencherdev/bencher@main
+      - name: Archive closed PR branch with Bencher
+        run: |
+          bencher archive \
+            --project portgraph \
+            --token '${{ secrets.BENCHER_API_TOKEN }}' \
+            --branch "$GITHUB_HEAD_REF"
59 changes: 54 additions & 5 deletions .github/workflows/ci.yml
@@ -54,20 +54,69 @@ jobs:
         run: cargo miri test

   benches:
+    name: Continuous benchmarking
+    # Not required, we can ignore it for the merge queue check.
+    if: github.event_name != 'merge_group'
     runs-on: ubuntu-latest
+    permissions:
+      checks: write
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Install stable toolchain
         uses: dtolnay/rust-toolchain@stable
       - uses: Swatinem/rust-cache@v2
         with:
           prefix-key: v0
       - name: Build benchmarks with no features
         run: cargo bench --verbose --no-run --no-default-features
       - name: Build benchmarks with all features
         run: cargo bench --verbose --no-run --all-features

+      # The installed iai-callgrind-runner version must match the
+      # version of iai-callgrind in the Cargo.toml
+      - uses: cargo-bins/cargo-binstall@main
+      - name: Install iai-callgrind-runner
+        run: |
+          version=$(cargo metadata --format-version=1 |\
+            jq '.packages[] | select(.name == "iai-callgrind").version' |\
+            tr -d '"'
+          )
+          cargo binstall --no-confirm iai-callgrind-runner --version $version --force
+      - uses: bencherdev/bencher@main
+      - name: Install valgrind
+        run: sudo apt update && sudo apt install -y valgrind
+
+      - name: Track base branch IAI benchmarks
+        if: github.event_name == 'push'
+        run: |
+          bencher run \
+            --project portgraph \
+            --token '${{ secrets.BENCHER_API_TOKEN }}' \
+            --branch main \
+            --testbed ubuntu-latest \
+            --threshold-measure instructions \
+            --threshold-test t_test \
+            --threshold-max-sample-size 64 \
+            --threshold-upper-boundary 0.99 \
+            --thresholds-reset \
+            --err \
+            --github-actions '${{ secrets.HUGRBOT_PAT }}' \
+            --adapter rust_iai_callgrind \
+            "cargo bench --bench iai_benches"
+      - name: Track PR IAI benchmarks
+        if: github.event_name == 'pull_request'
+        run: |
+          bencher run \
+            --project portgraph \
+            --token '${{ secrets.BENCHER_API_TOKEN }}' \
+            --branch "${{ github.event.pull_request.head.ref }}" \
+            --testbed ubuntu-latest \
+            --start-point "${{ github.event.pull_request.base.ref }}" \
+            --start-point-clone-thresholds \
+            --start-point-reset \
+            --err \
+            --github-actions '${{ secrets.HUGRBOT_PAT }}' \
+            --adapter rust_iai_callgrind \
+            "cargo bench --bench iai_benches"
+          # --start-point-hash '${{ github.event.pull_request.base.sha }}' \

   tests:
     runs-on: ubuntu-latest
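
As a side note, the same upload can be exercised from a local checkout. A minimal sketch using only flags that appear in the job above, assuming the `bencher` CLI is installed and `BENCHER_API_TOKEN` is exported:

```bash
bencher run \
  --project portgraph \
  --token "$BENCHER_API_TOKEN" \
  --adapter rust_iai_callgrind \
  "cargo bench --bench iai_benches"
```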
10 changes: 9 additions & 1 deletion Cargo.toml
@@ -38,11 +38,19 @@ petgraph = ["dep:petgraph"]

 [dev-dependencies]
 criterion = { version = "0.5.1", features = ["html_reports"] }
+iai-callgrind = "0.14.0"
 rmp-serde = "1.1.1"
 rstest = "0.24.0"
 itertools = "0.14.0"
 insta = "1.39.0"

 [[bench]]
-name = "bench_main"
+name = "criterion_benches"
 harness = false
+
+[[bench]]
+name = "iai_benches"
+harness = false
+
+[profile.bench]
+debug = true
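
Since both `[[bench]]` targets set `harness = false`, each bench binary supplies its own `main`. A plausible sketch of `benches/iai_benches.rs` under that assumption; the module paths are guesses, and only the `callgrind_group` name comes from the `convex.rs` diff below:

```rust
// benches/iai_benches.rs — hypothetical sketch, not the PR's actual file.
mod benchmarks;
mod helpers;

use benchmarks::convex::callgrind_group;

// iai-callgrind generates a callgrind-driven `main` from the listed groups.
iai_callgrind::main!(library_benchmark_groups = callgrind_group);
```

The `criterion_benches` target would have an analogous `main` built with criterion's `criterion_main!` macro.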
55 changes: 50 additions & 5 deletions DEVELOPMENT.md
@@ -28,17 +28,62 @@ cargo build
 cargo test
 ```

-Run the benchmarks with:
+Finally, if you have rust nightly installed, you can run `miri` to detect
+undefined behaviour in the code.

 ```bash
-cargo bench
+cargo +nightly miri test
 ```

-Finally, if you have rust nightly installed, you can run `miri` to detect
-undefined behaviour in the code.
+## 🏋️ Benchmarking
+
+We use two kinds of benchmarks in this project:
+
+- A wall-clock time benchmark using `criterion`. This measures the time taken to
+  run a function by running it multiple times.
+- A single-shot instruction count / memory hits benchmark using `iai-callgrind`.
+  This measures the number of instructions executed and the number of cache hits
+  and misses.
+
+Both tools run the same set of test cases.
+
+When profiling and debugging performance issues, you may also want to use
+[samply](https://github.com/mstange/samply) to visualize flame graphs of
+specific examples.
+
+### Wall-clock time benchmarks
+
+This is the simplest kind of benchmark. To run them, use:

 ```bash
-cargo +nightly miri test
+cargo bench --bench criterion_benches
 ```

+### Single-shot benchmarking
+
+These benchmarks are useful when running in noisy environments, in addition to
+being faster than criterion. We run these on CI to track historical performance
+on [bencher.dev](https://bencher.dev/perf/portgraph).
+
+To run these, you must have [`valgrind`](https://valgrind.org/) installed.
+Support for Apple Silicon (M1/M2/...) Macs is
+[experimental](https://github.com/LouisBrunner/valgrind-macos/issues/56), so you
+will need to manually clone and compile the branch. See
+[`LouisBrunner/valgrind-macos`](https://github.com/LouisBrunner/valgrind-macos/blob/feature/m1/README)
+for instructions.
+
+In addition to `valgrind`, you will need to install the `iai-callgrind` runner.
+Pre-built binaries are available via
+[`cargo binstall`](https://github.com/cargo-bins/cargo-binstall).
+
+```bash
+cargo binstall iai-callgrind-runner
+```
+
+The benchmarks can then be run with:
+
+```bash
+cargo bench --bench iai_benches
+```
+
 ## 💅 Coding Style
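
When a single-shot number needs a closer look, the callgrind profile can also be generated and inspected by hand. A hypothetical sketch; the `<hash>` and `<pid>` placeholders are illustrative:

```bash
# Build the bench binary without running it.
cargo bench --bench iai_benches --no-run
# Profile it directly under callgrind.
valgrind --tool=callgrind target/release/deps/iai_benches-<hash>
# Summarize instruction counts per function.
callgrind_annotate callgrind.out.<pid>
```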
4 changes: 3 additions & 1 deletion README.md
@@ -5,6 +5,7 @@ portgraph
 [![crates][]](https://crates.io/crates/portgraph)
 [![msrv][]](https://github.com/CQCL/portgraph)
 [![codecov][]](https://codecov.io/gh/CQCL/portgraph)
+[![bencher][]](https://bencher.dev/perf/portgraph)

 Data structure library for directed graphs with first-level ports. Includes
 secondary data structures for node and port weights, and node hierarchies.
@@ -32,9 +33,10 @@ See [DEVELOPMENT.md](DEVELOPMENT.md) for instructions on setting up the developm
 This project is licensed under Apache License, Version 2.0 ([LICENSE][] or http://www.apache.org/licenses/LICENSE-2.0).

 [API documentation here]: https://docs.rs/portgraph/
-[build_status]: https://github.com/CQCL/portgraph/workflows/Continuous%20integration/badge.svg?branch=main
+[build_status]: https://github.com/CQCL/portgraph/actions/workflows/ci.yml/badge.svg
 [crates]: https://img.shields.io/crates/v/portgraph
 [LICENSE]: LICENCE
 [msrv]: https://img.shields.io/badge/rust-1.75.0%2B-blue.svg?maxAge=3600
 [codecov]: https://img.shields.io/codecov/c/gh/CQCL/portgraph?logo=codecov
+[bencher]: https://img.shields.io/badge/bencher-.dev-blue.svg?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAMAAABEpIrGAAAAtFBMVEVHcEwEBAT+/v4AAADi4uKenp7u7/ABAQEAAAAAAAAAAAAwMDAAAAAAAAAAAACGh4e2tLJfX18AAAAAAAD///++wMEAAABoaGiRkZEAAABucXMAAAAAAAB1dXUxMTHMzMzh4eH/0rRJSUmsrKz29vb////+q3v8bQD9+PP/8+kZGhp9fn/8dAD/3cXk4OT/tIL9p2z8ZwD+vI+UlJTPmnn/5tT/xqH/2cD9k1D9kkL8ijj8fhszkVn/AAAAJXRSTlMA4fgz7ePq/o3zRv0hfcfb7dJoVubyDvvpqfjTt+Dl+tTz3uPjxce2cgAAAaJJREFUOI2lklmTqjAQhcMSOpFFQHB3nJl7O2wii46z/f//daOOJYkvU3XPA6Q4X53udEPIbxWv1+x2ZsF6ovsLd05n/PJ5wmd07jLVj7mDaFKwCPE4NREdrmbwEOu+MOerKQnmZtHXGBpDfwp28dV9YgKWxf/iZ/dV2OV0CLjO227X1cgnE451t9u9Oa43jChtCWQ9vsbxK/aZBOxS6cFI848sE7jx/Q2KLPvIU6UHwiJszsCYsfEZaDBS72nxJO/uQJcn3FIHEVBsuxvQtUgDbZIj1ym+BUZBEKH4Lhx3pM+aUzy1GBlGhO0JqaH7ZARy2Dc58BBAGDzfgWdguu+7gwAZoW/T4jYqsrUiK4qa1DanpQw4VObVM6uDjCiHy4pnJqYA1RWoAFI0Z/HwChW+HwEOV+AAcHzHiilA3TbH8FY/PDZtrQD+8kWIppBecXk0Qrws/QHglfZJ/oeIuRC5fNX9SW2SGBTPRo77/c9hrG7D4uml+l7qctNQ/x9GsEnuU0qWj9vyFvwp3NpS2z9PsPB0/1zGDziUJfCV/7js/9A/nA48HxVN/KwAAAAASUVORK5CYII=
 [CHANGELOG]: CHANGELOG.md
12 changes: 0 additions & 12 deletions benches/bench_main.rs

This file was deleted.

132 changes: 85 additions & 47 deletions benches/benchmarks/convex.rs
@@ -1,68 +1,106 @@
-use criterion::{black_box, criterion_group, AxisScale, BenchmarkId, Criterion, PlotConfiguration};
+use criterion::{criterion_group, Criterion};
 use itertools::Itertools;
 use portgraph::{algorithms::TopoConvexChecker, PortView};
+use portgraph::{NodeIndex, PortGraph};

 use super::generators::make_two_track_dag;
+use crate::helpers::*;
+
+// -----------------------------------------------------------------------------
+// Benchmark functions
+// -----------------------------------------------------------------------------

-fn bench_convex_construction(c: &mut Criterion) {
-    let mut g = c.benchmark_group("initialize convex checker object");
-    g.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic));
-
-    for size in [100, 1_000, 10_000] {
-        g.bench_with_input(
-            BenchmarkId::new("initalize_convexity", size),
-            &size,
-            |b, size| {
-                let graph = make_two_track_dag(*size);
-                b.iter(|| black_box(TopoConvexChecker::new(&graph)))
-            },
-        );
+struct ConvexConstruction {
+    graph: PortGraph,
+}
+impl SizedBenchmark for ConvexConstruction {
+    fn name() -> &'static str {
+        "initialize_convexity"
+    }
+
+    fn setup(size: usize) -> Self {
+        let graph = make_two_track_dag(size);
+        Self { graph }
+    }
+
+    fn run(&self) -> impl Sized {
+        TopoConvexChecker::new(&self.graph)
     }
-    g.finish();
 }

 /// We benchmark the worst case scenario, where the "subgraph" is the
 /// entire graph itself.
-fn bench_convex_full(c: &mut Criterion) {
-    let mut g = c.benchmark_group("Runtime convexity check. Full graph.");
-    g.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic));
+struct ConvexFull {
+    checker: TopoConvexChecker<PortGraph>,
+    nodes: Vec<NodeIndex>,
+}
+impl SizedBenchmark for ConvexFull {
+    fn name() -> &'static str {
+        "check_convexity_full"
+    }

-    for size in [100, 1_000, 10_000] {
+    fn setup(size: usize) -> Self {
         let graph = make_two_track_dag(size);
-        let checker = TopoConvexChecker::new(&graph);
-        g.bench_with_input(
-            BenchmarkId::new("check_convexity_full", size),
-            &size,
-            |b, _size| b.iter(|| black_box(checker.is_node_convex(graph.nodes_iter()))),
-        );
+        let nodes = graph.nodes_iter().collect_vec();
+        let checker = TopoConvexChecker::new(graph);
+        Self { checker, nodes }
     }
-    g.finish();
+
+    fn run(&self) -> impl Sized {
+        self.checker.is_node_convex(self.nodes.iter().copied())
+    }
 }

-/// We benchmark the an scenario where the size of the "subgraph" is sub-linear on the size of the graph.
-fn bench_convex_sparse(c: &mut Criterion) {
-    let mut g = c.benchmark_group("Runtime convexity check. Sparse subgraph on an n^2 size graph.");
-    g.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic));
-
-    for size in [100usize, 1_000, 5_000] {
-        let graph_size = size.pow(2);
-        let graph = make_two_track_dag(graph_size);
-        let checker = TopoConvexChecker::new(&graph);
-        let nodes = graph.nodes_iter().step_by(graph_size / size).collect_vec();
-        g.bench_with_input(
-            BenchmarkId::new("check_convexity_sparse", size),
-            &size,
-            |b, _size| b.iter(|| black_box(checker.is_node_convex(nodes.iter().copied()))),
-        );
+struct ConvexSparse {
+    checker: TopoConvexChecker<PortGraph>,
+    nodes: Vec<NodeIndex>,
+}
+impl SizedBenchmark for ConvexSparse {
+    fn name() -> &'static str {
+        "check_convexity_sparse"
+    }
+
+    fn setup(size: usize) -> Self {
+        let graph = make_two_track_dag(size);
+        let subgraph_size = (size as f64).sqrt().floor() as usize;
+        let nodes = graph
+            .nodes_iter()
+            .step_by(size / subgraph_size)
+            .collect_vec();
+        let checker = TopoConvexChecker::new(graph);
+        Self { checker, nodes }
     }
-    g.finish();
+
+    fn run(&self) -> impl Sized {
+        self.checker.is_node_convex(self.nodes.iter().copied())
+    }
 }

+// -----------------------------------------------------------------------------
+// iai_callgrind definitions
+// -----------------------------------------------------------------------------
+
+sized_iai_benchmark!(callgrind_convex_construction, ConvexConstruction);
+sized_iai_benchmark!(callgrind_convex_full, ConvexFull);
+sized_iai_benchmark!(callgrind_convex_sparse, ConvexSparse);
+
+iai_callgrind::library_benchmark_group!(
+    name = callgrind_group;
+    benchmarks =
+        callgrind_convex_construction,
+        callgrind_convex_full,
+        callgrind_convex_sparse,
+);
+
+// -----------------------------------------------------------------------------
+// Criterion definitions
+// -----------------------------------------------------------------------------
+
 criterion_group! {
-    name = benches;
+    name = criterion_group;
     config = Criterion::default();
     targets =
-        bench_convex_full,
-        bench_convex_sparse,
-        bench_convex_construction
+        ConvexConstruction::criterion,
+        ConvexFull::criterion,
+        ConvexSparse::criterion,
 }
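
The `sized_iai_benchmark!` macro used above lives in the benches' `helpers` module, which this excerpt does not include. A rough sketch of the kind of code it could expand to, assuming iai-callgrind's `#[library_benchmark]` attribute API; this is a guess at the helper, not the PR's actual definition:

```rust
use iai_callgrind::library_benchmark;
use std::hint::black_box;

// Hypothetical expansion of `sized_iai_benchmark!(callgrind_convex_full, ConvexFull)`.
#[library_benchmark]
#[benches::sized(args = [100, 1_000, 10_000], setup = ConvexFull::setup)]
fn callgrind_convex_full(bench: ConvexFull) {
    // Only `run` is measured; `setup` executes outside the measurement.
    black_box(bench.run());
}
```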