Add subsystems regression tests to CI (paritytech#3527)

Fixes paritytech#3530
dharjeezy · Mar 24, 2024 · 0e50785 · 0e50785
1 parent d6bb27f
commit 0e50785
Show file tree

Hide file tree

Showing 6 changed files with 134 additions and 99 deletions.
diff --git a/.gitlab/pipeline/test.yml b/.gitlab/pipeline/test.yml
@@ -494,3 +494,15 @@ test-syscalls:
       printf "The x86_64 syscalls used by the worker binaries have changed. Please review if this is expected and update polkadot/scripts/list-syscalls/*-worker-syscalls as needed.\n";
       fi
   allow_failure: false # this rarely triggers in practice
+
+subsystem-regression-tests:
+  stage: test
+  extends:
+    - .docker-env
+    - .common-refs
+    - .run-immediately
+  script:
+    - cargo test --profile=testnet -p polkadot-availability-recovery --test availability-recovery-regression-bench --features subsystem-benchmarks
+  tags:
+    - benchmark
+  allow_failure: true
diff --git a/...ode/network/availability-distribution/tests/availability-distribution-regression-bench.rs b/...ode/network/availability-distribution/tests/availability-distribution-regression-bench.rs
@@ -26,13 +26,9 @@
 use polkadot_subsystem_bench::{
 	availability::{benchmark_availability_write, prepare_test, TestDataAvailability, TestState},
 	configuration::TestConfiguration,
-	usage::BenchmarkUsage,
+	utils::{warm_up_and_benchmark, WarmUpOptions},
 };
 
-const BENCH_COUNT: usize = 3;
-const WARM_UP_COUNT: usize = 30;
-const WARM_UP_PRECISION: f64 = 0.01;
-
 fn main() -> Result<(), String> {
 	let mut messages = vec![];
 	let mut config = TestConfiguration::default();
@@ -41,17 +37,33 @@ fn main() -> Result<(), String> {
 	config.num_blocks = 3;
 	config.generate_pov_sizes();
 
-	warm_up(config.clone())?;
-	let usage = benchmark(config.clone());
+	let usage = warm_up_and_benchmark(
+		WarmUpOptions::new(&[
+			"availability-distribution",
+			"bitfield-distribution",
+			"availability-store",
+		]),
+		|| {
+			let mut state = TestState::new(&config);
+			let (mut env, _protocol_config) =
+				prepare_test(config.clone(), &mut state, TestDataAvailability::Write, false);
+			env.runtime().block_on(benchmark_availability_write(
+				"data_availability_write",
+				&mut env,
+				state,
+			))
+		},
+	)?;
+	println!("{}", usage);
 
 	messages.extend(usage.check_network_usage(&[
-		("Received from peers", 4330.0, 0.05),
-		("Sent to peers", 15900.0, 0.05),
+		("Received from peers", 443.333, 0.05),
+		("Sent to peers", 21818.555, 0.05),
 	]));
 	messages.extend(usage.check_cpu_usage(&[
-		("availability-distribution", 0.025, 0.05),
-		("bitfield-distribution", 0.085, 0.05),
-		("availability-store", 0.180, 0.05),
+		("availability-distribution", 0.011, 0.05),
+		("bitfield-distribution", 0.029, 0.05),
+		("availability-store", 0.232, 0.05),
 	]));
 
 	if messages.is_empty() {
@@ -61,44 +73,3 @@ fn main() -> Result<(), String> {
 		Err("Regressions found".to_string())
 	}
 }
-
-fn warm_up(config: TestConfiguration) -> Result<(), String> {
-	println!("Warming up...");
-	let mut prev_run: Option<BenchmarkUsage> = None;
-	for _ in 0..WARM_UP_COUNT {
-		let curr = run(config.clone());
-		if let Some(ref prev) = prev_run {
-			let av_distr_diff =
-				curr.cpu_usage_diff(prev, "availability-distribution").expect("Must exist");
-			let bitf_distr_diff =
-				curr.cpu_usage_diff(prev, "bitfield-distribution").expect("Must exist");
-			let av_store_diff =
-				curr.cpu_usage_diff(prev, "availability-store").expect("Must exist");
-			if av_distr_diff < WARM_UP_PRECISION &&
-				bitf_distr_diff < WARM_UP_PRECISION &&
-				av_store_diff < WARM_UP_PRECISION
-			{
-				return Ok(())
-			}
-		}
-		prev_run = Some(curr);
-	}
-
-	Err("Can't warm up".to_string())
-}
-
-fn benchmark(config: TestConfiguration) -> BenchmarkUsage {
-	println!("Benchmarking...");
-	let usages: Vec<BenchmarkUsage> = (0..BENCH_COUNT).map(|_| run(config.clone())).collect();
-	let usage = BenchmarkUsage::average(&usages);
-	println!("{}", usage);
-	usage
-}
-
-fn run(config: TestConfiguration) -> BenchmarkUsage {
-	let mut state = TestState::new(&config);
-	let (mut env, _protocol_config) =
-		prepare_test(config.clone(), &mut state, TestDataAvailability::Write, false);
-	env.runtime()
-		.block_on(benchmark_availability_write("data_availability_write", &mut env, state))
-}
diff --git a/polkadot/node/network/availability-recovery/tests/availability-recovery-regression-bench.rs b/polkadot/node/network/availability-recovery/tests/availability-recovery-regression-bench.rs
@@ -27,13 +27,9 @@ use polkadot_subsystem_bench::{
 		TestDataAvailability, TestState,
 	},
 	configuration::TestConfiguration,
-	usage::BenchmarkUsage,
+	utils::{warm_up_and_benchmark, WarmUpOptions},
 };
 
-const BENCH_COUNT: usize = 3;
-const WARM_UP_COUNT: usize = 10;
-const WARM_UP_PRECISION: f64 = 0.01;
-
 fn main() -> Result<(), String> {
 	let mut messages = vec![];
 
@@ -42,14 +38,27 @@ fn main() -> Result<(), String> {
 	config.num_blocks = 3;
 	config.generate_pov_sizes();
 
-	warm_up(config.clone(), options.clone())?;
-	let usage = benchmark(config.clone(), options.clone());
+	let usage = warm_up_and_benchmark(WarmUpOptions::new(&["availability-recovery"]), || {
+		let mut state = TestState::new(&config);
+		let (mut env, _protocol_config) = prepare_test(
+			config.clone(),
+			&mut state,
+			TestDataAvailability::Read(options.clone()),
+			false,
+		);
+		env.runtime().block_on(benchmark_availability_read(
+			"data_availability_read",
+			&mut env,
+			state,
+		))
+	})?;
+	println!("{}", usage);
 
 	messages.extend(usage.check_network_usage(&[
-		("Received from peers", 102400.000, 0.05),
-		("Sent to peers", 0.335, 0.05),
+		("Received from peers", 307200.000, 0.05),
+		("Sent to peers", 1.667, 0.05),
 	]));
-	messages.extend(usage.check_cpu_usage(&[("availability-recovery", 3.850, 0.05)]));
+	messages.extend(usage.check_cpu_usage(&[("availability-recovery", 11.500, 0.05)]));
 
 	if messages.is_empty() {
 		Ok(())
@@ -58,37 +67,3 @@ fn main() -> Result<(), String> {
 		Err("Regressions found".to_string())
 	}
 }
-
-fn warm_up(config: TestConfiguration, options: DataAvailabilityReadOptions) -> Result<(), String> {
-	println!("Warming up...");
-	let mut prev_run: Option<BenchmarkUsage> = None;
-	for _ in 0..WARM_UP_COUNT {
-		let curr = run(config.clone(), options.clone());
-		if let Some(ref prev) = prev_run {
-			let diff = curr.cpu_usage_diff(prev, "availability-recovery").expect("Must exist");
-			if diff < WARM_UP_PRECISION {
-				return Ok(())
-			}
-		}
-		prev_run = Some(curr);
-	}
-
-	Err("Can't warm up".to_string())
-}
-
-fn benchmark(config: TestConfiguration, options: DataAvailabilityReadOptions) -> BenchmarkUsage {
-	println!("Benchmarking...");
-	let usages: Vec<BenchmarkUsage> =
-		(0..BENCH_COUNT).map(|_| run(config.clone(), options.clone())).collect();
-	let usage = BenchmarkUsage::average(&usages);
-	println!("{}", usage);
-	usage
-}
-
-fn run(config: TestConfiguration, options: DataAvailabilityReadOptions) -> BenchmarkUsage {
-	let mut state = TestState::new(&config);
-	let (mut env, _protocol_config) =
-		prepare_test(config.clone(), &mut state, TestDataAvailability::Read(options), false);
-	env.runtime()
-		.block_on(benchmark_availability_read("data_availability_read", &mut env, state))
-}
diff --git a/polkadot/node/subsystem-bench/src/lib/lib.rs b/polkadot/node/subsystem-bench/src/lib/lib.rs
@@ -26,3 +26,4 @@ pub(crate) mod keyring;
 pub(crate) mod mock;
 pub(crate) mod network;
 pub mod usage;
+pub mod utils;
diff --git a/polkadot/node/subsystem-bench/src/lib/usage.rs b/polkadot/node/subsystem-bench/src/lib/usage.rs
@@ -20,7 +20,7 @@ use colored::Colorize;
 use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
 
-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct BenchmarkUsage {
 	pub benchmark_name: String,
 	pub network_usage: Vec<ResourceUsage>,
@@ -110,7 +110,7 @@ fn check_resource_usage(
 	}
 }
 
-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct ResourceUsage {
 	pub resource_name: String,
 	pub total: f64,

diff --git a/polkadot/node/subsystem-bench/src/lib/utils.rs b/polkadot/node/subsystem-bench/src/lib/utils.rs
@@ -0,0 +1,76 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// This file is part of Polkadot.
+
+// Polkadot is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Polkadot is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Test utils
+
+use crate::usage::BenchmarkUsage;
+use std::io::{stdout, Write};
+
+pub struct WarmUpOptions<'a> {
+	/// The maximum number of runs considered for marming up.
+	pub warm_up: usize,
+	/// The number of runs considered for benchmarking.
+	pub bench: usize,
+	/// The difference in CPU usage between runs considered as normal
+	pub precision: f64,
+	/// The subsystems whose CPU usage is checked during warm-up cycles
+	pub subsystems: &'a [&'a str],
+}
+
+impl<'a> WarmUpOptions<'a> {
+	pub fn new(subsystems: &'a [&'a str]) -> Self {
+		Self { warm_up: 100, bench: 3, precision: 0.02, subsystems }
+	}
+}
+
+pub fn warm_up_and_benchmark(
+	options: WarmUpOptions,
+	run: impl Fn() -> BenchmarkUsage,
+) -> Result<BenchmarkUsage, String> {
+	println!("Warming up...");
+	let mut usages = Vec::with_capacity(options.bench);
+
+	for n in 1..=options.warm_up {
+		let curr = run();
+		if let Some(prev) = usages.last() {
+			let diffs = options
+				.subsystems
+				.iter()
+				.map(|&v| {
+					curr.cpu_usage_diff(prev, v)
+						.ok_or(format!("{} not found in benchmark {:?}", v, prev))
+				})
+				.collect::<Result<Vec<f64>, String>>()?;
+			if !diffs.iter().all(|&v| v < options.precision) {
+				usages.clear();
+			}
+		}
+		usages.push(curr);
+		print!("\r{}%", n * 100 / options.warm_up);
+		if usages.len() == options.bench {
+			println!("\rTook {} runs to warm up", n.saturating_sub(options.bench));
+			break;
+		}
+		stdout().flush().unwrap();
+	}
+
+	if usages.len() != options.bench {
+		println!("Didn't warm up after {} runs", options.warm_up);
+		return Err("Can't warm up".to_string())
+	}
+
+	Ok(BenchmarkUsage::average(&usages))
+}