From 9d7305ee7d8386066c518673321a03489cd06e15 Mon Sep 17 00:00:00 2001 From: Zixian Cai Date: Sat, 26 Jun 2021 20:40:32 +1000 Subject: [PATCH] Add perf event support (#315) * Add perfmon to Stats * Configure perf events with MMTk options * Include perf_counter feature in CI --- .github/scripts/ci-build.sh | 1 + .github/scripts/ci-common.sh | 2 +- .github/scripts/ci-style.sh | 3 ++ .github/scripts/ci-test.sh | 1 + Cargo.toml | 7 ++- src/scheduler/mod.rs | 2 +- src/scheduler/scheduler.rs | 1 + src/scheduler/stat.rs | 61 ++++++++++++++++++++++---- src/scheduler/work.rs | 2 +- src/scheduler/work_counter.rs | 81 +++++++++++++++++++++++++++++++++++ src/scheduler/worker.rs | 2 +- src/util/options.rs | 73 ++++++++++++++++++++++++++----- src/util/statistics/stats.rs | 13 ++++++ 13 files changed, 225 insertions(+), 24 deletions(-) diff --git a/.github/scripts/ci-build.sh b/.github/scripts/ci-build.sh index 092480d954..4e69251937 100755 --- a/.github/scripts/ci-build.sh +++ b/.github/scripts/ci-build.sh @@ -16,4 +16,5 @@ if [[ $arch == "x86_64" && $os == "linux" ]]; then cargo build --target i686-unknown-linux-gnu for_all_features "cargo build --target i686-unknown-linux-gnu" for_all_features "cargo build --release --target i686-unknown-linux-gnu" + cargo build --features perf_counter fi \ No newline at end of file diff --git a/.github/scripts/ci-common.sh b/.github/scripts/ci-common.sh index a443e465c0..271b67b9ce 100644 --- a/.github/scripts/ci-common.sh +++ b/.github/scripts/ci-common.sh @@ -40,7 +40,7 @@ init_non_exclusive_features() { while IFS= read -r line; do # Only parse non mutally exclusive features - if [[ $line == *"-- Non mutally exclusive features --"* ]]; then + if [[ $line == *"-- Non mutually exclusive features --"* ]]; then parse_features=true continue fi diff --git a/.github/scripts/ci-style.sh b/.github/scripts/ci-style.sh index 9715e8d1b2..545bd414b6 100755 --- a/.github/scripts/ci-style.sh +++ b/.github/scripts/ci-style.sh @@ -17,6 +17,9 @@ cargo clippy 
--manifest-path=vmbindings/dummyvm/Cargo.toml if [[ $arch == "x86_64" && $os == "linux" ]]; then for_all_features "cargo clippy --target i686-unknown-linux-gnu" for_all_features "cargo clippy --release --target i686-unknown-linux-gnu" + cargo clippy --features perf_counter + cargo clippy --release --features perf_counter + cargo clippy --tests --features perf_counter fi # check format diff --git a/.github/scripts/ci-test.sh b/.github/scripts/ci-test.sh index 1c701028be..90a7589ade 100755 --- a/.github/scripts/ci-test.sh +++ b/.github/scripts/ci-test.sh @@ -5,6 +5,7 @@ for_all_features "cargo test" # For x86_64-linux, also check for i686 if [[ $arch == "x86_64" && $os == "linux" ]]; then for_all_features "cargo test --target i686-unknown-linux-gnu" + cargo test --features perf_counter fi python examples/build.py diff --git a/Cargo.toml b/Cargo.toml index 33f0f676f1..7c673586c7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ atomic-traits = "0.2.0" atomic = "0.4.6" spin = "0.5.2" env_logger = "0.8.2" +pfm = {version = "0.0.7", optional = true} [dev-dependencies] crossbeam = "0.7.3" @@ -41,11 +42,15 @@ rand = "0.7.3" [features] default = [] +# This feature is only supported on x86-64 for now +# It's manually added to CI scripts +perf_counter = ["pfm"] + # .github/scripts/ci-common.sh extracts features from the following part (including from comments). # So be careful when editing or adding stuff to the section below. 
# Do not modify the following line - ci-common.sh matches it -# -- Non mutally exclusive features -- +# -- Non mutually exclusive features -- # spaces vm_space = [] diff --git a/src/scheduler/mod.rs b/src/scheduler/mod.rs index 9fd45e30cc..f659a2c070 100644 --- a/src/scheduler/mod.rs +++ b/src/scheduler/mod.rs @@ -15,7 +15,7 @@ pub(crate) use scheduler::MMTkScheduler; pub(self) use scheduler::Scheduler; mod stat; -mod work_counter; +pub(self) mod work_counter; mod work; pub use work::CoordinatorWork; diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs index c461fc8f69..6ff6736b52 100644 --- a/src/scheduler/scheduler.rs +++ b/src/scheduler/scheduler.rs @@ -410,6 +410,7 @@ mod tests { // println!("Original: {:?}", data); SCHEDULER.initialize(NUM_WORKERS, &(), VMThread::UNINITIALIZED); + SCHEDULER.enable_stat(); SCHEDULER.work_buckets[WorkBucketStage::Unconstrained] .add(Sort(unsafe { &mut *(data as *mut _) })); SCHEDULER.wait_for_completion(); diff --git a/src/scheduler/stat.rs b/src/scheduler/stat.rs index 2cc21da41f..259a9b24c8 100644 --- a/src/scheduler/stat.rs +++ b/src/scheduler/stat.rs @@ -1,7 +1,13 @@ //! 
Statistics for work packets use super::work_counter::{WorkCounter, WorkCounterBase, WorkDuration}; +#[cfg(feature = "perf_counter")] +use crate::scheduler::work_counter::WorkPerfEvent; +use crate::scheduler::Context; +use crate::vm::VMBinding; +use crate::MMTK; use std::any::TypeId; use std::collections::HashMap; +use std::marker::PhantomData; use std::sync::atomic::{AtomicBool, Ordering}; /// Merge and print the work-packet level statistics from all worker threads @@ -99,7 +105,7 @@ impl SchedulerStat { stat } /// Merge work counters from different worker threads - pub fn merge(&mut self, stat: &WorkerLocalStat) { + pub fn merge(&mut self, stat: &WorkerLocalStat) { // Merge work packet type ID to work packet name mapping for (id, name) in &stat.work_id_name_map { self.work_id_name_map.insert(*id, *name); @@ -144,7 +150,7 @@ impl WorkStat { /// Stop all work counters for the work packet type of the just executed /// work packet #[inline(always)] - pub fn end_of_work(&self, worker_stat: &mut WorkerLocalStat) { + pub fn end_of_work(&self, worker_stat: &mut WorkerLocalStat) { if !worker_stat.is_enabled() { return; }; @@ -165,15 +171,27 @@ impl WorkStat { } /// Worker thread local counterpart of [`SchedulerStat`] -#[derive(Default)] -pub struct WorkerLocalStat { +pub struct WorkerLocalStat { work_id_name_map: HashMap, work_counts: HashMap, work_counters: HashMap>>, enabled: AtomicBool, + _phantom: PhantomData, +} + +impl Default for WorkerLocalStat { + fn default() -> Self { + WorkerLocalStat { + work_id_name_map: Default::default(), + work_counts: Default::default(), + work_counters: Default::default(), + enabled: AtomicBool::new(false), + _phantom: Default::default(), + } + } } -impl WorkerLocalStat { +impl WorkerLocalStat { #[inline] pub fn is_enabled(&self) -> bool { self.enabled.load(Ordering::SeqCst) @@ -185,7 +203,12 @@ impl WorkerLocalStat { /// Measure the execution of a work packet by starting all counters for that /// type #[inline] - pub fn 
measure_work(&mut self, work_id: TypeId, work_name: &'static str) -> WorkStat { + pub fn measure_work( + &mut self, + work_id: TypeId, + work_name: &'static str, + context: &'static C, + ) -> WorkStat { let stat = WorkStat { type_id: work_id, type_name: work_name, @@ -193,15 +216,35 @@ impl WorkerLocalStat { if self.is_enabled() { self.work_counters .entry(work_id) - .or_insert_with(WorkerLocalStat::counter_set) + .or_insert_with(|| C::counter_set(context)) .iter_mut() .for_each(|c| c.start()); } stat } +} - // The set of work counters for all work packet types - fn counter_set() -> Vec> { +/// Private trait to let different contexts supply different sets of default +/// counters +trait HasCounterSet { + fn counter_set(context: &'static Self) -> Vec>; +} + +impl HasCounterSet for C { + default fn counter_set(_context: &'static Self) -> Vec> { vec![Box::new(WorkDuration::new())] } } + +/// Specialization for MMTk to read the options +#[allow(unused_variables, unused_mut)] +impl HasCounterSet for MMTK { + fn counter_set(mmtk: &'static Self) -> Vec> { + let mut counters: Vec> = vec![Box::new(WorkDuration::new())]; + #[cfg(feature = "perf_counter")] + for e in &mmtk.options.perf_events.events { + counters.push(box WorkPerfEvent::new(&e.0, e.1, e.2)); + } + counters + } +} diff --git a/src/scheduler/work.rs b/src/scheduler/work.rs index 61ba34bf68..14e69e3b84 100644 --- a/src/scheduler/work.rs +++ b/src/scheduler/work.rs @@ -10,7 +10,7 @@ pub trait Work: 'static + Send { fn do_work_with_stat(&mut self, worker: &mut Worker, context: &'static C) { let stat = worker .stat - .measure_work(TypeId::of::(), type_name::()); + .measure_work(TypeId::of::(), type_name::(), context); self.do_work(worker, context); stat.end_of_work(&mut worker.stat); } diff --git a/src/scheduler/work_counter.rs b/src/scheduler/work_counter.rs index eea7c08b1b..c0bfb00d55 100644 --- a/src/scheduler/work_counter.rs +++ b/src/scheduler/work_counter.rs @@ -133,3 +133,84 @@ impl WorkCounter for 
WorkDuration { &mut self.base } } + +#[cfg(feature = "perf_counter")] +mod perf_event { + //! Measure the perf events of work packets + //! + //! This is built on top of libpfm4. + //! The events to measure are parsed from MMTk option `perf_events` + use super::*; + use libc::{c_int, pid_t}; + use pfm::PerfEvent; + use std::fmt; + + /// Work counter for perf events + #[derive(Clone)] + pub struct WorkPerfEvent { + base: WorkCounterBase, + running: bool, + event_name: String, + pe: PerfEvent, + } + + impl WorkPerfEvent { + /// Create a work counter + /// + /// See `perf_event_open` for more details on `pid` and `cpu` + /// Examples: + /// 0, -1 measures the calling thread on all CPUs + /// -1, 0 measures all threads on CPU 0 + /// -1, -1 is invalid + pub fn new(name: &str, pid: pid_t, cpu: c_int) -> WorkPerfEvent { + let mut pe = PerfEvent::new(name) + .unwrap_or_else(|_| panic!("Failed to create perf event {}", name)); + pe.open(pid, cpu) + .unwrap_or_else(|_| panic!("Failed to open perf event {}", name)); + WorkPerfEvent { + base: Default::default(), + running: false, + event_name: name.to_string(), + pe, + } + } + } + + impl fmt::Debug for WorkPerfEvent { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("WorkPerfEvent") + .field("base", &self.base) + .field("running", &self.running) + .field("event_name", &self.event_name) + .finish() + } + } + + impl WorkCounter for WorkPerfEvent { + fn start(&mut self) { + self.running = true; + self.pe.reset(); + self.pe.enable(); + } + fn stop(&mut self) { + self.running = false; + let perf_event_value = self.pe.read().unwrap(); + self.base.merge_val(perf_event_value.value as f64); + // assert not multiplexing + assert_eq!(perf_event_value.time_enabled, perf_event_value.time_running); + self.pe.disable(); + } + fn name(&self) -> String { + self.event_name.to_owned() + } + fn get_base(&self) -> &WorkCounterBase { + &self.base + } + fn get_base_mut(&mut self) -> &mut WorkCounterBase { + &mut 
self.base + } + } +} + +#[cfg(feature = "perf_counter")] +pub(super) use perf_event::WorkPerfEvent; diff --git a/src/scheduler/worker.rs b/src/scheduler/worker.rs index d9ab24edc2..0ac96d3c95 100644 --- a/src/scheduler/worker.rs +++ b/src/scheduler/worker.rs @@ -52,7 +52,7 @@ pub struct Worker { local: WorkerLocalPtr, pub local_work_bucket: WorkBucket, pub sender: Sender>, - pub stat: WorkerLocalStat, + pub stat: WorkerLocalStat, context: Option<&'static C>, is_coordinator: bool, local_work_buffer: Vec<(WorkBucketStage, Box>)>, diff --git a/src/util/options.rs b/src/util/options.rs index cd1f5dd5c8..e51237ae49 100644 --- a/src/util/options.rs +++ b/src/util/options.rs @@ -2,6 +2,7 @@ use crate::util::constants::DEFAULT_STRESS_FACTOR; use std::cell::UnsafeCell; use std::default::Default; use std::ops::Deref; +use std::str::FromStr; custom_derive! { #[derive(Copy, Clone, EnumFromStr, Debug)] @@ -23,6 +24,50 @@ custom_derive! { } } +/// MMTk option for perf events +/// +/// The format is +/// ```text +/// <event> ::= <event-name> "," <pid> "," <cpu> +/// <events> ::= <event> ";" <events> | <event> | "" +/// ``` +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct PerfEventOptions { + pub events: Vec<(String, i32, i32)>, +} + +impl PerfEventOptions { + fn parse_perf_events(events: &str) -> Result<Vec<(String, i32, i32)>, String> { + events + .split(';') + .filter(|e| !e.is_empty()) + .map(|e| { + let e: Vec<&str> = e.split(',').collect(); + if e.len() != 3 { + Err("Please supply (event name, pid, cpu)".into()) + } else { + let event_name = e[0].into(); + let pid = e[1] + .parse() + .map_err(|_| String::from("Failed to parse pid"))?; + let cpu = e[2] + .parse() + .map_err(|_| String::from("Failed to parse cpu"))?; + Ok((event_name, pid, cpu)) + } + }) + .collect() + } +} + +impl FromStr for PerfEventOptions { + type Err = String; + + fn from_str(s: &str) -> Result<Self, Self::Err> { + PerfEventOptions::parse_perf_events(s).map(|events| PerfEventOptions { events }) + } +} + pub struct UnsafeOptionsWrapper(UnsafeCell); // TODO: We should carefully examine the unsync with 
UnsafeCell. We should be able to provide a safe implementation. @@ -49,6 +94,7 @@ impl Deref for UnsafeOptionsWrapper { fn always_valid(_: &T) -> bool { true } + macro_rules! options { ($($name:ident: $type:ty[$validator:expr] = $default:expr),*,) => [ options!($($name: $type[$validator] = $default),*); @@ -139,9 +185,10 @@ options! { // FIXME: This value is set for JikesRVM. We need a proper way to set options. // We need to set these values programmatically in VM specific code. vm_space_size: usize [|v: &usize| *v > 0] = 0x7cc_cccc, - // An example string option. Can be deleted when we have other string options. - // Make sure to include the string option tests in the unit tests. - example_string_option: String [|v: &str| v.starts_with("hello") ] = "hello world".to_string(), + // Perf events to measure + // Semicolons are used to separate events + // Each event is in the format of event_name,pid,cpu (see man perf_event_open for what pid and cpu mean) + perf_events: PerfEventOptions [always_valid] = PerfEventOptions {events: vec![]} } impl Options { @@ -175,6 +222,7 @@ impl Options { #[cfg(test)] mod tests { + use super::*; use crate::util::constants::DEFAULT_STRESS_FACTOR; use crate::util::options::Options; use crate::util::test_util::{serial_test, with_cleanup}; @@ -264,7 +312,7 @@ mod tests { fn test_str_option_default() { serial_test(|| { let options = Options::default(); - assert_eq!(&options.example_string_option as &str, "hello world"); + assert_eq!(&options.perf_events, &PerfEventOptions { events: vec![] }); }) } @@ -273,13 +321,18 @@ mod tests { serial_test(|| { with_cleanup( || { - std::env::set_var("MMTK_EXAMPLE_STRING_OPTION", "hello string"); + std::env::set_var("MMTK_PERF_EVENTS", "PERF_COUNT_HW_CPU_CYCLES,0,-1"); let options = Options::default(); - assert_eq!(&options.example_string_option as &str, "hello string"); + assert_eq!( + &options.perf_events, + &PerfEventOptions { + events: vec![("PERF_COUNT_HW_CPU_CYCLES".into(), 0, -1)] + } + ); }, || { 
- std::env::remove_var("MMTK_EXAMPLE_STRING_OPTION"); + std::env::remove_var("MMTK_PERF_EVENTS"); }, ) }) @@ -291,14 +344,14 @@ mod tests { with_cleanup( || { // The option needs to start with "hello", otherwise it is invalid. - std::env::set_var("MMTK_EXAMPLE_STRING_OPTION", "abc"); + std::env::set_var("MMTK_PERF_EVENTS", "PERF_COUNT_HW_CPU_CYCLES"); let options = Options::default(); // invalid value from env var, use default. - assert_eq!(&options.example_string_option as &str, "hello world"); + assert_eq!(&options.perf_events, &PerfEventOptions { events: vec![] }); }, || { - std::env::remove_var("MMTK_EXAMPLE_STRING_OPTION"); + std::env::remove_var("MMTK_PERF_EVENTS"); }, ) }) diff --git a/src/util/statistics/stats.rs b/src/util/statistics/stats.rs index b0e5099616..5c271bb3df 100644 --- a/src/util/statistics/stats.rs +++ b/src/util/statistics/stats.rs @@ -2,6 +2,9 @@ use crate::mmtk::MMTK; use crate::util::statistics::counter::*; use crate::util::statistics::Timer; use crate::vm::VMBinding; + +#[cfg(feature = "perf_counter")] +use pfm::Perfmon; use std::collections::HashMap; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use std::sync::Arc; @@ -37,6 +40,10 @@ impl SharedStats { pub struct Stats { gc_count: AtomicUsize, total_time: Arc>, + // crate `pfm` uses libpfm4 under the hood for parsing perf event names + // Initialization of libpfm4 is required before we can use `PerfEvent` types + #[cfg(feature = "perf_counter")] + perfmon: Perfmon, pub shared: Arc, counters: Mutex>>>, @@ -58,6 +65,12 @@ impl Stats { Stats { gc_count: AtomicUsize::new(0), total_time: t.clone(), + #[cfg(feature = "perf_counter")] + perfmon: { + let mut perfmon: Perfmon = Default::default(); + perfmon.initialize().expect("Perfmon failed to initialize"); + perfmon + }, shared, counters: Mutex::new(vec![t]),