From ce0c3f97a81a36e6ca61ed4d68ed00541a529248 Mon Sep 17 00:00:00 2001 From: Zixian Cai Date: Tue, 18 May 2021 14:15:50 +1000 Subject: [PATCH 01/28] Add perfmon to Stats --- Cargo.toml | 1 + src/util/statistics/stats.rs | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index ef8792aed1..e4fdcc2bb8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ atomic-traits = "0.2.0" atomic = "0.4.6" spin = "0.5.2" env_logger = "0.8.2" +pfm = "0.0.3" [dev-dependencies] crossbeam = "0.7.3" diff --git a/src/util/statistics/stats.rs b/src/util/statistics/stats.rs index 360551fea9..4011e1ebf6 100644 --- a/src/util/statistics/stats.rs +++ b/src/util/statistics/stats.rs @@ -6,6 +6,7 @@ use std::collections::HashMap; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use std::sync::Arc; use std::sync::Mutex; +use pfm::Perfmon; pub const MAX_PHASES: usize = 1 << 12; pub const MAX_COUNTERS: usize = 100; @@ -37,6 +38,7 @@ impl SharedStats { pub struct Stats { gc_count: AtomicUsize, total_time: Arc>, + perfmon: Perfmon, pub shared: Arc, counters: Mutex>>>, @@ -55,9 +57,12 @@ impl Stats { true, false, ))); + let mut perfmon: Perfmon = Default::default(); + perfmon.initialize().expect("Perfmon failed to initialize"); Stats { gc_count: AtomicUsize::new(0), total_time: t.clone(), + perfmon, shared, counters: Mutex::new(vec![t]), From 5f1fca90ed48cd4257ec39d3e1e6f300443e3663 Mon Sep 17 00:00:00 2001 From: Zixian Cai Date: Tue, 18 May 2021 19:02:01 +1000 Subject: [PATCH 02/28] Refactor worker stat type --- Cargo.toml | 2 +- src/scheduler/stat.rs | 189 +++++++++++++++++++++++------------------- 2 files changed, 104 insertions(+), 87 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e4fdcc2bb8..25f72548a8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,7 +33,7 @@ atomic-traits = "0.2.0" atomic = "0.4.6" spin = "0.5.2" env_logger = "0.8.2" -pfm = "0.0.3" +pfm = "0.0.5" [dev-dependencies] crossbeam = "0.7.3" diff --git a/src/scheduler/stat.rs b/src/scheduler/stat.rs index 9b5a8ca7ac..b5ff6b397d 100644 --- a/src/scheduler/stat.rs +++ b/src/scheduler/stat.rs @@ -1,56 +1,88 @@ use std::any::TypeId; use std::collections::HashMap; use std::sync::atomic::{AtomicBool, Ordering}; -use std::time::{Duration, SystemTime}; +use std::time::SystemTime; #[derive(Default)] pub struct SchedulerStat { work_id_name_map: HashMap, work_counts: HashMap, - work_durations: HashMap>, + work_durations: HashMap>, } -impl SchedulerStat { - /// Extract the work-packet name from the full type name. - /// i.e. simplifies `crate::scheduler::gc_work::SomeWorkPacket` to `SomeWorkPacket`. - fn work_name(&self, name: &str) -> String { - let end_index = name.find('<').unwrap_or_else(|| name.len()); - let name = name[..end_index].to_owned(); - match name.rfind(':') { - Some(start_index) => name[(start_index + 1)..end_index].to_owned(), - _ => name, +trait SimpleCounter { + // TODO: consolidate with crate::util::statistics::counter::Counter; + fn start(&mut self); + fn stop(&mut self); +} + +#[derive(Copy, Clone)] +struct WorkDuration { + total: f64, + min: f64, + max: f64, + start_value: Option, + running: bool, +} + +impl WorkDuration { + fn new() -> Self { + WorkDuration { + total: 0.0, + min: f64::INFINITY, + max: f64::NEG_INFINITY, + start_value: None, + running: false, } } - fn geomean(&self, values: &[f64]) -> f64 { - // Geomean(xs, N=xs.len()) = (PI(xs))^(1/N) = e^{log{PI(xs)^(1/N)}} = e^{ (1/N) * sum_{x \in xs}{ log(x) } } - let logs = values.iter().map(|v| v.ln()); - let sum_logs = logs.sum::(); - (sum_logs / values.len() as f64).exp() + fn process_duration(&mut self, duration: f64) { + self.min = self.min.min(duration); + self.max = self.max.max(duration); + self.total = self.total + duration; } - fn min(&self, values: &[f64]) -> f64 { - let mut min = values[0]; - for v in values { - if *v < min { - min = *v - } + fn merge_duration(&self, other: &Self) -> Self { + let min = self.min.min(other.min); + let max = self.max.max(other.max); + let total = self.total + other.total; + WorkDuration { + total, + min, + max, + start_value: None, + running: false, } - min } - fn max(&self, values: &[f64]) -> f64 { - let mut max = values[0]; - for v in values { - if *v > max { - max = *v - } - } - max + fn merge_duration_inplace(&mut self, other: &Self) { + self.min = self.min.min(other.min); + self.max = self.max.max(other.max); + self.total = self.total + other.total; + } +} +impl SimpleCounter for WorkDuration { + fn start(&mut self) { + self.start_value = Some(SystemTime::now()); + self.running = true; } - fn sum(&self, values: &[f64]) -> f64 { - values.iter().sum() + fn stop(&mut self) { + let duration = self.start_value.unwrap().elapsed().unwrap().as_nanos() as f64; + self.process_duration(duration); + } +} + +impl SchedulerStat { + /// Extract the work-packet name from the full type name. + /// i.e. simplifies `crate::scheduler::gc_work::SomeWorkPacket` to `SomeWorkPacket`. + fn work_name(&self, name: &str) -> String { + let end_index = name.find('<').unwrap_or_else(|| name.len()); + let name = name[..end_index].to_owned(); + match name.rfind(':') { + Some(start_index) => name[(start_index + 1)..end_index].to_owned(), + _ => name, + } } pub fn harness_stat(&self) -> HashMap { @@ -67,52 +99,40 @@ impl SchedulerStat { } stat.insert("total-work.count".to_owned(), format!("{}", total_count)); // Work execution times - let mut total_durations = vec![]; + let mut duration_overall = WorkDuration::new(); for (t, durations) in &self.work_durations { - for d in durations { - total_durations.push(*d); - } let n = self.work_id_name_map[t]; - let geomean = self.geomean( - &durations - .iter() - .map(|d| d.as_nanos() as f64) - .collect::>(), - ); - stat.insert( - format!("work.{}.time.geomean", self.work_name(n)), - format!("{:.2}", geomean), - ); - let sum = self.sum( - &durations - .iter() - .map(|d| d.as_nanos() as f64) - .collect::>(), - ); - stat.insert( - format!("work.{}.time.sum", self.work_name(n)), - format!("{:.2}", sum), - ); - } - let durations = total_durations - .iter() - .map(|d| d.as_nanos() as f64) - .collect::>(); - if !durations.is_empty() { + let fold = durations + .iter() + .fold(WorkDuration::new(), |acc, x| acc.merge_duration(x)); + duration_overall.merge_duration_inplace(&fold); stat.insert( - "total-work.time.geomean".to_owned(), - format!("{:.2}", self.geomean(&durations)), + format!("work.{}.time.total", self.work_name(n)), + format!("{:.2}", fold.total), ); stat.insert( - "total-work.time.min".to_owned(), - format!("{:.2}", self.min(&durations)), + format!("work.{}.time.min", self.work_name(n)), + format!("{:.2}", fold.min), ); stat.insert( - "total-work.time.max".to_owned(), - format!("{:.2}", self.max(&durations)), + format!("work.{}.time.max", self.work_name(n)), + format!("{:.2}", fold.max), ); } + stat.insert( + "total-work.time.total".to_owned(), + format!("{:.2}", duration_overall.total), + ); + stat.insert( + "total-work.time.min".to_owned(), + format!("{:.2}", duration_overall.min), + ); + stat.insert( + "total-work.time.max".to_owned(), + format!("{:.2}", duration_overall.max), + ); + stat } @@ -127,15 +147,11 @@ impl SchedulerStat { self.work_counts.insert(*id, *count); } } - for (id, durations) in &stat.work_durations { - if self.work_durations.contains_key(id) { - let work_durations = self.work_durations.get_mut(id).unwrap(); - for d in durations { - work_durations.push(*d); - } - } else { - self.work_durations.insert(*id, durations.clone()); - } + for (id, duration) in &stat.work_durations { + self.work_durations + .entry(*id) + .and_modify(|v| v.push(*duration)) + .or_insert(vec![*duration]); } } } @@ -143,7 +159,6 @@ impl SchedulerStat { pub struct WorkStat { type_id: TypeId, type_name: &'static str, - start_time: SystemTime, } impl WorkStat { @@ -156,12 +171,10 @@ impl WorkStat { .work_id_name_map .insert(self.type_id, self.type_name); *worker_stat.work_counts.entry(self.type_id).or_insert(0) += 1; - let duration = self.start_time.elapsed().unwrap(); worker_stat .work_durations .entry(self.type_id) - .or_insert_with(Vec::new) - .push(duration); + .and_modify(|v| v.stop()); } } @@ -169,7 +182,7 @@ impl WorkStat { pub struct WorkerLocalStat { work_id_name_map: HashMap, work_counts: HashMap, - work_durations: HashMap>, + work_durations: HashMap, enabled: AtomicBool, } @@ -184,10 +197,14 @@ impl WorkerLocalStat { } #[inline] pub fn measure_work(&mut self, work_id: TypeId, work_name: &'static str) -> WorkStat { - WorkStat { + let stat = WorkStat { type_id: work_id, type_name: work_name, - start_time: SystemTime::now(), - } + }; + self.work_durations + .entry(work_id) + .or_insert(WorkDuration::new()) + .start(); + stat } } From 86ae0805fc9390c21620ae5f5e024209e0d20d69 Mon Sep 17 00:00:00 2001 From: Zixian Cai Date: Tue, 18 May 2021 19:28:15 +1000 Subject: [PATCH 03/28] Revert "Add perfmon to Stats" This reverts commit ce0c3f97a81a36e6ca61ed4d68ed00541a529248. --- Cargo.toml | 1 - src/util/statistics/stats.rs | 5 ----- 2 files changed, 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 25f72548a8..ef8792aed1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,7 +33,6 @@ atomic-traits = "0.2.0" atomic = "0.4.6" spin = "0.5.2" env_logger = "0.8.2" -pfm = "0.0.5" [dev-dependencies] crossbeam = "0.7.3" diff --git a/src/util/statistics/stats.rs b/src/util/statistics/stats.rs index 4011e1ebf6..360551fea9 100644 --- a/src/util/statistics/stats.rs +++ b/src/util/statistics/stats.rs @@ -6,7 +6,6 @@ use std::collections::HashMap; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use std::sync::Arc; use std::sync::Mutex; -use pfm::Perfmon; pub const MAX_PHASES: usize = 1 << 12; pub const MAX_COUNTERS: usize = 100; @@ -38,7 +37,6 @@ impl SharedStats { pub struct Stats { gc_count: AtomicUsize, total_time: Arc>, - perfmon: Perfmon, pub shared: Arc, counters: Mutex>>>, @@ -57,12 +55,9 @@ impl Stats { true, false, ))); - let mut perfmon: Perfmon = Default::default(); - perfmon.initialize().expect("Perfmon failed to initialize"); Stats { gc_count: AtomicUsize::new(0), total_time: t.clone(), - perfmon, shared, counters: Mutex::new(vec![t]), From 863af8f9744a46f945be79a4ec8e8b7175ecaa92 Mon Sep 17 00:00:00 2001 From: Zixian Cai Date: Tue, 18 May 2021 20:28:36 +1000 Subject: [PATCH 04/28] Implement as trait object --- src/scheduler/stat.rs | 176 ++++++++++++++++++++++++++++-------------- 1 file changed, 116 insertions(+), 60 deletions(-) diff --git a/src/scheduler/stat.rs b/src/scheduler/stat.rs index b5ff6b397d..e96f31718a 100644 --- a/src/scheduler/stat.rs +++ b/src/scheduler/stat.rs @@ -7,61 +7,90 @@ use std::time::SystemTime; pub struct SchedulerStat { work_id_name_map: HashMap, work_counts: HashMap, - work_durations: HashMap>, -} - -trait SimpleCounter { - // TODO: consolidate with crate::util::statistics::counter::Counter; - fn start(&mut self); - fn stop(&mut self); + work_counters: HashMap>>>, } #[derive(Copy, Clone)] -struct WorkDuration { +struct WorkCounterBase { total: f64, min: f64, max: f64, - start_value: Option, - running: bool, } -impl WorkDuration { - fn new() -> Self { - WorkDuration { +impl Default for WorkCounterBase { + fn default() -> Self { + WorkCounterBase { total: 0.0, min: f64::INFINITY, max: f64::NEG_INFINITY, - start_value: None, - running: false, } } +} - fn process_duration(&mut self, duration: f64) { - self.min = self.min.min(duration); - self.max = self.max.max(duration); - self.total = self.total + duration; - } - - fn merge_duration(&self, other: &Self) -> Self { +impl WorkCounterBase { + fn merge(&self, other: &Self) -> Self { let min = self.min.min(other.min); let max = self.max.max(other.max); let total = self.total + other.total; - WorkDuration { - total, - min, - max, - start_value: None, - running: false, - } + WorkCounterBase { total, min, max } } - fn merge_duration_inplace(&mut self, other: &Self) { + fn merge_inplace(&mut self, other: &Self) { self.min = self.min.min(other.min); self.max = self.max.max(other.max); self.total = self.total + other.total; } + + fn merge_val(&mut self, val: f64) { + self.min = self.min.min(val); + self.max = self.max.max(val); + self.total = self.total + val; + } } -impl SimpleCounter for WorkDuration { + +trait WorkCounter: WorkCounterClone { + // TODO: consolidate with crate::util::statistics::counter::Counter; + fn start(&mut self); + fn stop(&mut self); + fn name(&self) -> &'static str; + fn get_base(&self) -> &WorkCounterBase; + fn get_base_mut(&mut self) -> &mut WorkCounterBase; +} + +trait WorkCounterClone { + fn clone_box(&self) -> Box; +} + +impl WorkCounterClone for T { + fn clone_box(&self) -> Box { + Box::new(self.clone()) + } +} + +impl Clone for Box { + fn clone(&self) -> Box { + self.clone_box() + } +} + +#[derive(Copy, Clone)] +struct WorkDuration { + base: WorkCounterBase, + start_value: Option, + running: bool, +} + +impl WorkDuration { + fn new() -> Self { + WorkDuration { + base: Default::default(), + start_value: None, + running: false, + } + } +} + +impl WorkCounter for WorkDuration { fn start(&mut self) { self.start_value = Some(SystemTime::now()); self.running = true; @@ -69,7 +98,19 @@ impl SimpleCounter for WorkDuration { fn stop(&mut self) { let duration = self.start_value.unwrap().elapsed().unwrap().as_nanos() as f64; - self.process_duration(duration); + self.base.merge_val(duration); + } + + fn name(&self) -> &'static str { + "time" + } + + fn get_base(&self) -> &WorkCounterBase { + &self.base + } + + fn get_base_mut(&mut self) -> &mut WorkCounterBase { + &mut self.base } } @@ -99,25 +140,30 @@ impl SchedulerStat { } stat.insert("total-work.count".to_owned(), format!("{}", total_count)); // Work execution times - let mut duration_overall = WorkDuration::new(); - for (t, durations) in &self.work_durations { + let mut duration_overall: WorkCounterBase = Default::default(); + for (t, vs) in &self.work_counters { let n = self.work_id_name_map[t]; - let fold = durations - .iter() - .fold(WorkDuration::new(), |acc, x| acc.merge_duration(x)); - duration_overall.merge_duration_inplace(&fold); - stat.insert( - format!("work.{}.time.total", self.work_name(n)), - format!("{:.2}", fold.total), - ); - stat.insert( - format!("work.{}.time.min", self.work_name(n)), - format!("{:.2}", fold.min), - ); - stat.insert( - format!("work.{}.time.max", self.work_name(n)), - format!("{:.2}", fold.max), - ); + for v in vs.iter() { + let fold = v + .iter() + .fold(Default::default(), |acc: WorkCounterBase, x| { + acc.merge(x.get_base()) + }); + duration_overall.merge_inplace(&fold); + let name = v.first().unwrap().name(); + stat.insert( + format!("work.{}.{}.total", self.work_name(n), name), + format!("{:.2}", fold.total), + ); + stat.insert( + format!("work.{}.{}.min", self.work_name(n), name), + format!("{:.2}", fold.min), + ); + stat.insert( + format!("work.{}.{}.max", self.work_name(n), name), + format!("{:.2}", fold.max), + ); + } } stat.insert( @@ -147,11 +193,14 @@ impl SchedulerStat { self.work_counts.insert(*id, *count); } } - for (id, duration) in &stat.work_durations { - self.work_durations + for (id, counters) in &stat.work_counters { + let vs = self + .work_counters .entry(*id) - .and_modify(|v| v.push(*duration)) - .or_insert(vec![*duration]); + .or_insert(vec![vec![]; counters.len()]); + for (v, c) in vs.iter_mut().zip(counters.iter()) { + v.push(c.clone()); + } } } } @@ -172,9 +221,11 @@ impl WorkStat { .insert(self.type_id, self.type_name); *worker_stat.work_counts.entry(self.type_id).or_insert(0) += 1; worker_stat - .work_durations + .work_counters .entry(self.type_id) - .and_modify(|v| v.stop()); + .and_modify(|v| { + v.iter_mut().for_each(|c| c.stop()); + }); } } @@ -182,7 +233,7 @@ impl WorkStat { pub struct WorkerLocalStat { work_id_name_map: HashMap, work_counts: HashMap, - work_durations: HashMap, + work_counters: HashMap>>, enabled: AtomicBool, } @@ -201,10 +252,15 @@ impl WorkerLocalStat { type_id: work_id, type_name: work_name, }; - self.work_durations + self.work_counters .entry(work_id) - .or_insert(WorkDuration::new()) - .start(); + .or_insert(WorkerLocalStat::counter_set()) + .iter_mut() + .for_each(|c| c.start()); stat } + + fn counter_set() -> Vec> { + vec![Box::new(WorkDuration::new())] + } } From 712b3b0369eda126e90d5edd84750c3b4dc6c978 Mon Sep 17 00:00:00 2001 From: Zixian Cai Date: Tue, 18 May 2021 22:26:40 +1000 Subject: [PATCH 05/28] Fix test --- src/scheduler/scheduler.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs index c461fc8f69..6ff6736b52 100644 --- a/src/scheduler/scheduler.rs +++ b/src/scheduler/scheduler.rs @@ -410,6 +410,7 @@ mod tests { // println!("Original: {:?}", data); SCHEDULER.initialize(NUM_WORKERS, &(), VMThread::UNINITIALIZED); + SCHEDULER.enable_stat(); SCHEDULER.work_buckets[WorkBucketStage::Unconstrained] .add(Sort(unsafe { &mut *(data as *mut _) })); SCHEDULER.wait_for_completion(); From b00d7a40d2013fba50a7e16831e5d22ee83f5e6c Mon Sep 17 00:00:00 2001 From: Zixian Cai Date: Tue, 18 May 2021 22:30:08 +1000 Subject: [PATCH 06/28] Fix clippy --- src/scheduler/stat.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/scheduler/stat.rs b/src/scheduler/stat.rs index e96f31718a..f10521f386 100644 --- a/src/scheduler/stat.rs +++ b/src/scheduler/stat.rs @@ -38,13 +38,13 @@ impl WorkCounterBase { fn merge_inplace(&mut self, other: &Self) { self.min = self.min.min(other.min); self.max = self.max.max(other.max); - self.total = self.total + other.total; + self.total += other.total; } fn merge_val(&mut self, val: f64) { self.min = self.min.min(val); self.max = self.max.max(val); - self.total = self.total + val; + self.total += val; } } @@ -197,7 +197,7 @@ impl SchedulerStat { let vs = self .work_counters .entry(*id) - .or_insert(vec![vec![]; counters.len()]); + .or_insert_with(|| vec![vec![]; counters.len()]); for (v, c) in vs.iter_mut().zip(counters.iter()) { v.push(c.clone()); } @@ -254,7 +254,7 @@ impl WorkerLocalStat { }; self.work_counters .entry(work_id) - .or_insert(WorkerLocalStat::counter_set()) + .or_insert_with(WorkerLocalStat::counter_set) .iter_mut() .for_each(|c| c.start()); stat From 69c0342fb20d798c2a1462bfbb984b01b18c4146 Mon Sep 17 00:00:00 2001 From: Zixian Cai Date: Tue, 18 May 2021 22:57:21 +1000 Subject: [PATCH 07/28] Refactor out work counters --- src/scheduler/mod.rs | 2 + src/scheduler/stat.rs | 106 +--------------------------------- src/scheduler/work_counter.rs | 105 +++++++++++++++++++++++++++++++++ 3 files changed, 108 insertions(+), 105 deletions(-) create mode 100644 src/scheduler/work_counter.rs diff --git a/src/scheduler/mod.rs b/src/scheduler/mod.rs index 3ae8bb05ce..e7c986339a 100644 --- a/src/scheduler/mod.rs +++ b/src/scheduler/mod.rs @@ -35,3 +35,5 @@ pub use gc_work::ProcessEdgesWork; // We should do some refactoring related to Scanning::SCAN_MUTATORS_IN_SAFEPOINT // to make sure this type is not exposed to the bindings. pub use gc_work::ScanStackRoot; + +mod work_counter; \ No newline at end of file diff --git a/src/scheduler/stat.rs b/src/scheduler/stat.rs index f10521f386..f1bf6e3c3b 100644 --- a/src/scheduler/stat.rs +++ b/src/scheduler/stat.rs @@ -1,7 +1,7 @@ +use super::work_counter::{WorkCounter, WorkCounterBase, WorkDuration}; use std::any::TypeId; use std::collections::HashMap; use std::sync::atomic::{AtomicBool, Ordering}; -use std::time::SystemTime; #[derive(Default)] pub struct SchedulerStat { @@ -10,110 +10,6 @@ pub struct SchedulerStat { work_counters: HashMap>>>, } -#[derive(Copy, Clone)] -struct WorkCounterBase { - total: f64, - min: f64, - max: f64, -} - -impl Default for WorkCounterBase { - fn default() -> Self { - WorkCounterBase { - total: 0.0, - min: f64::INFINITY, - max: f64::NEG_INFINITY, - } - } -} - -impl WorkCounterBase { - fn merge(&self, other: &Self) -> Self { - let min = self.min.min(other.min); - let max = self.max.max(other.max); - let total = self.total + other.total; - WorkCounterBase { total, min, max } - } - - fn merge_inplace(&mut self, other: &Self) { - self.min = self.min.min(other.min); - self.max = self.max.max(other.max); - self.total += other.total; - } - - fn merge_val(&mut self, val: f64) { - self.min = self.min.min(val); - self.max = self.max.max(val); - self.total += val; - } -} - -trait WorkCounter: WorkCounterClone { - // TODO: consolidate with crate::util::statistics::counter::Counter; - fn start(&mut self); - fn stop(&mut self); - fn name(&self) -> &'static str; - fn get_base(&self) -> &WorkCounterBase; - fn get_base_mut(&mut self) -> &mut WorkCounterBase; -} - -trait WorkCounterClone { - fn clone_box(&self) -> Box; -} - -impl WorkCounterClone for T { - fn clone_box(&self) -> Box { - Box::new(self.clone()) - } -} - -impl Clone for Box { - fn clone(&self) -> Box { - self.clone_box() - } -} - -#[derive(Copy, Clone)] -struct WorkDuration { - base: WorkCounterBase, - start_value: Option, - running: bool, -} - -impl WorkDuration { - fn new() -> Self { - WorkDuration { - base: Default::default(), - start_value: None, - running: false, - } - } -} - -impl WorkCounter for WorkDuration { - fn start(&mut self) { - self.start_value = Some(SystemTime::now()); - self.running = true; - } - - fn stop(&mut self) { - let duration = self.start_value.unwrap().elapsed().unwrap().as_nanos() as f64; - self.base.merge_val(duration); - } - - fn name(&self) -> &'static str { - "time" - } - - fn get_base(&self) -> &WorkCounterBase { - &self.base - } - - fn get_base_mut(&mut self) -> &mut WorkCounterBase { - &mut self.base - } -} - impl SchedulerStat { /// Extract the work-packet name from the full type name. /// i.e. simplifies `crate::scheduler::gc_work::SomeWorkPacket` to `SomeWorkPacket`. diff --git a/src/scheduler/work_counter.rs b/src/scheduler/work_counter.rs new file mode 100644 index 0000000000..42b52c8aba --- /dev/null +++ b/src/scheduler/work_counter.rs @@ -0,0 +1,105 @@ +use std::time::SystemTime; + +#[derive(Copy, Clone)] +pub(super) struct WorkCounterBase { + pub(super) total: f64, + pub(super) min: f64, + pub(super) max: f64, +} + +pub(super) trait WorkCounterClone { + fn clone_box(&self) -> Box; +} + +impl WorkCounterClone for T { + fn clone_box(&self) -> Box { + Box::new(self.clone()) + } +} + +pub(super) trait WorkCounter: WorkCounterClone { + // TODO: consolidate with crate::util::statistics::counter::Counter; + fn start(&mut self); + fn stop(&mut self); + fn name(&self) -> &'static str; + fn get_base(&self) -> &WorkCounterBase; + fn get_base_mut(&mut self) -> &mut WorkCounterBase; +} + +impl Clone for Box { + fn clone(&self) -> Box { + self.clone_box() + } +} + +impl Default for WorkCounterBase { + fn default() -> Self { + WorkCounterBase { + total: 0.0, + min: f64::INFINITY, + max: f64::NEG_INFINITY, + } + } +} + +impl WorkCounterBase { + pub(super) fn merge(&self, other: &Self) -> Self { + let min = self.min.min(other.min); + let max = self.max.max(other.max); + let total = self.total + other.total; + WorkCounterBase { total, min, max } + } + + pub(super) fn merge_inplace(&mut self, other: &Self) { + self.min = self.min.min(other.min); + self.max = self.max.max(other.max); + self.total += other.total; + } + + pub(super) fn merge_val(&mut self, val: f64) { + self.min = self.min.min(val); + self.max = self.max.max(val); + self.total += val; + } +} + +#[derive(Copy, Clone)] +pub(super) struct WorkDuration { + base: WorkCounterBase, + start_value: Option, + running: bool, +} + +impl WorkDuration { + pub(super) fn new() -> Self { + WorkDuration { + base: Default::default(), + start_value: None, + running: false, + } + } +} + +impl WorkCounter for WorkDuration { + fn start(&mut self) { + self.start_value = Some(SystemTime::now()); + self.running = true; + } + + fn stop(&mut self) { + let duration = self.start_value.unwrap().elapsed().unwrap().as_nanos() as f64; + self.base.merge_val(duration); + } + + fn name(&self) -> &'static str { + "time" + } + + fn get_base(&self) -> &WorkCounterBase { + &self.base + } + + fn get_base_mut(&mut self) -> &mut WorkCounterBase { + &mut self.base + } +} From b19368dda41a7e4e729a6e5489e7d511fe3532f2 Mon Sep 17 00:00:00 2001 From: Zixian Cai Date: Tue, 18 May 2021 23:08:48 +1000 Subject: [PATCH 08/28] Add perfmon initialization --- Cargo.toml | 1 + src/util/statistics/stats.rs | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index ef8792aed1..25f72548a8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ atomic-traits = "0.2.0" atomic = "0.4.6" spin = "0.5.2" env_logger = "0.8.2" +pfm = "0.0.5" [dev-dependencies] crossbeam = "0.7.3" diff --git a/src/util/statistics/stats.rs b/src/util/statistics/stats.rs index 360551fea9..4011e1ebf6 100644 --- a/src/util/statistics/stats.rs +++ b/src/util/statistics/stats.rs @@ -6,6 +6,7 @@ use std::collections::HashMap; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use std::sync::Arc; use std::sync::Mutex; +use pfm::Perfmon; pub const MAX_PHASES: usize = 1 << 12; pub const MAX_COUNTERS: usize = 100; @@ -37,6 +38,7 @@ impl SharedStats { pub struct Stats { gc_count: AtomicUsize, total_time: Arc>, + perfmon: Perfmon, pub shared: Arc, counters: Mutex>>>, @@ -55,9 +57,12 @@ impl Stats { true, false, ))); + let mut perfmon: Perfmon = Default::default(); + perfmon.initialize().expect("Perfmon failed to initialize"); Stats { gc_count: AtomicUsize::new(0), total_time: t.clone(), + perfmon, shared, counters: Mutex::new(vec![t]), From 046988a532261795b4accdff96ce0eb46726bb9b Mon Sep 17 00:00:00 2001 From: Zixian Cai Date: Tue, 18 May 2021 23:09:06 +1000 Subject: [PATCH 09/28] Make counter name a String --- src/scheduler/work_counter.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/scheduler/work_counter.rs b/src/scheduler/work_counter.rs index 42b52c8aba..7382ad462e 100644 --- a/src/scheduler/work_counter.rs +++ b/src/scheduler/work_counter.rs @@ -21,7 +21,7 @@ pub(super) trait WorkCounter: WorkCounterClone { // TODO: consolidate with crate::util::statistics::counter::Counter; fn start(&mut self); fn stop(&mut self); - fn name(&self) -> &'static str; + fn name(&self) -> String; fn get_base(&self) -> &WorkCounterBase; fn get_base_mut(&mut self) -> &mut WorkCounterBase; } @@ -91,8 +91,8 @@ impl WorkCounter for WorkDuration { self.base.merge_val(duration); } - fn name(&self) -> &'static str { - "time" + fn name(&self) -> String { + "time".to_owned() } fn get_base(&self) -> &WorkCounterBase { From dbc189300ab90fc8627a69b95b992f047fbd4546 Mon Sep 17 00:00:00 2001 From: Zixian Cai Date: Tue, 18 May 2021 23:40:28 +1000 Subject: [PATCH 10/28] Implement perf event work counter --- Cargo.toml | 2 +- src/scheduler/stat.rs | 9 +++++-- src/scheduler/work_counter.rs | 45 +++++++++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 25f72548a8..4912c838a8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,7 +33,7 @@ atomic-traits = "0.2.0" atomic = "0.4.6" spin = "0.5.2" env_logger = "0.8.2" -pfm = "0.0.5" +pfm = "0.0.6" [dev-dependencies] crossbeam = "0.7.3" diff --git a/src/scheduler/stat.rs b/src/scheduler/stat.rs index f1bf6e3c3b..e32c9140f6 100644 --- a/src/scheduler/stat.rs +++ b/src/scheduler/stat.rs @@ -1,4 +1,4 @@ -use super::work_counter::{WorkCounter, WorkCounterBase, WorkDuration}; +use super::work_counter::{WorkCounter, WorkCounterBase, WorkDuration, WorkPerfEvent}; use std::any::TypeId; use std::collections::HashMap; use std::sync::atomic::{AtomicBool, Ordering}; @@ -157,6 +157,11 @@ impl WorkerLocalStat { } fn counter_set() -> Vec> { - vec![Box::new(WorkDuration::new())] + let events = "PERF_COUNT_HW_CPU_CYCLES,PERF_COUNT_HW_INSTRUCTIONS,PERF_COUNT_HW_CACHE_REFERENCES,PERF_COUNT_HW_CACHE_MISSES,PERF_COUNT_HW_CACHE_L1D:MISS,PERF_COUNT_HW_CACHE_L1I:MISS"; + let mut counters: Vec> = vec![Box::new(WorkDuration::new())]; + for event in events.split(",") { + counters.push(Box::new(WorkPerfEvent::new(event))) + } + counters } } diff --git a/src/scheduler/work_counter.rs b/src/scheduler/work_counter.rs index 7382ad462e..181234039a 100644 --- a/src/scheduler/work_counter.rs +++ b/src/scheduler/work_counter.rs @@ -1,3 +1,4 @@ +use pfm::PerfEvent; use std::time::SystemTime; #[derive(Copy, Clone)] @@ -103,3 +104,47 @@ impl WorkCounter for WorkDuration { &mut self.base } } + +#[derive(Copy, Clone)] +pub(super) struct WorkPerfEvent { + base: WorkCounterBase, + running: bool, + event_name: &'static str, + pe: PerfEvent, +} + +impl WorkPerfEvent { + pub(super) fn new(name: &'static str) -> WorkPerfEvent { + let mut pe = PerfEvent::new(name).expect(&format!("Failed to create perf event {}", name)); + pe.open().expect(&format!("Failed to open perf event {}", name)); + WorkPerfEvent { + base: Default::default(), + running: false, + event_name: name, + pe, + } + } +} + +impl WorkCounter for WorkPerfEvent { + fn start(&mut self) { + self.running = true; + self.pe.reset(); + self.pe.enable(); + } + fn stop(&mut self) { + self.running = true; + let perf_event_value = self.pe.read().unwrap(); + self.base.merge_val(perf_event_value.value as f64); + self.pe.disable(); + } + fn name(&self) -> String { + self.event_name.to_owned() + } + fn get_base(&self) -> &WorkCounterBase { + &self.base + } + fn get_base_mut(&mut self) -> &mut WorkCounterBase { + &mut self.base + } +} From 488391f1b642ddcabaeea06f15494b210db92181 Mon Sep 17 00:00:00 2001 From: Zixian Cai Date: Wed, 19 May 2021 18:04:12 +1000 Subject: [PATCH 11/28] Don't insert counters when stat is disabled --- src/scheduler/stat.rs | 12 +++++++----- src/scheduler/work_counter.rs | 6 +++--- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/scheduler/stat.rs b/src/scheduler/stat.rs index e32c9140f6..e7f0690380 100644 --- a/src/scheduler/stat.rs +++ b/src/scheduler/stat.rs @@ -148,11 +148,13 @@ impl WorkerLocalStat { type_id: work_id, type_name: work_name, }; - self.work_counters - .entry(work_id) - .or_insert_with(WorkerLocalStat::counter_set) - .iter_mut() - .for_each(|c| c.start()); + if self.is_enabled() { + self.work_counters + .entry(work_id) + .or_insert_with(WorkerLocalStat::counter_set) + .iter_mut() + .for_each(|c| c.start()); + } stat } diff --git a/src/scheduler/work_counter.rs b/src/scheduler/work_counter.rs index 181234039a..1ad51d55d8 100644 --- a/src/scheduler/work_counter.rs +++ b/src/scheduler/work_counter.rs @@ -1,7 +1,7 @@ use pfm::PerfEvent; use std::time::SystemTime; -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Debug)] pub(super) struct WorkCounterBase { pub(super) total: f64, pub(super) min: f64, @@ -18,7 +18,7 @@ impl WorkCounterClone for T { } } -pub(super) trait WorkCounter: WorkCounterClone { +pub(super) trait WorkCounter: WorkCounterClone + std::fmt::Debug { // TODO: consolidate with crate::util::statistics::counter::Counter; fn start(&mut self); fn stop(&mut self); @@ -64,7 +64,7 @@ impl WorkCounterBase { } } -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Debug)] pub(super) struct WorkDuration { base: WorkCounterBase, start_value: Option, From bb9a51ed67442fe7d65dee534f5c60ed9f53d3ff Mon Sep 17 00:00:00 2001 From: Zixian Cai Date: Wed, 19 May 2021 21:13:05 +1000 Subject: [PATCH 12/28] Implement Debug for WorkPerfEvent --- src/scheduler/work_counter.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/scheduler/work_counter.rs b/src/scheduler/work_counter.rs index 1ad51d55d8..85652694b9 100644 --- a/src/scheduler/work_counter.rs +++ b/src/scheduler/work_counter.rs @@ -1,5 +1,6 @@ use pfm::PerfEvent; use std::time::SystemTime; +use std::fmt; #[derive(Copy, Clone, Debug)] pub(super) struct WorkCounterBase { @@ -126,6 +127,16 @@ impl WorkPerfEvent { } } +impl fmt::Debug for WorkPerfEvent { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("WorkPerfEvent") + .field("base", &self.base) + .field("running", &self.running) + .field("event_name", &self.event_name) + .finish() + } +} + impl WorkCounter for WorkPerfEvent { fn start(&mut self) { self.running = true; From 00f38d0c1bf11818cf740aa554654fc33487e8d5 Mon Sep 17 00:00:00 2001 From: Zixian Cai Date: Wed, 19 May 2021 21:14:27 +1000 Subject: [PATCH 13/28] Assert perf events are not multiplexing --- src/scheduler/work_counter.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/scheduler/work_counter.rs b/src/scheduler/work_counter.rs index 85652694b9..d24a767f11 100644 --- a/src/scheduler/work_counter.rs +++ b/src/scheduler/work_counter.rs @@ -147,6 +147,8 @@ impl WorkCounter for WorkPerfEvent { self.running = true; let perf_event_value = self.pe.read().unwrap(); self.base.merge_val(perf_event_value.value as f64); + // assert not multiplexing + assert_eq!(perf_event_value.time_enabled, perf_event_value.time_running); self.pe.disable(); } fn name(&self) -> String { From 7cbf3b5addc09ae74aa1fbb04240eb3f9b4739ad Mon Sep 17 00:00:00 2001 From: Zixian Cai Date: Fri, 28 May 2021 20:45:29 +1000 Subject: [PATCH 14/28] Fix build error --- src/scheduler/mod.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/scheduler/mod.rs b/src/scheduler/mod.rs index cc34b97f29..9fd45e30cc 100644 --- a/src/scheduler/mod.rs +++ b/src/scheduler/mod.rs @@ -36,5 +36,3 @@ pub use gc_work::ProcessEdgesWork; // We should do some refactoring related to Scanning::SCAN_MUTATORS_IN_SAFEPOINT // to make sure this type is not exposed to the bindings. pub use gc_work::ScanStackRoot; - -mod work_counter; \ No newline at end of file From 792397d85fd4878fea8d0c740088d74e7e255bf9 Mon Sep 17 00:00:00 2001 From: Zixian Cai Date: Fri, 11 Jun 2021 15:52:16 +1000 Subject: [PATCH 15/28] Configure perf events with MMTk options --- src/util/options.rs | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/src/util/options.rs b/src/util/options.rs index cd1f5dd5c8..fe1594532e 100644 --- a/src/util/options.rs +++ b/src/util/options.rs @@ -49,6 +49,23 @@ impl Deref for UnsafeOptionsWrapper { fn always_valid(_: &T) -> bool { true } + +fn validate_perf_events(events: &str) -> bool { + for event in events.split(";") { + let e: Vec<&str> = event.split(",").into_iter().collect(); + if e.len() != 3 { + return false; + } + if e[1].parse::().is_err() { + return false; + } + if e[2].parse::().is_err() { + return false; + } + } + true +} + macro_rules! options { ($($name:ident: $type:ty[$validator:expr] = $default:expr),*,) => [ options!($($name: $type[$validator] = $default),*); @@ -139,9 +156,10 @@ options! { // FIXME: This value is set for JikesRVM. We need a proper way to set options. // We need to set these values programmatically in VM specific code. vm_space_size: usize [|v: &usize| *v > 0] = 0x7cc_cccc, - // An example string option. Can be deleted when we have other string options. - // Make sure to include the string option tests in the unit tests. - example_string_option: String [|v: &str| v.starts_with("hello") ] = "hello world".to_string(), + // Perf events to measure + // Semicolons are used to separate events + // Each event is in the format of event_name,pid,cpu (see man perf_event_open for what pid and cpu mean) + perf_events: String [validate_perf_events] = "".to_string(), } impl Options { @@ -264,7 +282,7 @@ mod tests { fn test_str_option_default() { serial_test(|| { let options = Options::default(); - assert_eq!(&options.example_string_option as &str, "hello world"); + assert_eq!(&options.perf_events as &str, ""); }) } @@ -273,13 +291,13 @@ mod tests { serial_test(|| { with_cleanup( || { - std::env::set_var("MMTK_EXAMPLE_STRING_OPTION", "hello string"); + std::env::set_var("MMTK_PERF_EVENTS", "PERF_COUNT_HW_CPU_CYCLES,0,-1"); let options = Options::default(); - assert_eq!(&options.example_string_option as &str, "hello string"); + assert_eq!(&options.perf_events as &str, "PERF_COUNT_HW_CPU_CYCLES,0,-1"); }, || { - std::env::remove_var("MMTK_EXAMPLE_STRING_OPTION"); + std::env::remove_var("MMTK_PERF_EVENTS"); }, ) }) @@ -291,14 +309,14 @@ mod tests { with_cleanup( || { // The option needs to start with "hello", otherwise it is invalid. - std::env::set_var("MMTK_EXAMPLE_STRING_OPTION", "abc"); + std::env::set_var("MMTK_PERF_EVENTS", "PERF_COUNT_HW_CPU_CYCLES"); let options = Options::default(); // invalid value from env var, use default. - assert_eq!(&options.example_string_option as &str, "hello world"); + assert_eq!(&options.perf_events as &str, ""); }, || { - std::env::remove_var("MMTK_EXAMPLE_STRING_OPTION"); + std::env::remove_var("MMTK_PERF_EVENTS"); }, ) }) From ad77838a494f4ba9439a9d5519db7fbd48e22ca5 Mon Sep 17 00:00:00 2001 From: Zixian Cai Date: Sun, 13 Jun 2021 15:08:44 +1000 Subject: [PATCH 16/28] Use new MMTk option and conditional compilation --- Cargo.toml | 4 +- src/memory_manager.rs | 7 ++ src/scheduler/mod.rs | 4 +- src/scheduler/stat.rs | 17 +++- src/scheduler/work_counter.rs | 147 ++++++++++++++++++++++------------ src/util/options.rs | 22 ++--- src/util/statistics/stats.rs | 13 ++- 7 files changed, 140 insertions(+), 74 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 87cfa86e27..0ddf601ffd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,7 +33,7 @@ atomic-traits = "0.2.0" atomic = "0.4.6" spin = "0.5.2" env_logger = "0.8.2" -pfm = "0.0.6" +pfm = {version = "0.0.7", optional = true} [dev-dependencies] crossbeam = "0.7.3" @@ -71,6 +71,8 @@ side_gc_header = [] # To run expensive comprehensive runtime checks, such as checking duplicate edges extreme_assertions = [] +perf = ["pfm"] + # Do not modify the following line - ci-common.sh matches it # -- Mutally exclusive features -- # Only one feature from each group can be provided. Otherwise build will fail. diff --git a/src/memory_manager.rs b/src/memory_manager.rs index 0140482fcc..0a484c90a9 100644 --- a/src/memory_manager.rs +++ b/src/memory_manager.rs @@ -169,6 +169,13 @@ pub fn start_worker( /// * `tls`: The thread that wants to enable the collection. This value will be passed back to the VM in /// Collection::spawn_worker_thread() so that the VM knows the context. pub fn enable_collection(mmtk: &'static MMTK, tls: VMThread) { + #[cfg(feature = "perf")] + { + use crate::scheduler::stat::PERF_EVENTS; + use crate::scheduler::work_counter::parse_perf_events; + let mut w = PERF_EVENTS.write().unwrap(); + *w = parse_perf_events(&mmtk.options.perf_events) + } mmtk.scheduler.initialize(mmtk.options.threads, mmtk, tls); VM::VMCollection::spawn_worker_thread(tls, None); // spawn controller thread mmtk.plan.base().initialized.store(true, Ordering::SeqCst); diff --git a/src/scheduler/mod.rs b/src/scheduler/mod.rs index 9fd45e30cc..ebefc16bc2 100644 --- a/src/scheduler/mod.rs +++ b/src/scheduler/mod.rs @@ -14,8 +14,8 @@ pub(crate) use scheduler::CoordinatorMessage; pub(crate) use scheduler::MMTkScheduler; pub(self) use scheduler::Scheduler; -mod stat; -mod work_counter; +pub(crate) mod stat; +pub(crate) mod work_counter; mod work; pub use work::CoordinatorWork; diff --git a/src/scheduler/stat.rs b/src/scheduler/stat.rs index 262e53c4b3..f1b09d423f 100644 --- a/src/scheduler/stat.rs +++ b/src/scheduler/stat.rs @@ -1,8 +1,13 @@ //! Statistics for work packets -use super::work_counter::{WorkCounter, WorkCounterBase, WorkDuration, WorkPerfEvent}; +use super::work_counter::{WorkCounter, WorkCounterBase, WorkDuration}; use std::any::TypeId; use std::collections::HashMap; use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::RwLock; + +lazy_static! { + pub static ref PERF_EVENTS: RwLock> = RwLock::new(vec![]); +} /// Merge and print the work-packet level statistics from all worker threads #[derive(Default)] @@ -214,11 +219,15 @@ impl WorkerLocalStat { } // The set of work counters for all work packet types + #[allow(unused_mut)] fn counter_set() -> Vec> { - let events = "PERF_COUNT_HW_CPU_CYCLES,PERF_COUNT_HW_INSTRUCTIONS,PERF_COUNT_HW_CACHE_REFERENCES,PERF_COUNT_HW_CACHE_MISSES,PERF_COUNT_HW_CACHE_L1D:MISS,PERF_COUNT_HW_CACHE_L1I:MISS"; let mut counters: Vec> = vec![Box::new(WorkDuration::new())]; - for event in events.split(",") { - counters.push(Box::new(WorkPerfEvent::new(event))) + #[cfg(feature = "perf")] + { + use super::work_counter::WorkPerfEvent; + for e in &*PERF_EVENTS.read().unwrap() { + counters.push(box WorkPerfEvent::new(&e.0, e.1, e.2)); + } } counters } diff --git a/src/scheduler/work_counter.rs b/src/scheduler/work_counter.rs index de7b44dfbe..2b55bcee45 100644 --- a/src/scheduler/work_counter.rs +++ b/src/scheduler/work_counter.rs @@ -4,9 +4,7 @@ //! work-packet level statistics //! //! See [`crate::util::statistics`] for collecting statistics over a GC cycle -use pfm::PerfEvent; use std::time::SystemTime; -use std::fmt; /// Common struct for different work counters /// @@ -136,58 +134,107 @@ impl WorkCounter for WorkDuration { } } -#[derive(Copy, Clone)] -pub(super) struct WorkPerfEvent { - base: WorkCounterBase, - running: bool, - event_name: &'static str, - pe: PerfEvent, -} - -impl WorkPerfEvent { - pub(super) fn new(name: &'static str) -> WorkPerfEvent { - let mut pe = PerfEvent::new(name).expect(&format!("Failed to create perf event {}", name)); - pe.open().expect(&format!("Failed to open perf event {}", name)); - WorkPerfEvent { - base: Default::default(), - running: false, - event_name: name, - pe, +#[cfg(feature = "perf")] +mod perf_event { + use super::*; + use libc::{c_int, pid_t}; + use pfm::PerfEvent; + use std::fmt; + + pub fn validate_perf_events(events: &str) -> bool { + for event in events.split(";") { + let e: Vec<&str> = event.split(",").into_iter().collect(); + if e.len() != 3 { + return false; + } + if e[1].parse::().is_err() { + return false; + } + if e[2].parse::().is_err() { + return false; + } + } + true + } + + pub fn parse_perf_events(events: &str) -> Vec<(String, i32, i32)> { + events + .split(";") + .map(|e| { + let e: Vec<&str> = e.split(",").into_iter().collect(); + if e.len() != 3 { + panic!("Please supply (event name, pid, cpu)"); + } + // 0, -1 measures the callthing thread on all CPUs + // -1, 0 measures all threads on CPU 0 + // -1, -1 is invalid + ( + e[0].into(), + e[1].parse().expect("Failed to parse pid"), + e[2].parse().expect("Failed to parse cpu"), + ) + }) + .collect() + } + + #[derive(Clone)] + pub struct WorkPerfEvent { + base: WorkCounterBase, + running: bool, + event_name: String, + pe: PerfEvent, + } + + impl WorkPerfEvent { + pub fn new(name: &str, pid: pid_t, cpu: c_int) -> WorkPerfEvent { + let mut pe = + PerfEvent::new(name).expect(&format!("Failed to create perf event {}", name)); + pe.open(pid, cpu) + .expect(&format!("Failed to open perf event {}", name)); + WorkPerfEvent { + base: Default::default(), + running: false, + event_name: name.to_string(), + pe, + } } } -} -impl fmt::Debug for WorkPerfEvent { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("WorkPerfEvent") - .field("base", &self.base) - .field("running", &self.running) - .field("event_name", &self.event_name) - .finish() + impl fmt::Debug for WorkPerfEvent { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("WorkPerfEvent") + .field("base", &self.base) + .field("running", &self.running) + .field("event_name", &self.event_name) + .finish() + } } -} -impl WorkCounter for WorkPerfEvent { - fn start(&mut self) { - self.running = true; - self.pe.reset(); - self.pe.enable(); - } - fn stop(&mut self) { - self.running = true; - let perf_event_value = self.pe.read().unwrap(); - self.base.merge_val(perf_event_value.value as f64); - // assert not multiplexing - assert_eq!(perf_event_value.time_enabled, perf_event_value.time_running); - self.pe.disable(); - } - fn name(&self) -> String { - self.event_name.to_owned() - } - fn get_base(&self) -> &WorkCounterBase { - &self.base - } - fn get_base_mut(&mut self) -> &mut WorkCounterBase { - &mut self.base + impl WorkCounter for WorkPerfEvent { + fn start(&mut self) { + self.running = true; + self.pe.reset(); + self.pe.enable(); + } + fn stop(&mut self) { + self.running = true; + let perf_event_value = self.pe.read().unwrap(); + self.base.merge_val(perf_event_value.value as f64); + // assert not multiplexing + assert_eq!(perf_event_value.time_enabled, perf_event_value.time_running); + self.pe.disable(); + } + fn name(&self) -> String { + self.event_name.to_owned() + } + fn get_base(&self) -> &WorkCounterBase { + &self.base + } + fn get_base_mut(&mut self) -> &mut WorkCounterBase { + &mut self.base + } } } + +#[cfg(feature = "perf")] +pub use perf_event::{parse_perf_events, validate_perf_events, WorkPerfEvent}; diff --git a/src/util/options.rs b/src/util/options.rs index fe1594532e..ca0675fa95 100644 --- a/src/util/options.rs +++ b/src/util/options.rs @@ -1,3 +1,5 @@ +#[cfg(feature = "perf")] +use crate::scheduler::work_counter::validate_perf_events; use crate::util::constants::DEFAULT_STRESS_FACTOR; use std::cell::UnsafeCell; use std::default::Default; @@ -50,20 +52,9 @@ fn always_valid(_: &T) -> bool { true } +#[cfg(not(feature = "perf"))] fn validate_perf_events(events: &str) -> bool { - for event in events.split(";") { - let e: Vec<&str> = event.split(",").into_iter().collect(); - if e.len() != 3 { - return false; - } - if e[1].parse::().is_err() { - return false; - } - if e[2].parse::().is_err() { - return false; - } - } - true + !(events.len() > 0) } macro_rules! options { @@ -294,7 +285,10 @@ mod tests { std::env::set_var("MMTK_PERF_EVENTS", "PERF_COUNT_HW_CPU_CYCLES,0,-1"); let options = Options::default(); - assert_eq!(&options.perf_events as &str, "PERF_COUNT_HW_CPU_CYCLES,0,-1"); + assert_eq!( + &options.perf_events as &str, + "PERF_COUNT_HW_CPU_CYCLES,0,-1" + ); }, || { std::env::remove_var("MMTK_PERF_EVENTS"); diff --git a/src/util/statistics/stats.rs b/src/util/statistics/stats.rs index abc335e744..6b46ed53d2 100644 --- a/src/util/statistics/stats.rs +++ b/src/util/statistics/stats.rs @@ -2,11 +2,13 @@ use crate::mmtk::MMTK; use crate::util::statistics::counter::*; use crate::util::statistics::Timer; use crate::vm::VMBinding; + +#[cfg(feature = "perf")] +use pfm::Perfmon; use std::collections::HashMap; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use std::sync::Arc; use std::sync::Mutex; -use pfm::Perfmon; pub const MAX_PHASES: usize = 1 << 12; pub const MAX_COUNTERS: usize = 100; @@ -38,6 +40,7 @@ impl SharedStats { pub struct Stats { gc_count: AtomicUsize, total_time: Arc>, + #[cfg(feature = "perf")] perfmon: Perfmon, pub shared: Arc, @@ -57,11 +60,15 @@ impl Stats { true, false, ))); - let mut perfmon: Perfmon = Default::default(); - perfmon.initialize().expect("Perfmon failed to initialize"); + #[cfg(feature = "perf")] + { + let mut perfmon: Perfmon = Default::default(); + perfmon.initialize().expect("Perfmon failed to initialize"); + } Stats { gc_count: AtomicUsize::new(0), total_time: t.clone(), + #[cfg(feature = "perf")] perfmon, shared, From 32e8561eb6ad831a3f16cfbd656143f03f8e2e79 Mon Sep 17 00:00:00 2001 From: Zixian Cai Date: Sun, 13 Jun 2021 15:17:47 +1000 Subject: [PATCH 17/28] Fix scope issue --- src/scheduler/work_counter.rs | 3 ++- src/util/statistics/stats.rs | 7 +++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/scheduler/work_counter.rs b/src/scheduler/work_counter.rs index 2b55bcee45..758da7ddef 100644 --- a/src/scheduler/work_counter.rs +++ b/src/scheduler/work_counter.rs @@ -160,12 +160,13 @@ mod perf_event { pub fn parse_perf_events(events: &str) -> Vec<(String, i32, i32)> { events .split(";") + .filter(|e| e.len() > 0) .map(|e| { let e: Vec<&str> = e.split(",").into_iter().collect(); if e.len() != 3 { panic!("Please supply (event name, pid, cpu)"); } - // 0, -1 measures the callthing thread on all CPUs + // 0, -1 measures the calling thread on all CPUs // -1, 0 measures all threads on CPU 0 // -1, -1 is invalid ( diff --git a/src/util/statistics/stats.rs b/src/util/statistics/stats.rs index 6b46ed53d2..682c235c0d 100644 --- a/src/util/statistics/stats.rs +++ b/src/util/statistics/stats.rs @@ -61,10 +61,9 @@ impl Stats { false, ))); #[cfg(feature = "perf")] - { - let mut perfmon: Perfmon = Default::default(); - perfmon.initialize().expect("Perfmon failed to initialize"); - } + let mut perfmon: Perfmon = Default::default(); + #[cfg(feature = "perf")] + perfmon.initialize().expect("Perfmon failed to initialize"); Stats { gc_count: AtomicUsize::new(0), total_time: t.clone(), From 397aa20cba929a4b1af12cf41dd46dd7002f2322 Mon Sep 17 00:00:00 2001 From: Zixian Cai Date: Sun, 13 Jun 2021 15:39:18 +1000 Subject: [PATCH 18/28] Documentation --- Cargo.toml | 6 ++++-- src/memory_manager.rs | 1 + src/scheduler/stat.rs | 2 ++ src/scheduler/work_counter.rs | 20 ++++++++++++++++---- src/util/options.rs | 2 +- src/util/statistics/stats.rs | 3 +++ 6 files changed, 27 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 0ddf601ffd..9db43848d7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,6 +42,10 @@ rand = "0.7.3" [features] default = [] +# This feature is only supported on x86-64 for now +# Not included below so that CI won't fail +perf = ["pfm"] + # .github/scripts/ci-common.sh extracts features from the following part (including from comments). # So be careful when editing or adding stuff to the section below. @@ -71,8 +75,6 @@ side_gc_header = [] # To run expensive comprehensive runtime checks, such as checking duplicate edges extreme_assertions = [] -perf = ["pfm"] - # Do not modify the following line - ci-common.sh matches it # -- Mutally exclusive features -- # Only one feature from each group can be provided. Otherwise build will fail. diff --git a/src/memory_manager.rs b/src/memory_manager.rs index 0a484c90a9..6c0c6691c1 100644 --- a/src/memory_manager.rs +++ b/src/memory_manager.rs @@ -169,6 +169,7 @@ pub fn start_worker( /// * `tls`: The thread that wants to enable the collection. This value will be passed back to the VM in /// Collection::spawn_worker_thread() so that the VM knows the context. pub fn enable_collection(mmtk: &'static MMTK, tls: VMThread) { + // initialize a list of perf events for work thread initialization #[cfg(feature = "perf")] { use crate::scheduler::stat::PERF_EVENTS; diff --git a/src/scheduler/stat.rs b/src/scheduler/stat.rs index f1b09d423f..8cce33a46d 100644 --- a/src/scheduler/stat.rs +++ b/src/scheduler/stat.rs @@ -3,8 +3,10 @@ use super::work_counter::{WorkCounter, WorkCounterBase, WorkDuration}; use std::any::TypeId; use std::collections::HashMap; use std::sync::atomic::{AtomicBool, Ordering}; +#[cfg(feature = "perf")] use std::sync::RwLock; +#[cfg(feature = "perf")] lazy_static! { pub static ref PERF_EVENTS: RwLock> = RwLock::new(vec![]); } diff --git a/src/scheduler/work_counter.rs b/src/scheduler/work_counter.rs index 758da7ddef..ca9705d9d2 100644 --- a/src/scheduler/work_counter.rs +++ b/src/scheduler/work_counter.rs @@ -136,13 +136,20 @@ impl WorkCounter for WorkDuration { #[cfg(feature = "perf")] mod perf_event { + //! Measure the perf events of work packets + //! + //! This is built on top of libpfm4. + //! The events to measure are parsed from MMTk option `perf_events` + //! The format is + //! ::= "," "," + //! ::= ";" | | "" use super::*; use libc::{c_int, pid_t}; use pfm::PerfEvent; use std::fmt; pub fn validate_perf_events(events: &str) -> bool { - for event in events.split(";") { + for event in events.split(";").filter(|e| e.len() > 0) { let e: Vec<&str> = event.split(",").into_iter().collect(); if e.len() != 3 { return false; @@ -166,9 +173,6 @@ mod perf_event { if e.len() != 3 { panic!("Please supply (event name, pid, cpu)"); } - // 0, -1 measures the calling thread on all CPUs - // -1, 0 measures all threads on CPU 0 - // -1, -1 is invalid ( e[0].into(), e[1].parse().expect("Failed to parse pid"), @@ -178,6 +182,7 @@ mod perf_event { .collect() } + /// Work counter for perf events #[derive(Clone)] pub struct WorkPerfEvent { base: WorkCounterBase, @@ -187,6 +192,13 @@ mod perf_event { } impl WorkPerfEvent { + /// Create a work counter + /// + /// See `perf_event_open` for more details on `pid` and `cpu` + /// Examples: + /// 0, -1 measures the calling thread on all CPUs + /// -1, 0 measures all threads on CPU 0 + /// -1, -1 is invalid pub fn new(name: &str, pid: pid_t, cpu: c_int) -> WorkPerfEvent { let mut pe = PerfEvent::new(name).expect(&format!("Failed to create perf event {}", name)); diff --git a/src/util/options.rs b/src/util/options.rs index ca0675fa95..272be8471e 100644 --- a/src/util/options.rs +++ b/src/util/options.rs @@ -54,7 +54,7 @@ fn always_valid(_: &T) -> bool { #[cfg(not(feature = "perf"))] fn validate_perf_events(events: &str) -> bool { - !(events.len() > 0) + !events.is_empty() } macro_rules! options { diff --git a/src/util/statistics/stats.rs b/src/util/statistics/stats.rs index 682c235c0d..329c323e95 100644 --- a/src/util/statistics/stats.rs +++ b/src/util/statistics/stats.rs @@ -60,6 +60,9 @@ impl Stats { true, false, ))); + // The following lines are NOT put into a conditionally compiled block + // Otherwise, the perfmon variable will not be accessible to the outer + // scope #[cfg(feature = "perf")] let mut perfmon: Perfmon = Default::default(); #[cfg(feature = "perf")] From 29fd7a78c0739a008899484cc9be7542b692a53d Mon Sep 17 00:00:00 2001 From: Zixian Cai Date: Sun, 13 Jun 2021 15:52:49 +1000 Subject: [PATCH 19/28] Fix test error when compiled without perf feature --- src/scheduler/work_counter.rs | 34 +++++++++++++++++----------------- src/util/options.rs | 6 ------ 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/src/scheduler/work_counter.rs b/src/scheduler/work_counter.rs index ca9705d9d2..ea36e6a5c4 100644 --- a/src/scheduler/work_counter.rs +++ b/src/scheduler/work_counter.rs @@ -134,6 +134,22 @@ impl WorkCounter for WorkDuration { } } +pub fn validate_perf_events(events: &str) -> bool { + for event in events.split(";").filter(|e| e.len() > 0) { + let e: Vec<&str> = event.split(",").into_iter().collect(); + if e.len() != 3 { + return false; + } + if e[1].parse::().is_err() { + return false; + } + if e[2].parse::().is_err() { + return false; + } + } + true +} + #[cfg(feature = "perf")] mod perf_event { //! Measure the perf events of work packets @@ -148,22 +164,6 @@ mod perf_event { use pfm::PerfEvent; use std::fmt; - pub fn validate_perf_events(events: &str) -> bool { - for event in events.split(";").filter(|e| e.len() > 0) { - let e: Vec<&str> = event.split(",").into_iter().collect(); - if e.len() != 3 { - return false; - } - if e[1].parse::().is_err() { - return false; - } - if e[2].parse::().is_err() { - return false; - } - } - true - } - pub fn parse_perf_events(events: &str) -> Vec<(String, i32, i32)> { events .split(";") @@ -250,4 +250,4 @@ mod perf_event { } #[cfg(feature = "perf")] -pub use perf_event::{parse_perf_events, validate_perf_events, WorkPerfEvent}; +pub use perf_event::{parse_perf_events, WorkPerfEvent}; diff --git a/src/util/options.rs b/src/util/options.rs index 272be8471e..0a56a0eca0 100644 --- a/src/util/options.rs +++ b/src/util/options.rs @@ -1,4 +1,3 @@ -#[cfg(feature = "perf")] use crate::scheduler::work_counter::validate_perf_events; use crate::util::constants::DEFAULT_STRESS_FACTOR; use std::cell::UnsafeCell; @@ -52,11 +51,6 @@ fn always_valid(_: &T) -> bool { true } -#[cfg(not(feature = "perf"))] -fn validate_perf_events(events: &str) -> bool { - !events.is_empty() -} - macro_rules! options { ($($name:ident: $type:ty[$validator:expr] = $default:expr),*,) => [ options!($($name: $type[$validator] = $default),*); From 26b76003dda923c17e80fafbe4631d98fbc94180 Mon Sep 17 00:00:00 2001 From: Zixian Cai Date: Sun, 13 Jun 2021 16:29:03 +1000 Subject: [PATCH 20/28] Clippy fixes --- src/scheduler/work_counter.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/scheduler/work_counter.rs b/src/scheduler/work_counter.rs index ea36e6a5c4..541bfb9ebc 100644 --- a/src/scheduler/work_counter.rs +++ b/src/scheduler/work_counter.rs @@ -135,8 +135,8 @@ impl WorkCounter for WorkDuration { } pub fn validate_perf_events(events: &str) -> bool { - for event in events.split(";").filter(|e| e.len() > 0) { - let e: Vec<&str> = event.split(",").into_iter().collect(); + for event in events.split(';').filter(|e| !e.is_empty()) { + let e: Vec<&str> = event.split(',').into_iter().collect(); if e.len() != 3 { return false; } @@ -166,10 +166,10 @@ mod perf_event { pub fn parse_perf_events(events: &str) -> Vec<(String, i32, i32)> { events - .split(";") - .filter(|e| e.len() > 0) + .split(';') + .filter(|e| !e.is_empty()) .map(|e| { - let e: Vec<&str> = e.split(",").into_iter().collect(); + let e: Vec<&str> = e.split(',').into_iter().collect(); if e.len() != 3 { panic!("Please supply (event name, pid, cpu)"); } @@ -201,9 +201,9 @@ mod perf_event { /// -1, -1 is invalid pub fn new(name: &str, pid: pid_t, cpu: c_int) -> WorkPerfEvent { let mut pe = - PerfEvent::new(name).expect(&format!("Failed to create perf event {}", name)); + PerfEvent::new(name).unwrap_or_else(|_| panic!("Failed to create perf event {}", name)); pe.open(pid, cpu) - .expect(&format!("Failed to open perf event {}", name)); + .unwrap_or_else(|_| panic!("Failed to open perf event {}", name)); WorkPerfEvent { base: Default::default(), running: false, From ff9a63673585596a380fe0547135daa023757a19 Mon Sep 17 00:00:00 2001 From: Zixian Cai Date: Sun, 13 Jun 2021 20:57:56 +1000 Subject: [PATCH 21/28] cargo fmt --- src/scheduler/work_counter.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/scheduler/work_counter.rs b/src/scheduler/work_counter.rs index 541bfb9ebc..c3a6d82e31 100644 --- a/src/scheduler/work_counter.rs +++ b/src/scheduler/work_counter.rs @@ -200,8 +200,8 @@ mod perf_event { /// -1, 0 measures all threads on CPU 0 /// -1, -1 is invalid pub fn new(name: &str, pid: pid_t, cpu: c_int) -> WorkPerfEvent { - let mut pe = - PerfEvent::new(name).unwrap_or_else(|_| panic!("Failed to create perf event {}", name)); + let mut pe = PerfEvent::new(name) + .unwrap_or_else(|_| panic!("Failed to create perf event {}", name)); pe.open(pid, cpu) .unwrap_or_else(|_| panic!("Failed to open perf event {}", name)); WorkPerfEvent { From 6fd31d5c9bb5daf5af58fa114d80243ebb4313b7 Mon Sep 17 00:00:00 2001 From: Zixian Cai Date: Fri, 25 Jun 2021 14:28:27 +1000 Subject: [PATCH 22/28] Get rid of duplicated code --- src/scheduler/stat.rs | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/scheduler/stat.rs b/src/scheduler/stat.rs index 8cce33a46d..71f6712909 100644 --- a/src/scheduler/stat.rs +++ b/src/scheduler/stat.rs @@ -103,19 +103,6 @@ impl SchedulerStat { format!("{:.2}", duration_overall.max), ); - stat.insert( - "total-work.time.total".to_owned(), - format!("{:.2}", duration_overall.total), - ); - stat.insert( - "total-work.time.min".to_owned(), - format!("{:.2}", duration_overall.min), - ); - stat.insert( - "total-work.time.max".to_owned(), - format!("{:.2}", duration_overall.max), - ); - stat } /// Merge work counters from different worker threads From 01019787a527957640d254eef0d55b42d466efe1 Mon Sep 17 00:00:00 2001 From: Zixian Cai Date: Fri, 25 Jun 2021 14:32:26 +1000 Subject: [PATCH 23/28] Fix initialization of Perfmon --- src/util/statistics/stats.rs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/util/statistics/stats.rs b/src/util/statistics/stats.rs index 329c323e95..2df5b7591d 100644 --- a/src/util/statistics/stats.rs +++ b/src/util/statistics/stats.rs @@ -40,6 +40,8 @@ impl SharedStats { pub struct Stats { gc_count: AtomicUsize, total_time: Arc>, + // crate `pfm` uses libpfm4 under the hood for parsing perf event names + // Initialization of libpfm4 is required before we can use `PerfEvent` types #[cfg(feature = "perf")] perfmon: Perfmon, @@ -60,18 +62,15 @@ impl Stats { true, false, ))); - // The following lines are NOT put into a conditionally compiled block - // Otherwise, the perfmon variable will not be accessible to the outer - // scope - #[cfg(feature = "perf")] - let mut perfmon: Perfmon = Default::default(); - #[cfg(feature = "perf")] - perfmon.initialize().expect("Perfmon failed to initialize"); Stats { gc_count: AtomicUsize::new(0), total_time: t.clone(), #[cfg(feature = "perf")] - perfmon, + perfmon: { + let mut perfmon: Perfmon = Default::default(); + perfmon.initialize().expect("Perfmon failed to initialize"); + perfmon + }, shared, counters: Mutex::new(vec![t]), From bda23d296a20ea6ccb45f96ab87ba0deb358da0a Mon Sep 17 00:00:00 2001 From: Zixian Cai Date: Fri, 25 Jun 2021 16:30:43 +1000 Subject: [PATCH 24/28] Fix typo --- .github/scripts/ci-common.sh | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/scripts/ci-common.sh b/.github/scripts/ci-common.sh index a443e465c0..271b67b9ce 100644 --- a/.github/scripts/ci-common.sh +++ b/.github/scripts/ci-common.sh @@ -40,7 +40,7 @@ init_non_exclusive_features() { while IFS= read -r line; do # Only parse non mutally exclusive features - if [[ $line == *"-- Non mutally exclusive features --"* ]]; then + if [[ $line == *"-- Non mutually exclusive features --"* ]]; then parse_features=true continue fi diff --git a/Cargo.toml b/Cargo.toml index 9db43848d7..09642e1ed4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,7 +50,7 @@ perf = ["pfm"] # So be careful when editing or adding stuff to the section below. # Do not modify the following line - ci-common.sh matches it -# -- Non mutally exclusive features -- +# -- Non mutually exclusive features -- # spaces vm_space = [] From 1cfc308ef9c2ec7932b60300be109a26358787d9 Mon Sep 17 00:00:00 2001 From: Zixian Cai Date: Fri, 25 Jun 2021 16:39:42 +1000 Subject: [PATCH 25/28] Refactor the perf event option into a struct --- Cargo.toml | 2 +- src/memory_manager.rs | 8 ---- src/scheduler/mod.rs | 4 +- src/scheduler/stat.rs | 72 ++++++++++++++++++++++++----------- src/scheduler/work.rs | 2 +- src/scheduler/work_counter.rs | 43 ++------------------- src/scheduler/worker.rs | 2 +- src/util/options.rs | 48 ++++++++++++++++++++++- src/util/statistics/stats.rs | 6 +-- 9 files changed, 106 insertions(+), 81 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 09642e1ed4..7fdd10caa6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,7 +44,7 @@ default = [] # This feature is only supported on x86-64 for now # Not included below so that CI won't fail -perf = ["pfm"] +perf_counter = ["pfm"] # .github/scripts/ci-common.sh extracts features from the following part (including from comments). # So be careful when editing or adding stuff to the section below. diff --git a/src/memory_manager.rs b/src/memory_manager.rs index 6c0c6691c1..0140482fcc 100644 --- a/src/memory_manager.rs +++ b/src/memory_manager.rs @@ -169,14 +169,6 @@ pub fn start_worker( /// * `tls`: The thread that wants to enable the collection. This value will be passed back to the VM in /// Collection::spawn_worker_thread() so that the VM knows the context. pub fn enable_collection(mmtk: &'static MMTK, tls: VMThread) { - // initialize a list of perf events for work thread initialization - #[cfg(feature = "perf")] - { - use crate::scheduler::stat::PERF_EVENTS; - use crate::scheduler::work_counter::parse_perf_events; - let mut w = PERF_EVENTS.write().unwrap(); - *w = parse_perf_events(&mmtk.options.perf_events) - } mmtk.scheduler.initialize(mmtk.options.threads, mmtk, tls); VM::VMCollection::spawn_worker_thread(tls, None); // spawn controller thread mmtk.plan.base().initialized.store(true, Ordering::SeqCst); diff --git a/src/scheduler/mod.rs b/src/scheduler/mod.rs index ebefc16bc2..f659a2c070 100644 --- a/src/scheduler/mod.rs +++ b/src/scheduler/mod.rs @@ -14,8 +14,8 @@ pub(crate) use scheduler::CoordinatorMessage; pub(crate) use scheduler::MMTkScheduler; pub(self) use scheduler::Scheduler; -pub(crate) mod stat; -pub(crate) mod work_counter; +mod stat; +pub(self) mod work_counter; mod work; pub use work::CoordinatorWork; diff --git a/src/scheduler/stat.rs b/src/scheduler/stat.rs index 71f6712909..4fb68f7f35 100644 --- a/src/scheduler/stat.rs +++ b/src/scheduler/stat.rs @@ -1,15 +1,14 @@ //! Statistics for work packets use super::work_counter::{WorkCounter, WorkCounterBase, WorkDuration}; +#[cfg(feature = "perf_counter")] +use crate::scheduler::work_counter::WorkPerfEvent; +use crate::scheduler::Context; +use crate::vm::VMBinding; +use crate::MMTK; use std::any::TypeId; use std::collections::HashMap; +use std::marker::PhantomData; use std::sync::atomic::{AtomicBool, Ordering}; -#[cfg(feature = "perf")] -use std::sync::RwLock; - -#[cfg(feature = "perf")] -lazy_static! { - pub static ref PERF_EVENTS: RwLock> = RwLock::new(vec![]); -} /// Merge and print the work-packet level statistics from all worker threads #[derive(Default)] @@ -106,7 +105,7 @@ impl SchedulerStat { stat } /// Merge work counters from different worker threads - pub fn merge(&mut self, stat: &WorkerLocalStat) { + pub fn merge(&mut self, stat: &WorkerLocalStat) { // Merge work packet type ID to work packet name mapping for (id, name) in &stat.work_id_name_map { self.work_id_name_map.insert(*id, *name); @@ -151,7 +150,7 @@ impl WorkStat { /// Stop all work counters for the work packet type of the just executed /// work packet #[inline(always)] - pub fn end_of_work(&self, worker_stat: &mut WorkerLocalStat) { + pub fn end_of_work(&self, worker_stat: &mut WorkerLocalStat) { if !worker_stat.is_enabled() { return; }; @@ -172,15 +171,27 @@ impl WorkStat { } /// Worker thread local counterpart of [`SchedulerStat`] -#[derive(Default)] -pub struct WorkerLocalStat { +pub struct WorkerLocalStat { work_id_name_map: HashMap, work_counts: HashMap, work_counters: HashMap>>, enabled: AtomicBool, + _phantom: PhantomData, +} + +impl Default for WorkerLocalStat { + fn default() -> Self { + WorkerLocalStat { + work_id_name_map: Default::default(), + work_counts: Default::default(), + work_counters: Default::default(), + enabled: AtomicBool::new(false), + _phantom: Default::default(), + } + } } -impl WorkerLocalStat { +impl WorkerLocalStat { #[inline] pub fn is_enabled(&self) -> bool { self.enabled.load(Ordering::SeqCst) @@ -192,7 +203,12 @@ impl WorkerLocalStat { /// Measure the execution of a work packet by starting all counters for that /// type #[inline] - pub fn measure_work(&mut self, work_id: TypeId, work_name: &'static str) -> WorkStat { + pub fn measure_work( + &mut self, + work_id: TypeId, + work_name: &'static str, + context: &'static C, + ) -> WorkStat { let stat = WorkStat { type_id: work_id, type_name: work_name, @@ -200,23 +216,33 @@ impl WorkerLocalStat { if self.is_enabled() { self.work_counters .entry(work_id) - .or_insert_with(WorkerLocalStat::counter_set) + .or_insert_with(|| C::counter_set(context)) .iter_mut() .for_each(|c| c.start()); } stat } +} + +/// Private trait to let different contexts supply different sets of default +/// counters +trait HasCounterSet { + fn counter_set(context: &'static Self) -> Vec>; +} - // The set of work counters for all work packet types - #[allow(unused_mut)] - fn counter_set() -> Vec> { +impl HasCounterSet for C { + default fn counter_set(_context: &'static Self) -> Vec> { + vec![Box::new(WorkDuration::new())] + } +} + +/// Specialization for MMTk to read the options +impl HasCounterSet for MMTK { + fn counter_set(mmtk: &'static Self) -> Vec> { let mut counters: Vec> = vec![Box::new(WorkDuration::new())]; - #[cfg(feature = "perf")] - { - use super::work_counter::WorkPerfEvent; - for e in &*PERF_EVENTS.read().unwrap() { - counters.push(box WorkPerfEvent::new(&e.0, e.1, e.2)); - } + #[cfg(feature = "perf_counter")] + for e in &mmtk.options.perf_events.events { + counters.push(box WorkPerfEvent::new(&e.0, e.1, e.2)); } counters } diff --git a/src/scheduler/work.rs b/src/scheduler/work.rs index 61ba34bf68..14e69e3b84 100644 --- a/src/scheduler/work.rs +++ b/src/scheduler/work.rs @@ -10,7 +10,7 @@ pub trait Work: 'static + Send { fn do_work_with_stat(&mut self, worker: &mut Worker, context: &'static C) { let stat = worker .stat - .measure_work(TypeId::of::(), type_name::()); + .measure_work(TypeId::of::(), type_name::(), context); self.do_work(worker, context); stat.end_of_work(&mut worker.stat); } diff --git a/src/scheduler/work_counter.rs b/src/scheduler/work_counter.rs index c3a6d82e31..c0bfb00d55 100644 --- a/src/scheduler/work_counter.rs +++ b/src/scheduler/work_counter.rs @@ -134,54 +134,17 @@ impl WorkCounter for WorkDuration { } } -pub fn validate_perf_events(events: &str) -> bool { - for event in events.split(';').filter(|e| !e.is_empty()) { - let e: Vec<&str> = event.split(',').into_iter().collect(); - if e.len() != 3 { - return false; - } - if e[1].parse::().is_err() { - return false; - } - if e[2].parse::().is_err() { - return false; - } - } - true -} - -#[cfg(feature = "perf")] +#[cfg(feature = "perf_counter")] mod perf_event { //! Measure the perf events of work packets //! //! This is built on top of libpfm4. //! The events to measure are parsed from MMTk option `perf_events` - //! The format is - //! ::= "," "," - //! ::= ";" | | "" use super::*; use libc::{c_int, pid_t}; use pfm::PerfEvent; use std::fmt; - pub fn parse_perf_events(events: &str) -> Vec<(String, i32, i32)> { - events - .split(';') - .filter(|e| !e.is_empty()) - .map(|e| { - let e: Vec<&str> = e.split(',').into_iter().collect(); - if e.len() != 3 { - panic!("Please supply (event name, pid, cpu)"); - } - ( - e[0].into(), - e[1].parse().expect("Failed to parse pid"), - e[2].parse().expect("Failed to parse cpu"), - ) - }) - .collect() - } - /// Work counter for perf events #[derive(Clone)] pub struct WorkPerfEvent { @@ -249,5 +212,5 @@ mod perf_event { } } -#[cfg(feature = "perf")] -pub use perf_event::{parse_perf_events, WorkPerfEvent}; +#[cfg(feature = "perf_counter")] +pub(super) use perf_event::WorkPerfEvent; diff --git a/src/scheduler/worker.rs b/src/scheduler/worker.rs index d9ab24edc2..0ac96d3c95 100644 --- a/src/scheduler/worker.rs +++ b/src/scheduler/worker.rs @@ -52,7 +52,7 @@ pub struct Worker { local: WorkerLocalPtr, pub local_work_bucket: WorkBucket, pub sender: Sender>, - pub stat: WorkerLocalStat, + pub stat: WorkerLocalStat, context: Option<&'static C>, is_coordinator: bool, local_work_buffer: Vec<(WorkBucketStage, Box>)>, diff --git a/src/util/options.rs b/src/util/options.rs index 0a56a0eca0..14489baffb 100644 --- a/src/util/options.rs +++ b/src/util/options.rs @@ -1,8 +1,8 @@ -use crate::scheduler::work_counter::validate_perf_events; use crate::util::constants::DEFAULT_STRESS_FACTOR; use std::cell::UnsafeCell; use std::default::Default; use std::ops::Deref; +use std::str::FromStr; custom_derive! { #[derive(Copy, Clone, EnumFromStr, Debug)] @@ -24,6 +24,50 @@ custom_derive! { } } +/// MMTk option for perf events +/// +/// The format is +/// ``` +/// ::= "," "," +/// ::= ";" | | "" +/// ``` +#[derive(Debug, Clone)] +pub struct PerfEventOptions { + pub events: Vec<(String, i32, i32)>, +} + +impl PerfEventOptions { + fn parse_perf_events(events: &str) -> Result, String> { + events + .split(';') + .filter(|e| !e.is_empty()) + .map(|e| { + let e: Vec<&str> = e.split(',').into_iter().collect(); + if e.len() != 3 { + Err("Please supply (event name, pid, cpu)".into()) + } else { + let event_name = e[0].into(); + let pid = e[1] + .parse() + .map_err(|_| String::from("Failed to parse cpu"))?; + let cpu = e[2] + .parse() + .map_err(|_| String::from("Failed to parse cpu"))?; + Ok((event_name, pid, cpu)) + } + }) + .collect() + } +} + +impl FromStr for PerfEventOptions { + type Err = String; + + fn from_str(s: &str) -> Result { + PerfEventOptions::parse_perf_events(s).map(|events| PerfEventOptions { events }) + } +} + pub struct UnsafeOptionsWrapper(UnsafeCell); // TODO: We should carefully examine the unsync with UnsafeCell. We should be able to provide a safe implementation. @@ -144,7 +188,7 @@ options! { // Perf events to measure // Semicolons are used to separate events // Each event is in the format of event_name,pid,cpu (see man perf_event_open for what pid and cpu mean) - perf_events: String [validate_perf_events] = "".to_string(), + perf_events: PerfEventOptions [always_valid] = PerfEventOptions {events: vec![]} } impl Options { diff --git a/src/util/statistics/stats.rs b/src/util/statistics/stats.rs index 2df5b7591d..5c271bb3df 100644 --- a/src/util/statistics/stats.rs +++ b/src/util/statistics/stats.rs @@ -3,7 +3,7 @@ use crate::util::statistics::counter::*; use crate::util::statistics::Timer; use crate::vm::VMBinding; -#[cfg(feature = "perf")] +#[cfg(feature = "perf_counter")] use pfm::Perfmon; use std::collections::HashMap; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; @@ -42,7 +42,7 @@ pub struct Stats { total_time: Arc>, // crate `pfm` uses libpfm4 under the hood for parsing perf event names // Initialization of libpfm4 is required before we can use `PerfEvent` types - #[cfg(feature = "perf")] + #[cfg(feature = "perf_counter")] perfmon: Perfmon, pub shared: Arc, @@ -65,7 +65,7 @@ impl Stats { Stats { gc_count: AtomicUsize::new(0), total_time: t.clone(), - #[cfg(feature = "perf")] + #[cfg(feature = "perf_counter")] perfmon: { let mut perfmon: Perfmon = Default::default(); perfmon.initialize().expect("Perfmon failed to initialize"); From 7a1e3b3601d553826e2ea3fc758538a6f1265fd7 Mon Sep 17 00:00:00 2001 From: Zixian Cai Date: Fri, 25 Jun 2021 16:43:48 +1000 Subject: [PATCH 26/28] Include perf_counter feature in CI --- .github/scripts/ci-build.sh | 1 + .github/scripts/ci-style.sh | 3 +++ .github/scripts/ci-test.sh | 1 + Cargo.toml | 2 +- 4 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/scripts/ci-build.sh b/.github/scripts/ci-build.sh index 092480d954..4e69251937 100755 --- a/.github/scripts/ci-build.sh +++ b/.github/scripts/ci-build.sh @@ -16,4 +16,5 @@ if [[ $arch == "x86_64" && $os == "linux" ]]; then cargo build --target i686-unknown-linux-gnu for_all_features "cargo build --target i686-unknown-linux-gnu" for_all_features "cargo build --release --target i686-unknown-linux-gnu" + cargo build --features perf_counter fi \ No newline at end of file diff --git a/.github/scripts/ci-style.sh b/.github/scripts/ci-style.sh index 9715e8d1b2..545bd414b6 100755 --- a/.github/scripts/ci-style.sh +++ b/.github/scripts/ci-style.sh @@ -17,6 +17,9 @@ cargo clippy --manifest-path=vmbindings/dummyvm/Cargo.toml if [[ $arch == "x86_64" && $os == "linux" ]]; then for_all_features "cargo clippy --target i686-unknown-linux-gnu" for_all_features "cargo clippy --release --target i686-unknown-linux-gnu" + cargo clippy --features perf_counter + cargo clippy --release --features perf_counter + cargo clippy --tests --features perf_counter fi # check format diff --git a/.github/scripts/ci-test.sh b/.github/scripts/ci-test.sh index 1c701028be..90a7589ade 100755 --- a/.github/scripts/ci-test.sh +++ b/.github/scripts/ci-test.sh @@ -5,6 +5,7 @@ for_all_features "cargo test" # For x86_64-linux, also check for i686 if [[ $arch == "x86_64" && $os == "linux" ]]; then for_all_features "cargo test --target i686-unknown-linux-gnu" + cargo test --features perf_counter fi python examples/build.py diff --git a/Cargo.toml b/Cargo.toml index 7fdd10caa6..b399e6c957 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,7 +43,7 @@ rand = "0.7.3" default = [] # This feature is only supported on x86-64 for now -# Not included below so that CI won't fail +# It's manually added to CI scripts perf_counter = ["pfm"] # .github/scripts/ci-common.sh extracts features from the following part (including from comments). From 2edb2a58d4cfe2aa3271356646d9fcfa12cc2f46 Mon Sep 17 00:00:00 2001 From: Zixian Cai Date: Fri, 25 Jun 2021 16:51:55 +1000 Subject: [PATCH 27/28] Suppress warning due to conditional compilation --- src/scheduler/stat.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/scheduler/stat.rs b/src/scheduler/stat.rs index 4fb68f7f35..ed5147eb13 100644 --- a/src/scheduler/stat.rs +++ b/src/scheduler/stat.rs @@ -237,6 +237,7 @@ impl HasCounterSet for C { } /// Specialization for MMTk to read the options +#[allow(unused_variables,unused_mut)] impl HasCounterSet for MMTK { fn counter_set(mmtk: &'static Self) -> Vec> { let mut counters: Vec> = vec![Box::new(WorkDuration::new())]; From 8163837063694c83c700b46412f26d59a0f9e56b Mon Sep 17 00:00:00 2001 From: Zixian Cai Date: Fri, 25 Jun 2021 17:05:24 +1000 Subject: [PATCH 28/28] Fix unit tests --- src/scheduler/stat.rs | 2 +- src/util/options.rs | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/scheduler/stat.rs b/src/scheduler/stat.rs index ed5147eb13..259a9b24c8 100644 --- a/src/scheduler/stat.rs +++ b/src/scheduler/stat.rs @@ -237,7 +237,7 @@ impl HasCounterSet for C { } /// Specialization for MMTk to read the options -#[allow(unused_variables,unused_mut)] +#[allow(unused_variables, unused_mut)] impl HasCounterSet for MMTK { fn counter_set(mmtk: &'static Self) -> Vec> { let mut counters: Vec> = vec![Box::new(WorkDuration::new())]; diff --git a/src/util/options.rs b/src/util/options.rs index 14489baffb..e51237ae49 100644 --- a/src/util/options.rs +++ b/src/util/options.rs @@ -25,13 +25,13 @@ custom_derive! { } /// MMTk option for perf events -/// +/// /// The format is /// ``` /// ::= "," "," /// ::= ";" | | "" /// ``` -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct PerfEventOptions { pub events: Vec<(String, i32, i32)>, } @@ -222,6 +222,7 @@ impl Options { #[cfg(test)] mod tests { + use super::*; use crate::util::constants::DEFAULT_STRESS_FACTOR; use crate::util::options::Options; use crate::util::test_util::{serial_test, with_cleanup}; @@ -311,7 +312,7 @@ mod tests { fn test_str_option_default() { serial_test(|| { let options = Options::default(); - assert_eq!(&options.perf_events as &str, ""); + assert_eq!(&options.perf_events, &PerfEventOptions { events: vec![] }); }) } @@ -324,8 +325,10 @@ mod tests { let options = Options::default(); assert_eq!( - &options.perf_events as &str, - "PERF_COUNT_HW_CPU_CYCLES,0,-1" + &options.perf_events, + &PerfEventOptions { + events: vec![("PERF_COUNT_HW_CPU_CYCLES".into(), 0, -1)] + } ); }, || { @@ -345,7 +348,7 @@ mod tests { let options = Options::default(); // invalid value from env var, use default. - assert_eq!(&options.perf_events as &str, ""); + assert_eq!(&options.perf_events, &PerfEventOptions { events: vec![] }); }, || { std::env::remove_var("MMTK_PERF_EVENTS");