diff --git a/Cargo.toml b/Cargo.toml index bfbc2ba3b1..0eab3b7e6a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,7 +33,7 @@ atomic-traits = "0.2.0" atomic = "0.4.6" spin = "0.5.2" env_logger = "0.8.2" -pfm = {version = "0.0.7", optional = true} +pfm = {version = "0.0.8", optional = true} [dev-dependencies] crossbeam = "0.7.3" diff --git a/src/memory_manager.rs b/src/memory_manager.rs index f240cbd8c3..0b4b26207d 100644 --- a/src/memory_manager.rs +++ b/src/memory_manager.rs @@ -50,6 +50,23 @@ pub fn gc_init(mmtk: &'static mut MMTK, heap_size: usize) { "MMTk failed to initialize the logger. Possibly a logger has been initialized by user." ), } + #[cfg(all(feature = "perf_counter", target_os = "linux"))] + { + use std::fs::File; + use std::io::Read; + let mut status = File::open("/proc/self/status").unwrap(); + let mut contents = String::new(); + status.read_to_string(&mut contents).unwrap(); + for line in contents.lines() { + let split: Vec<&str> = line.split('\t').collect(); + if split[0] == "Threads:" { + let threads = split[1].parse::().unwrap(); + if threads != 1 { + warn!("Current process has {} threads, process-wide perf event measurement will only include child threads spawned from this threadas", threads); + } + } + } + } assert!(heap_size > 0, "Invalid heap size"); mmtk.plan .gc_init(heap_size, &crate::VM_MAP, &mmtk.scheduler); diff --git a/src/plan/global.rs b/src/plan/global.rs index f0ee07f516..e001444210 100644 --- a/src/plan/global.rs +++ b/src/plan/global.rs @@ -411,7 +411,7 @@ impl BasePlan { constraints: &'static PlanConstraints, global_side_metadata_specs: Vec, ) -> BasePlan { - let stats = Stats::new(); + let stats = Stats::new(&options); // Initializing the analysis manager and routines #[cfg(feature = "analysis")] let analysis_manager = AnalysisManager::new(&stats); diff --git a/src/scheduler/stat.rs b/src/scheduler/stat.rs index 259a9b24c8..4db0099008 100644 --- a/src/scheduler/stat.rs +++ b/src/scheduler/stat.rs @@ -242,7 +242,7 @@ impl HasCounterSet for MMTK { fn counter_set(mmtk: &'static Self) -> Vec> { let mut counters: Vec> = vec![Box::new(WorkDuration::new())]; #[cfg(feature = "perf_counter")] - for e in &mmtk.options.perf_events.events { + for e in &mmtk.options.work_perf_events.events { counters.push(box WorkPerfEvent::new(&e.0, e.1, e.2)); } counters diff --git a/src/scheduler/work_counter.rs b/src/scheduler/work_counter.rs index c0bfb00d55..39e5686384 100644 --- a/src/scheduler/work_counter.rs +++ b/src/scheduler/work_counter.rs @@ -163,7 +163,7 @@ mod perf_event { /// -1, 0 measures all threads on CPU 0 /// -1, -1 is invalid pub fn new(name: &str, pid: pid_t, cpu: c_int) -> WorkPerfEvent { - let mut pe = PerfEvent::new(name) + let mut pe = PerfEvent::new(name, false) .unwrap_or_else(|_| panic!("Failed to create perf event {}", name)); pe.open(pid, cpu) .unwrap_or_else(|_| panic!("Failed to open perf event {}", name)); diff --git a/src/util/options.rs b/src/util/options.rs index 8410d10ec9..7d3ca35b29 100644 --- a/src/util/options.rs +++ b/src/util/options.rs @@ -189,7 +189,11 @@ options! { // Perf events to measure // Semicolons are used to separate events // Each event is in the format of event_name,pid,cpu (see man perf_event_open for what pid and cpu mean) - perf_events: PerfEventOptions [always_valid] = PerfEventOptions {events: vec![]} + // + // Measuring perf events for work packets + work_perf_events: PerfEventOptions [always_valid] = PerfEventOptions {events: vec![]}, + // Measuring perf events for GC and mutators + phase_perf_events: PerfEventOptions [always_valid] = PerfEventOptions {events: vec![]} } impl Options { @@ -313,7 +317,10 @@ mod tests { fn test_str_option_default() { serial_test(|| { let options = Options::default(); - assert_eq!(&options.perf_events, &PerfEventOptions { events: vec![] }); + assert_eq!( + &options.work_perf_events, + &PerfEventOptions { events: vec![] } + ); }) } @@ -322,18 +329,18 @@ mod tests { serial_test(|| { with_cleanup( || { - std::env::set_var("MMTK_PERF_EVENTS", "PERF_COUNT_HW_CPU_CYCLES,0,-1"); + std::env::set_var("MMTK_WORK_PERF_EVENTS", "PERF_COUNT_HW_CPU_CYCLES,0,-1"); let options = Options::default(); assert_eq!( - &options.perf_events, + &options.work_perf_events, &PerfEventOptions { events: vec![("PERF_COUNT_HW_CPU_CYCLES".into(), 0, -1)] } ); }, || { - std::env::remove_var("MMTK_PERF_EVENTS"); + std::env::remove_var("MMTK_WORK_PERF_EVENTS"); }, ) }) @@ -345,14 +352,17 @@ mod tests { with_cleanup( || { // The option needs to start with "hello", otherwise it is invalid. - std::env::set_var("MMTK_PERF_EVENTS", "PERF_COUNT_HW_CPU_CYCLES"); + std::env::set_var("MMTK_WORK_PERF_EVENTS", "PERF_COUNT_HW_CPU_CYCLES"); let options = Options::default(); // invalid value from env var, use default. - assert_eq!(&options.perf_events, &PerfEventOptions { events: vec![] }); + assert_eq!( + &options.work_perf_events, + &PerfEventOptions { events: vec![] } + ); }, || { - std::env::remove_var("MMTK_PERF_EVENTS"); + std::env::remove_var("MMTK_WORK_PERF_EVENTS"); }, ) }) diff --git a/src/util/statistics/counter/long_counter.rs b/src/util/statistics/counter/long_counter.rs index ab49eaea5f..7dfa9bfaa8 100644 --- a/src/util/statistics/counter/long_counter.rs +++ b/src/util/statistics/counter/long_counter.rs @@ -8,6 +8,7 @@ pub struct LongCounter { pub implicitly_start: bool, merge_phases: bool, count: Box<[u64; MAX_PHASES]>, // FIXME make this resizable + diffable: T, start_value: Option, total_count: u64, running: bool, @@ -27,7 +28,7 @@ impl Counter for LongCounter { } debug_assert!(!self.running); self.running = true; - self.start_value = Some(T::current_value()); + self.start_value = Some(self.diffable.current_value()); } fn stop(&mut self) { @@ -36,14 +37,15 @@ impl Counter for LongCounter { } debug_assert!(self.running); self.running = false; - let delta = T::diff(&T::current_value(), self.start_value.as_ref().unwrap()); + let current_value = self.diffable.current_value(); + let delta = T::diff(¤t_value, self.start_value.as_ref().unwrap()); self.count[self.stats.get_phase()] += delta; self.total_count += delta; } fn phase_change(&mut self, old_phase: usize) { if self.running { - let now = T::current_value(); + let now = self.diffable.current_value(); let delta = T::diff(&now, self.start_value.as_ref().unwrap()); self.count[old_phase] += delta; self.total_count += delta; @@ -125,12 +127,14 @@ impl LongCounter { stats: Arc, implicitly_start: bool, merge_phases: bool, + diffable: T, ) -> Self { LongCounter { name, implicitly_start, merge_phases, count: box [0; MAX_PHASES], + diffable, start_value: None, total_count: 0, running: false, diff --git a/src/util/statistics/counter/mod.rs b/src/util/statistics/counter/mod.rs index 9539865b67..c8a8df256b 100644 --- a/src/util/statistics/counter/mod.rs +++ b/src/util/statistics/counter/mod.rs @@ -2,10 +2,14 @@ use std::time::Instant; mod event_counter; mod long_counter; +#[cfg(feature = "perf_counter")] +mod perf_event; mod size_counter; pub use self::event_counter::EventCounter; pub use self::long_counter::{LongCounter, Timer}; +#[cfg(feature = "perf_counter")] +pub use self::perf_event::PerfEventDiffable; pub use self::size_counter::SizeCounter; pub trait Counter { @@ -22,9 +26,11 @@ pub trait Counter { fn name(&self) -> &String; } +/// A Diffable object could be stateless (e.g. a timer that reads the wall +/// clock), or stateful (e.g. holds reference to a perf event fd) pub trait Diffable { type Val; - fn current_value() -> Self::Val; + fn current_value(&mut self) -> Self::Val; fn diff(current: &Self::Val, earlier: &Self::Val) -> u64; fn print_diff(val: u64); } @@ -34,7 +40,7 @@ pub struct MonotoneNanoTime; impl Diffable for MonotoneNanoTime { type Val = Instant; - fn current_value() -> Instant { + fn current_value(&mut self) -> Instant { Instant::now() } diff --git a/src/util/statistics/counter/perf_event.rs b/src/util/statistics/counter/perf_event.rs new file mode 100644 index 0000000000..57b8bfbfa0 --- /dev/null +++ b/src/util/statistics/counter/perf_event.rs @@ -0,0 +1,41 @@ +use super::Diffable; +use pfm::{PerfEvent, PerfEventValue}; + +/// A [`Diffable`] helper type for measuring overall perf events for mutators +/// and GC +/// This is the process-wide counterpart of [`crate::scheduler::work_counter::WorkPerfEvent`]. +pub struct PerfEventDiffable { + pe: PerfEvent, +} + +impl PerfEventDiffable { + pub fn new(name: &str) -> Self { + let mut pe = PerfEvent::new(name, true) + .unwrap_or_else(|_| panic!("Failed to create perf event {}", name)); + // measures the calling thread (and all child threads) on all CPUs + pe.open(0, -1) + .unwrap_or_else(|_| panic!("Failed to open perf event {}", name)); + PerfEventDiffable { pe } + } +} + +impl Diffable for PerfEventDiffable { + type Val = PerfEventValue; + + fn current_value(&mut self) -> Self::Val { + let val = self.pe.read().unwrap(); + self.pe.enable(); + self.pe.reset(); + val + } + + fn diff(current: &Self::Val, _earlier: &Self::Val) -> u64 { + // earlier value is not used as the counter is reset after each use + assert_eq!(current.time_enabled, current.time_running); + current.value as u64 + } + + fn print_diff(val: u64) { + print!("{}", val); + } +} diff --git a/src/util/statistics/stats.rs b/src/util/statistics/stats.rs index 5c271bb3df..1a2e2526e2 100644 --- a/src/util/statistics/stats.rs +++ b/src/util/statistics/stats.rs @@ -1,4 +1,5 @@ use crate::mmtk::MMTK; +use crate::util::options::Options; use crate::util::statistics::counter::*; use crate::util::statistics::Timer; use crate::vm::VMBinding; @@ -51,29 +52,45 @@ pub struct Stats { } impl Stats { - pub fn new() -> Self { + #[allow(unused)] + pub fn new(options: &Options) -> Self { + #[cfg(feature = "perf_counter")] + let perfmon = { + let mut perfmon: Perfmon = Default::default(); + perfmon.initialize().expect("Perfmon failed to initialize"); + perfmon + }; let shared = Arc::new(SharedStats { phase: AtomicUsize::new(0), gathering_stats: AtomicBool::new(false), }); + let mut counters: Vec>> = vec![]; let t = Arc::new(Mutex::new(LongCounter::new( "time".to_string(), shared.clone(), true, false, + MonotoneNanoTime {}, ))); + counters.push(t.clone()); + #[cfg(feature = "perf_counter")] + for e in &options.phase_perf_events.events { + counters.push(Arc::new(Mutex::new(LongCounter::new( + e.0.clone(), + shared.clone(), + true, + false, + PerfEventDiffable::new(&e.0), + )))); + } Stats { gc_count: AtomicUsize::new(0), - total_time: t.clone(), + total_time: t, #[cfg(feature = "perf_counter")] - perfmon: { - let mut perfmon: Perfmon = Default::default(); - perfmon.initialize().expect("Perfmon failed to initialize"); - perfmon - }, + perfmon, shared, - counters: Mutex::new(vec![t]), + counters: Mutex::new(counters), exceeded_phase_limit: AtomicBool::new(false), } } @@ -118,6 +135,7 @@ impl Stats { self.shared.clone(), implicit_start, merge_phases, + MonotoneNanoTime {}, ))); guard.push(counter.clone()); counter @@ -237,9 +255,3 @@ impl Stats { self.shared.get_gathering_stats() } } - -impl Default for Stats { - fn default() -> Self { - Self::new() - } -}