diff --git a/src/agent/Cargo.lock b/src/agent/Cargo.lock index 86779eaae6..63f9c493f9 100644 --- a/src/agent/Cargo.lock +++ b/src/agent/Cargo.lock @@ -555,6 +555,23 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" +[[package]] +name = "coverage" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap 4.0.26", + "debuggable-module", + "debugger", + "iced-x86", + "log", + "pete", + "procfs", + "regex", + "symbolic 10.1.4", + "thiserror", +] + [[package]] name = "coverage-legacy" version = "0.1.0" diff --git a/src/agent/Cargo.toml b/src/agent/Cargo.toml index 1a2927cd21..2421c248e6 100644 --- a/src/agent/Cargo.toml +++ b/src/agent/Cargo.toml @@ -1,6 +1,7 @@ [workspace] members = [ "atexit", + "coverage", "coverage-legacy", "debuggable-module", "debugger", diff --git a/src/agent/coverage/Cargo.toml b/src/agent/coverage/Cargo.toml new file mode 100644 index 0000000000..00bbfbf4c8 --- /dev/null +++ b/src/agent/coverage/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "coverage" +version = "0.1.0" +edition = "2021" +license = "MIT" + +[dependencies] +anyhow = "1.0" +debuggable-module = { path = "../debuggable-module" } +iced-x86 = "1.17" +log = "0.4.17" +regex = "1.0" +symbolic = { version = "10.1", features = ["debuginfo", "demangle", "symcache"] } +thiserror = "1.0" + +[target.'cfg(target_os = "windows")'.dependencies] +debugger = { path = "../debugger" } + +[target.'cfg(target_os = "linux")'.dependencies] +pete = "0.9" +# For procfs, opt out of the `chrono` feature; it pulls in an old version +# of `time`. We do not use the methods that the `chrono` feature enables. 
+procfs = { version = "0.12", default-features = false, features=["flate2"] } + +[dev-dependencies] +clap = { version = "4.0", features = ["derive"] } diff --git a/src/agent/coverage/examples/coverage.rs b/src/agent/coverage/examples/coverage.rs new file mode 100644 index 0000000000..e9e0e5b1da --- /dev/null +++ b/src/agent/coverage/examples/coverage.rs @@ -0,0 +1,65 @@ +use std::process::Command; +use std::time::Duration; + +use anyhow::Result; +use clap::Parser; +use coverage::allowlist::{AllowList, TargetAllowList}; +use coverage::binary::BinaryCoverage; + +#[derive(Parser, Debug)] +struct Args { + #[arg(long)] + module_allowlist: Option, + + #[arg(long)] + source_allowlist: Option, + + #[arg(short, long)] + timeout: Option, + + command: Vec, +} + +const DEFAULT_TIMEOUT: Duration = Duration::from_secs(5); + +fn main() -> Result<()> { + let args = Args::parse(); + + let timeout = args + .timeout + .map(Duration::from_millis) + .unwrap_or(DEFAULT_TIMEOUT); + + let mut cmd = Command::new(&args.command[0]); + if args.command.len() > 1 { + cmd.args(&args.command[1..]); + } + + let mut allowlist = TargetAllowList::default(); + + if let Some(path) = &args.module_allowlist { + allowlist.modules = AllowList::load(path)?; + } + + if let Some(path) = &args.source_allowlist { + allowlist.source_files = AllowList::load(path)?; + } + + let coverage = coverage::record::record(cmd, timeout, allowlist)?; + + dump_modoff(coverage)?; + + Ok(()) +} + +fn dump_modoff(coverage: BinaryCoverage) -> Result<()> { + for (module, coverage) in &coverage.modules { + for (offset, count) in coverage.as_ref() { + if count.reached() { + println!("{}+{offset:x}", module.base_name()); + } + } + } + + Ok(()) +} diff --git a/src/agent/coverage/src/allowlist.rs b/src/agent/coverage/src/allowlist.rs new file mode 100644 index 0000000000..2a0e807d17 --- /dev/null +++ b/src/agent/coverage/src/allowlist.rs @@ -0,0 +1,156 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +use anyhow::Result; +use regex::{Regex, RegexSet}; +use std::path::Path; + +#[derive(Clone, Debug, Default)] +pub struct TargetAllowList { + pub functions: AllowList, + pub modules: AllowList, + pub source_files: AllowList, +} + +impl TargetAllowList { + pub fn new(modules: AllowList, source_files: AllowList) -> Self { + // Allow all. + let functions = AllowList::default(); + + Self { + functions, + modules, + source_files, + } + } +} + +#[derive(Clone, Debug)] +pub struct AllowList { + allow: RegexSet, + deny: RegexSet, +} + +impl AllowList { + pub fn new(allow: RegexSet, deny: RegexSet) -> Self { + Self { allow, deny } + } + + pub fn load(path: impl AsRef) -> Result { + let path = path.as_ref(); + let text = std::fs::read_to_string(path)?; + Self::parse(&text) + } + + pub fn parse(text: &str) -> Result { + use std::io::{BufRead, BufReader}; + + let reader = BufReader::new(text.as_bytes()); + + let mut allow = vec![]; + let mut deny = vec![]; + + // We could just collect and pass to the `RegexSet` ctor. + // + // Instead, check each rule individually for diagnostic purposes. + for (index, line) in reader.lines().enumerate() { + let line = line?; + + match AllowListLine::parse(&line) { + Ok(valid) => { + use AllowListLine::*; + + match valid { + Blank | Comment => { + // Ignore. + } + Allow(re) => { + allow.push(re); + } + Deny(re) => { + deny.push(re); + } + } + } + Err(err) => { + // Ignore invalid lines, but warn. + let line_number = index + 1; + warn!("error at line {}: {}", line_number, err); + } + } + } + + let allow = RegexSet::new(allow.iter().map(|re| re.as_str()))?; + let deny = RegexSet::new(deny.iter().map(|re| re.as_str()))?; + let allowlist = AllowList::new(allow, deny); + + Ok(allowlist) + } + + pub fn is_allowed(&self, path: impl AsRef) -> bool { + let path = path.as_ref(); + + // Allowed if rule-allowed but not excluded by a negative (deny) rule. 
+ self.allow.is_match(path) && !self.deny.is_match(path) + } +} + +impl Default for AllowList { + fn default() -> Self { + // Unwrap-safe due to valid constant expr. + let allow = RegexSet::new([".*"]).unwrap(); + let deny = RegexSet::empty(); + + AllowList::new(allow, deny) + } +} + +pub enum AllowListLine { + Blank, + Comment, + Allow(Regex), + Deny(Regex), +} + +impl AllowListLine { + pub fn parse(line: &str) -> Result { + let line = line.trim(); + + // Allow and ignore blank lines. + if line.is_empty() { + return Ok(Self::Blank); + } + + // Support comments of the form `# `. + if line.starts_with("# ") { + return Ok(Self::Comment); + } + + // Deny rules are of the form `! `. + if let Some(expr) = line.strip_prefix("! ") { + let re = glob_to_regex(expr)?; + return Ok(Self::Deny(re)); + } + + // Try to interpret as allow rule. + let re = glob_to_regex(line)?; + Ok(Self::Allow(re)) + } +} + +#[allow(clippy::single_char_pattern)] +fn glob_to_regex(expr: &str) -> Result { + // Don't make users escape Windows path separators. + let expr = expr.replace(r"\", r"\\"); + + // Translate glob wildcards into quantified regexes. + let expr = expr.replace("*", ".*"); + + // Anchor to line start and end. + let expr = format!("^{expr}$"); + + Ok(Regex::new(&expr)?) +} + +#[cfg(test)] +mod tests; diff --git a/src/agent/coverage/src/allowlist/test-data/allow-all-glob-except-commented.txt b/src/agent/coverage/src/allowlist/test-data/allow-all-glob-except-commented.txt new file mode 100644 index 0000000000..6ef5c08319 --- /dev/null +++ b/src/agent/coverage/src/allowlist/test-data/allow-all-glob-except-commented.txt @@ -0,0 +1,3 @@ +a/* +! a/c +# c diff --git a/src/agent/coverage/src/allowlist/test-data/allow-all-glob-except.txt b/src/agent/coverage/src/allowlist/test-data/allow-all-glob-except.txt new file mode 100644 index 0000000000..a028542f1e --- /dev/null +++ b/src/agent/coverage/src/allowlist/test-data/allow-all-glob-except.txt @@ -0,0 +1,3 @@ +a/* +! 
a/c +c diff --git a/src/agent/coverage/src/allowlist/test-data/allow-all-glob.txt b/src/agent/coverage/src/allowlist/test-data/allow-all-glob.txt new file mode 100644 index 0000000000..72e8ffc0db --- /dev/null +++ b/src/agent/coverage/src/allowlist/test-data/allow-all-glob.txt @@ -0,0 +1 @@ +* diff --git a/src/agent/coverage/src/allowlist/test-data/allow-all.txt b/src/agent/coverage/src/allowlist/test-data/allow-all.txt new file mode 100644 index 0000000000..0ee448f0b5 --- /dev/null +++ b/src/agent/coverage/src/allowlist/test-data/allow-all.txt @@ -0,0 +1,4 @@ +a +a/b +b +c diff --git a/src/agent/coverage/src/allowlist/test-data/allow-some.txt b/src/agent/coverage/src/allowlist/test-data/allow-some.txt new file mode 100644 index 0000000000..422c2b7ab3 --- /dev/null +++ b/src/agent/coverage/src/allowlist/test-data/allow-some.txt @@ -0,0 +1,2 @@ +a +b diff --git a/src/agent/coverage/src/allowlist/test-data/empty.txt b/src/agent/coverage/src/allowlist/test-data/empty.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/agent/coverage/src/allowlist/tests.rs b/src/agent/coverage/src/allowlist/tests.rs new file mode 100644 index 0000000000..7c189aae88 --- /dev/null +++ b/src/agent/coverage/src/allowlist/tests.rs @@ -0,0 +1,101 @@ +use anyhow::Result; + +use super::AllowList; + +#[test] +fn test_default() -> Result<()> { + let allowlist = AllowList::default(); + + // All allowed. + assert!(allowlist.is_allowed("a")); + assert!(allowlist.is_allowed("a/b")); + assert!(allowlist.is_allowed("b")); + assert!(allowlist.is_allowed("c")); + + Ok(()) +} + +#[test] +fn test_empty() -> Result<()> { + let text = include_str!("test-data/empty.txt"); + let allowlist = AllowList::parse(text)?; + + // All excluded. 
+ assert!(!allowlist.is_allowed("a")); + assert!(!allowlist.is_allowed("a/b")); + assert!(!allowlist.is_allowed("b")); + assert!(!allowlist.is_allowed("c")); + + Ok(()) +} + +#[test] +fn test_allow_some() -> Result<()> { + let text = include_str!("test-data/allow-some.txt"); + let allowlist = AllowList::parse(text)?; + + assert!(allowlist.is_allowed("a")); + assert!(!allowlist.is_allowed("a/b")); + assert!(allowlist.is_allowed("b")); + assert!(!allowlist.is_allowed("c")); + + Ok(()) +} + +#[test] +fn test_allow_all() -> Result<()> { + let text = include_str!("test-data/allow-all.txt"); + let allowlist = AllowList::parse(text)?; + + assert!(allowlist.is_allowed("a")); + assert!(allowlist.is_allowed("a/b")); + assert!(allowlist.is_allowed("b")); + assert!(allowlist.is_allowed("c")); + + Ok(()) +} + +#[test] +fn test_allow_all_glob() -> Result<()> { + let text = include_str!("test-data/allow-all-glob.txt"); + let allowlist = AllowList::parse(text)?; + + assert!(allowlist.is_allowed("a")); + assert!(allowlist.is_allowed("a/b")); + assert!(allowlist.is_allowed("b")); + assert!(allowlist.is_allowed("c")); + + Ok(()) +} + +#[test] +fn test_allow_glob_except() -> Result<()> { + let text = include_str!("test-data/allow-all-glob-except.txt"); + let allowlist = AllowList::parse(text)?; + + assert!(!allowlist.is_allowed("a")); + assert!(allowlist.is_allowed("a/b")); + assert!(!allowlist.is_allowed("a/c")); + assert!(allowlist.is_allowed("a/d")); + assert!(!allowlist.is_allowed("b")); + assert!(allowlist.is_allowed("c")); + + Ok(()) +} + +#[test] +fn test_allow_glob_except_commented() -> Result<()> { + let text = include_str!("test-data/allow-all-glob-except-commented.txt"); + let allowlist = AllowList::parse(text)?; + + assert!(!allowlist.is_allowed("a")); + assert!(allowlist.is_allowed("a/b")); + assert!(!allowlist.is_allowed("a/c")); + assert!(allowlist.is_allowed("a/d")); + assert!(!allowlist.is_allowed("b")); + + // Allowed by the rule `c`, but not allowed because `# c` is 
a comment. + assert!(!allowlist.is_allowed("c")); + + Ok(()) +} diff --git a/src/agent/coverage/src/binary.rs b/src/agent/coverage/src/binary.rs new file mode 100644 index 0000000000..0ef6640f47 --- /dev/null +++ b/src/agent/coverage/src/binary.rs @@ -0,0 +1,95 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use std::collections::{BTreeMap, BTreeSet}; + +use anyhow::{bail, Result}; +use debuggable_module::{block, path::FilePath, Module, Offset}; +use symbolic::debuginfo::Object; +use symbolic::symcache::{SymCache, SymCacheConverter}; + +use crate::allowlist::TargetAllowList; + +#[derive(Clone, Debug, Default)] +pub struct BinaryCoverage { + pub modules: BTreeMap, +} + +#[derive(Clone, Debug, Default)] +pub struct ModuleBinaryCoverage { + pub offsets: BTreeMap, +} + +impl ModuleBinaryCoverage { + pub fn increment(&mut self, offset: Offset) -> Result<()> { + if let Some(count) = self.offsets.get_mut(&offset) { + count.increment(); + } else { + bail!("unknown coverage offset: {offset:x}"); + }; + + Ok(()) + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct Count(pub u32); + +impl Count { + pub fn increment(&mut self) { + self.0 = self.0.saturating_add(1); + } + + pub fn reached(&self) -> bool { + self.0 > 0 + } +} + +pub fn find_coverage_sites<'data>( + module: &dyn Module<'data>, + allowlist: &TargetAllowList, +) -> Result { + let debuginfo = module.debuginfo()?; + + let mut symcache = vec![]; + let mut converter = SymCacheConverter::new(); + let exe = Object::parse(module.executable_data())?; + converter.process_object(&exe)?; + let di = Object::parse(module.debuginfo_data())?; + converter.process_object(&di)?; + converter.serialize(&mut std::io::Cursor::new(&mut symcache))?; + let symcache = SymCache::parse(&symcache)?; + + let mut offsets = BTreeSet::new(); + + for function in debuginfo.functions() { + if !allowlist.functions.is_allowed(&function.name) { + continue; + } + + if let Some(location) = 
symcache.lookup(function.offset.0).next() { + if let Some(file) = location.file() { + let path = file.full_path(); + + if allowlist.source_files.is_allowed(&path) { + let blocks = + block::sweep_region(module, &debuginfo, function.offset, function.size)?; + offsets.extend(blocks.iter().map(|b| b.offset)); + } + } + } + } + + let mut coverage = ModuleBinaryCoverage::default(); + coverage + .offsets + .extend(offsets.into_iter().map(|o| (o, Count(0)))); + + Ok(coverage) +} + +impl AsRef> for ModuleBinaryCoverage { + fn as_ref(&self) -> &BTreeMap { + &self.offsets + } +} diff --git a/src/agent/coverage/src/lib.rs b/src/agent/coverage/src/lib.rs new file mode 100644 index 0000000000..fe395cdf4a --- /dev/null +++ b/src/agent/coverage/src/lib.rs @@ -0,0 +1,17 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +#[macro_use] +extern crate log; + +pub mod allowlist; +pub mod binary; +pub mod record; +pub mod source; +mod timer; + +#[doc(inline)] +pub use allowlist::{AllowList, TargetAllowList}; + +#[doc(inline)] +pub use record::record; diff --git a/src/agent/coverage/src/record.rs b/src/agent/coverage/src/record.rs new file mode 100644 index 0000000000..92336d0065 --- /dev/null +++ b/src/agent/coverage/src/record.rs @@ -0,0 +1,14 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +#[cfg(target_os = "linux")] +pub mod linux; + +#[cfg(target_os = "windows")] +pub mod windows; + +#[cfg(target_os = "linux")] +pub use crate::record::linux::record; + +#[cfg(target_os = "windows")] +pub use crate::record::windows::record; diff --git a/src/agent/coverage/src/record/linux.rs b/src/agent/coverage/src/record/linux.rs new file mode 100644 index 0000000000..4f331a3241 --- /dev/null +++ b/src/agent/coverage/src/record/linux.rs @@ -0,0 +1,131 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +use std::collections::BTreeMap; +use std::process::Command; +use std::time::Duration; + +use anyhow::{bail, Result}; +use debuggable_module::linux::LinuxModule; +use debuggable_module::load_module::LoadModule; +use debuggable_module::loader::Loader; +use debuggable_module::path::FilePath; +use debuggable_module::Address; +use pete::Tracee; + +pub mod debugger; +use debugger::{DebugEventHandler, Debugger, DebuggerContext, ModuleImage}; + +use crate::allowlist::TargetAllowList; +use crate::binary::{self, BinaryCoverage}; + +pub fn record( + cmd: Command, + timeout: Duration, + allowlist: impl Into>, +) -> Result { + let loader = Loader::new(); + let allowlist = allowlist.into().unwrap_or_default(); + + crate::timer::timed(timeout, move || { + let mut recorder = LinuxRecorder::new(&loader, allowlist); + let dbg = Debugger::new(&mut recorder); + dbg.run(cmd)?; + + Ok(recorder.coverage) + })? +} + +pub struct LinuxRecorder<'data> { + allowlist: TargetAllowList, + coverage: BinaryCoverage, + loader: &'data Loader, + modules: BTreeMap>, +} + +impl<'data> LinuxRecorder<'data> { + pub fn new(loader: &'data Loader, allowlist: TargetAllowList) -> Self { + let coverage = BinaryCoverage::default(); + let modules = BTreeMap::new(); + + Self { + allowlist, + coverage, + loader, + modules, + } + } + + fn do_on_breakpoint( + &mut self, + context: &mut DebuggerContext, + tracee: &mut Tracee, + ) -> Result<()> { + let regs = tracee.registers()?; + let addr = Address(regs.rip); + + if let Some(image) = context.find_image_for_addr(addr) { + if let Some(coverage) = self.coverage.modules.get_mut(image.path()) { + let offset = addr.offset_from(image.base())?; + coverage.increment(offset)?; + } else { + bail!("coverage not initialized for module {}", image.path()); + } + } else { + bail!("no image for addr: {addr:x}"); + } + + Ok(()) + } + + fn do_on_module_load( + &mut self, + context: &mut DebuggerContext, + tracee: &mut Tracee, + image: &ModuleImage, + ) -> Result<()> { + 
info!("module load: {}", image.path()); + + let path = image.path(); + + if !self.allowlist.modules.is_allowed(path) { + debug!("not inserting denylisted module: {path}"); + return Ok(()); + } + + let module = if let Ok(module) = LinuxModule::load(self.loader, path.clone()) { + module + } else { + debug!("skipping undebuggable module: {path}"); + return Ok(()); + }; + + let coverage = binary::find_coverage_sites(&module, &self.allowlist)?; + + for offset in coverage.as_ref().keys().copied() { + let addr = image.base().offset_by(offset)?; + context.breakpoints.set(tracee, addr)?; + } + + self.coverage.modules.insert(path.clone(), coverage); + + self.modules.insert(path.clone(), module); + + Ok(()) + } +} + +impl<'data> DebugEventHandler for LinuxRecorder<'data> { + fn on_breakpoint(&mut self, context: &mut DebuggerContext, tracee: &mut Tracee) -> Result<()> { + self.do_on_breakpoint(context, tracee) + } + + fn on_module_load( + &mut self, + context: &mut DebuggerContext, + tracee: &mut Tracee, + image: &ModuleImage, + ) -> Result<()> { + self.do_on_module_load(context, tracee, image) + } +} diff --git a/src/agent/coverage/src/record/linux/debugger.rs b/src/agent/coverage/src/record/linux/debugger.rs new file mode 100644 index 0000000000..5f300d6ccc --- /dev/null +++ b/src/agent/coverage/src/record/linux/debugger.rs @@ -0,0 +1,370 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +use std::collections::BTreeMap; +use std::process::Command; + +use anyhow::{bail, format_err, Result}; +use debuggable_module::path::FilePath; +use debuggable_module::Address; +use pete::{Ptracer, Restart, Signal, Stop, Tracee}; +use procfs::process::{MMapPath, MemoryMap, Process}; + +pub trait DebugEventHandler { + fn on_breakpoint(&mut self, dbg: &mut DebuggerContext, tracee: &mut Tracee) -> Result<()>; + + fn on_module_load( + &mut self, + db: &mut DebuggerContext, + tracee: &mut Tracee, + image: &ModuleImage, + ) -> Result<()>; +} + +pub struct Debugger<'eh> { + context: DebuggerContext, + event_handler: &'eh mut dyn DebugEventHandler, +} + +impl<'eh> Debugger<'eh> { + pub fn new(event_handler: &'eh mut dyn DebugEventHandler) -> Self { + let context = DebuggerContext::new(); + + Self { + context, + event_handler, + } + } + + pub fn run(mut self, cmd: Command) -> Result<()> { + let mut child = self.context.tracer.spawn(cmd)?; + + if let Err(err) = self.wait_on_stops() { + // Ignore error if child already exited. + let _ = child.kill(); + + return Err(err); + } + + Ok(()) + } + + fn wait_on_stops(mut self) -> Result<()> { + use pete::ptracer::Options; + + // Continue the tracee process until the return from its initial `execve()`. + let mut tracee = continue_to_init_execve(&mut self.context.tracer)?; + + // Do not follow forks. + // + // After this, we assume that any new tracee is a thread in the same + // group as the root tracee. + let mut options = Options::all(); + options.remove(Options::PTRACE_O_TRACEFORK); + options.remove(Options::PTRACE_O_TRACEVFORK); + options.remove(Options::PTRACE_O_TRACEEXEC); + tracee.set_options(options)?; + + // Initialize index of mapped modules now that we have a PID to query. + self.context.images = Some(Images::new(tracee.pid.as_raw())); + self.update_images(&mut tracee)?; + + // Restart tracee and enter the main debugger loop. 
+ self.context.tracer.restart(tracee, Restart::Syscall)?; + + while let Some(mut tracee) = self.context.tracer.wait()? { + match tracee.stop { + Stop::SyscallEnter => trace!("syscall-enter: {:?}", tracee.stop), + Stop::SyscallExit => { + self.update_images(&mut tracee)?; + } + Stop::SignalDelivery { + signal: Signal::SIGTRAP, + } => { + self.restore_and_call_if_breakpoint(&mut tracee)?; + } + Stop::Clone { new: pid } => { + // Only seen when the `VM_CLONE` flag is set, as of Linux 4.15. + info!("new thread: {}", pid); + } + _ => { + debug!("stop: {:?}", tracee.stop); + } + } + + if let Err(err) = self.context.tracer.restart(tracee, Restart::Syscall) { + error!("unable to restart tracee: {}", err); + } + } + + Ok(()) + } + + fn restore_and_call_if_breakpoint(&mut self, tracee: &mut Tracee) -> Result<()> { + let mut regs = tracee.registers()?; + + // Compute what the last PC would have been _if_ we stopped due to a soft breakpoint. + // + // If we don't have a registered breakpoint, then we will not use this value. + let pc = Address(regs.rip.saturating_sub(1)); + + if self.context.breakpoints.clear(tracee, pc)? { + // We restored the original, `int3`-clobbered instruction in `clear()`. Now + // set the tracee's registers to execute it on restart. Do this _before_ the + // callback to simulate a hardware breakpoint. + regs.rip = pc.0; + tracee.set_registers(regs)?; + + self.event_handler + .on_breakpoint(&mut self.context, tracee)?; + } else { + warn!("no registered breakpoint for SIGTRAP delivery at {pc:x}"); + + // We didn't fix up a registered soft breakpoint, so we have no reason to + // re-execute the instruction at the last PC. Leave the tracee registers alone. 
+ } + + Ok(()) + } + + fn update_images(&mut self, tracee: &mut Tracee) -> Result<()> { + let images = self + .context + .images + .as_mut() + .ok_or_else(|| format_err!("internal error: recorder images not initialized"))?; + let events = images.update()?; + + for (_base, image) in &events.loaded { + self.event_handler + .on_module_load(&mut self.context, tracee, image)?; + } + + Ok(()) + } +} + +pub struct DebuggerContext { + pub breakpoints: Breakpoints, + pub images: Option, + pub tracer: Ptracer, +} + +impl DebuggerContext { + #[allow(clippy::new_without_default)] + pub fn new() -> Self { + let breakpoints = Breakpoints::default(); + let images = None; + let tracer = Ptracer::new(); + + Self { + breakpoints, + images, + tracer, + } + } + + pub fn find_image_for_addr(&self, addr: Address) -> Option<&ModuleImage> { + self.images.as_ref()?.find_image_for_addr(addr) + } +} + +/// Executable memory-mapped files for a process. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Images { + mapped: BTreeMap, + pid: i32, +} + +impl Images { + pub fn new(pid: i32) -> Self { + let mapped = BTreeMap::default(); + + Self { mapped, pid } + } + + pub fn mapped(&self) -> impl Iterator { + self.mapped.iter().map(|(va, i)| (*va, i)) + } + + pub fn update(&mut self) -> Result { + let proc = Process::new(self.pid)?; + + let mut new = BTreeMap::new(); + let mut group: Vec = vec![]; + + for map in proc.maps()? { + if let Some(last) = group.last() { + if last.pathname != map.pathname { + // The current memory mapping is the start of a new group. + // + // Consume the current group, and track any new module image. + if let Ok(image) = ModuleImage::new(group) { + let base = image.base(); + new.insert(base, image); + } + + // Reset the current group. 
+ group = vec![]; + } + } + + group.push(map); + } + + let events = LoadEvents::new(&self.mapped, &new); + + self.mapped = new; + + Ok(events) + } + + pub fn find_image_for_addr(&self, addr: Address) -> Option<&ModuleImage> { + let (_, image) = self.mapped().find(|(_, im)| im.contains(&addr))?; + + Some(image) + } +} + +/// A `MemoryMap` that is known to be file-backed and executable. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct ModuleImage { + base: Address, + maps: Vec, + path: FilePath, +} + +impl ModuleImage { + // Accepts an increasing sequence of memory mappings with a common file-backed + // pathname. + pub fn new(mut maps: Vec) -> Result { + maps.sort_by_key(|m| m.address); + + if maps.is_empty() { + bail!("no mapping for module image"); + } + + if !maps.iter().any(|m| m.perms.contains('x')) { + bail!("no executable mapping for module image"); + } + + // Cannot panic due to initial length check. + let first = &maps[0]; + + let path = if let MMapPath::Path(path) = &first.pathname { + FilePath::new(path.to_string_lossy())? + } else { + bail!("module image mappings must be file-backed"); + }; + + for map in &maps { + if map.pathname != first.pathname { + bail!("module image mapping not file-backed"); + } + } + + let base = Address(first.address.0); + + let image = ModuleImage { base, maps, path }; + + Ok(image) + } + + pub fn path(&self) -> &FilePath { + &self.path + } + + pub fn base(&self) -> Address { + self.base + } + + pub fn contains(&self, addr: &Address) -> bool { + for map in &self.maps { + let lo = Address(map.address.0); + let hi = Address(map.address.1); + if (lo..hi).contains(addr) { + return true; + } + } + + false + } +} + +pub struct LoadEvents { + pub loaded: Vec<(Address, ModuleImage)>, + pub unloaded: Vec<(Address, ModuleImage)>, +} + +impl LoadEvents { + pub fn new(old: &BTreeMap, new: &BTreeMap) -> Self { + // New not in old. 
+ let loaded: Vec<_> = new + .iter() + .filter(|(nva, n)| { + !old.iter() + .any(|(iva, i)| *nva == iva && n.path() == i.path()) + }) + .map(|(va, i)| (*va, i.clone())) + .collect(); + + // Old not in new. + let unloaded: Vec<_> = old + .iter() + .filter(|(iva, i)| { + !new.iter() + .any(|(nva, n)| nva == *iva && n.path() == i.path()) + }) + .map(|(va, i)| (*va, i.clone())) + .collect(); + + Self { loaded, unloaded } + } +} + +#[derive(Clone, Debug, Default)] +pub struct Breakpoints { + saved: BTreeMap, +} + +impl Breakpoints { + pub fn set(&mut self, tracee: &mut Tracee, addr: Address) -> Result<()> { + // Return if the breakpoint exists. We don't want to conclude that the + // saved instruction byte was `0xcc`. + if self.saved.contains_key(&addr) { + return Ok(()); + } + + let mut data = [0u8]; + tracee.read_memory_mut(addr.0, &mut data)?; + self.saved.insert(addr, data[0]); + tracee.write_memory(addr.0, &[0xcc])?; + + Ok(()) + } + + pub fn clear(&mut self, tracee: &mut Tracee, addr: Address) -> Result { + let data = self.saved.remove(&addr); + + let cleared = if let Some(data) = data { + tracee.write_memory(addr.0, &[data])?; + true + } else { + false + }; + + Ok(cleared) + } +} + +fn continue_to_init_execve(tracer: &mut Ptracer) -> Result { + while let Some(tracee) = tracer.wait()? { + if let Stop::SyscallExit = &tracee.stop { + return Ok(tracee); + } + + tracer.restart(tracee, Restart::Continue)?; + } + + bail!("did not see initial execve() in tracee while recording coverage"); +} diff --git a/src/agent/coverage/src/record/windows.rs b/src/agent/coverage/src/record/windows.rs new file mode 100644 index 0000000000..c926889bfb --- /dev/null +++ b/src/agent/coverage/src/record/windows.rs @@ -0,0 +1,210 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +use std::collections::BTreeMap; +use std::path::Path; +use std::process::Command; +use std::time::Duration; + +use anyhow::{anyhow, Result}; +use debuggable_module::load_module::LoadModule; +use debuggable_module::loader::Loader; +use debuggable_module::path::FilePath; +use debuggable_module::windows::WindowsModule; +use debuggable_module::Offset; +use debugger::{BreakpointId, BreakpointType, DebugEventHandler, Debugger, ModuleLoadInfo}; + +use crate::allowlist::TargetAllowList; +use crate::binary::{self, BinaryCoverage}; + +pub fn record( + cmd: Command, + timeout: Duration, + allowlist: impl Into>, +) -> Result { + let loader = Loader::new(); + let allowlist = allowlist.into().unwrap_or_default(); + + crate::timer::timed(timeout, move || { + let mut recorder = WindowsRecorder::new(&loader, allowlist); + let (mut dbg, _child) = Debugger::init(cmd, &mut recorder)?; + dbg.run(&mut recorder)?; + + Ok(recorder.coverage) + })? +} + +pub struct WindowsRecorder<'data> { + allowlist: TargetAllowList, + breakpoints: Breakpoints, + coverage: BinaryCoverage, + loader: &'data Loader, + modules: BTreeMap>, +} + +impl<'data> WindowsRecorder<'data> { + pub fn new(loader: &'data Loader, allowlist: TargetAllowList) -> Self { + let breakpoints = Breakpoints::default(); + let coverage = BinaryCoverage::default(); + let modules = BTreeMap::new(); + + Self { + allowlist, + breakpoints, + coverage, + loader, + modules, + } + } + + pub fn allowlist(&self) -> &TargetAllowList { + &self.allowlist + } + + pub fn allowlist_mut(&mut self) -> &mut TargetAllowList { + &mut self.allowlist + } + + fn try_on_create_process(&mut self, dbg: &mut Debugger, module: &ModuleLoadInfo) -> Result<()> { + // Not necessary for PDB search, but enables use of other `dbghelp` APIs. 
+ if let Err(err) = dbg.target().maybe_sym_initialize() { + error!( + "unable to initialize symbol handler for new process {}: {:?}", + module.path().display(), + err, + ); + } + + self.insert_module(dbg, module) + } + + fn try_on_load_dll(&mut self, dbg: &mut Debugger, module: &ModuleLoadInfo) -> Result<()> { + self.insert_module(dbg, module) + } + + fn try_on_breakpoint(&mut self, _dbg: &mut Debugger, id: BreakpointId) -> Result<()> { + let breakpoint = self + .breakpoints + .remove(id) + .ok_or_else(|| anyhow!("stopped on dangling breakpoint"))?; + + let coverage = self + .coverage + .modules + .get_mut(&breakpoint.module) + .ok_or_else(|| anyhow!("coverage not initialized for module: {}", breakpoint.module))?; + + coverage.increment(breakpoint.offset)?; + + Ok(()) + } + + fn stop(&self, dbg: &mut Debugger) { + dbg.quit_debugging(); + } + + fn insert_module(&mut self, dbg: &mut Debugger, module: &ModuleLoadInfo) -> Result<()> { + let path = FilePath::new(module.path().to_string_lossy())?; + + if !self.allowlist.modules.is_allowed(&path) { + debug!("not inserting denylisted module: {path}"); + return Ok(()); + } + + let module = if let Ok(m) = WindowsModule::load(self.loader, path.clone()) { + m + } else { + debug!("skipping undebuggable module: {path}"); + return Ok(()); + }; + + let coverage = binary::find_coverage_sites(&module, &self.allowlist)?; + + for offset in coverage.as_ref().keys().copied() { + let breakpoint = Breakpoint::new(path.clone(), offset); + self.breakpoints.set(dbg, breakpoint)?; + } + + self.coverage.modules.insert(path.clone(), coverage); + + self.modules.insert(path, module); + + Ok(()) + } +} + +#[derive(Debug, Default)] +struct Breakpoints { + id_to_offset: BTreeMap, + offset_to_breakpoint: BTreeMap, +} + +impl Breakpoints { + pub fn set(&mut self, dbg: &mut Debugger, breakpoint: Breakpoint) -> Result<()> { + if self.is_set(&breakpoint) { + return Ok(()); + } + + self.write(dbg, breakpoint) + } + + // Unguarded action that overwrites 
both the target process address space and our index + // of known breakpoints. Callers must use `set()`, which avoids redundant breakpoint + // setting. + fn write(&mut self, dbg: &mut Debugger, breakpoint: Breakpoint) -> Result<()> { + // The `debugger` crate tracks loaded modules by base name. If a path or file + // name is used, software breakpoints will not be written. + let name = Path::new(breakpoint.module.base_name()); + let id = dbg.new_rva_breakpoint(name, breakpoint.offset.0, BreakpointType::OneTime)?; + + self.id_to_offset.insert(id, breakpoint.offset); + self.offset_to_breakpoint + .insert(breakpoint.offset, breakpoint); + + Ok(()) + } + + pub fn is_set(&self, breakpoint: &Breakpoint) -> bool { + self.offset_to_breakpoint.contains_key(&breakpoint.offset) + } + + pub fn remove(&mut self, id: BreakpointId) -> Option { + let offset = self.id_to_offset.remove(&id)?; + self.offset_to_breakpoint.remove(&offset) + } +} + +#[derive(Clone, Debug)] +struct Breakpoint { + module: FilePath, + offset: Offset, +} + +impl Breakpoint { + pub fn new(module: FilePath, offset: Offset) -> Self { + Self { module, offset } + } +} + +impl<'data> DebugEventHandler for WindowsRecorder<'data> { + fn on_create_process(&mut self, dbg: &mut Debugger, module: &ModuleLoadInfo) { + if let Err(err) = self.try_on_create_process(dbg, module) { + warn!("{err}"); + self.stop(dbg); + } + } + + fn on_load_dll(&mut self, dbg: &mut Debugger, module: &ModuleLoadInfo) { + if let Err(err) = self.try_on_load_dll(dbg, module) { + warn!("{err}"); + self.stop(dbg); + } + } + + fn on_breakpoint(&mut self, dbg: &mut Debugger, bp: BreakpointId) { + if let Err(err) = self.try_on_breakpoint(dbg, bp) { + warn!("{err}"); + self.stop(dbg); + } + } +} diff --git a/src/agent/coverage/src/source.rs b/src/agent/coverage/src/source.rs new file mode 100644 index 0000000000..33067e6cc6 --- /dev/null +++ b/src/agent/coverage/src/source.rs @@ -0,0 +1,157 @@ +// Copyright (c) Microsoft Corporation. 
+// Licensed under the MIT License.
+
+use std::collections::{BTreeMap, BTreeSet};
+
+use anyhow::{bail, Result};
+
+use debuggable_module::block::{sweep_region, Block, Blocks};
+use debuggable_module::load_module::LoadModule;
+use debuggable_module::loader::Loader;
+use debuggable_module::path::FilePath;
+use debuggable_module::{Module, Offset};
+
+use crate::binary::BinaryCoverage;
+
+pub use crate::binary::Count;
+
+/// Line coverage for a set of source files, keyed by file path.
+#[derive(Clone, Debug, Default)]
+pub struct SourceCoverage {
+    pub files: BTreeMap<FilePath, FileCoverage>,
+}
+
+/// Line coverage for a single source file, keyed by line.
+#[derive(Clone, Debug, Default)]
+pub struct FileCoverage {
+    pub lines: BTreeMap<Line, Count>,
+}
+
+/// A source line number. Must be nonzero.
+#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
+pub struct Line(u32);
+
+impl Line {
+    /// Creates a line from `number`.
+    ///
+    /// # Errors
+    ///
+    /// Fails if `number` is zero.
+    pub fn new(number: u32) -> Result<Self> {
+        if number == 0 {
+            bail!("line numbers must be nonzero");
+        }
+
+        Ok(Line(number))
+    }
+
+    /// Returns the line number.
+    pub fn number(&self) -> u32 {
+        self.0
+    }
+}
+
+impl From<Line> for u32 {
+    fn from(line: Line) -> Self {
+        line.number()
+    }
+}
+
+/// Projects binary (per-module, per-offset) coverage onto source lines.
+///
+/// For every module in `binary`, the module and its debug info are loaded, the blocks of
+/// each function that contains a coverage offset are recovered, and the count of each
+/// coverage site is attributed to every source line that an instruction of its block maps
+/// to. Locations without a file or with line number 0 are skipped.
+///
+/// When several sites map to the same line, the line keeps the smallest count, so a line
+/// is only reported as hit if every contributing block was hit.
+///
+/// # Errors
+///
+/// Fails if a module or its debug info cannot be loaded or parsed, if block recovery or
+/// reading module data fails, or if a source path cannot be converted to a `FilePath`.
+pub fn binary_to_source_coverage(binary: &BinaryCoverage) -> Result<SourceCoverage> {
+    use std::collections::btree_map::Entry;
+
+    use symbolic::debuginfo::Object;
+    use symbolic::symcache::{SymCache, SymCacheConverter};
+
+    let loader = Loader::new();
+
+    let mut source = SourceCoverage::default();
+
+    for (exe_path, coverage) in &binary.modules {
+        let module: Box<dyn Module> = Box::load(&loader, exe_path.clone())?;
+        let debuginfo = module.debuginfo()?;
+
+        // Build an in-memory symcache from the executable and its debug info. It is used
+        // below to map instruction offsets to source locations.
+        let mut symcache = vec![];
+        let mut converter = SymCacheConverter::new();
+
+        let exe = Object::parse(module.executable_data())?;
+        converter.process_object(&exe)?;
+
+        let di = Object::parse(module.debuginfo_data())?;
+        converter.process_object(&di)?;
+
+        converter.serialize(&mut std::io::Cursor::new(&mut symcache))?;
+        let symcache = SymCache::parse(&symcache)?;
+
+        let mut blocks = Blocks::new();
+
+        for function in debuginfo.functions() {
+            // Recover function blocks if it contains any coverage offset.
+            let is_covered = coverage.as_ref().keys().any(|offset| function.contains(offset));
+
+            if is_covered {
+                let function_blocks =
+                    sweep_region(&*module, &debuginfo, function.offset, function.size)?;
+                blocks.extend(&function_blocks);
+            }
+        }
+
+        for (offset, count) in coverage.as_ref() {
+            // Inflate blocks.
+            if let Some(block) = blocks.find(offset) {
+                let block_offsets = instruction_offsets(&*module, block)?;
+
+                for offset in block_offsets {
+                    for location in symcache.lookup(offset.0) {
+                        let line_number = location.line();
+
+                        // `Line` must be nonzero; skip locations that carry no line.
+                        if line_number == 0 {
+                            continue;
+                        }
+
+                        if let Some(file) = location.file() {
+                            let file_path = FilePath::new(file.full_path())?;
+
+                            // We have a hit.
+                            let file_coverage = source.files.entry(file_path).or_default();
+                            let line = Line(line_number);
+
+                            match file_coverage.lines.entry(line) {
+                                Entry::Occupied(occupied) => {
+                                    let old = occupied.into_mut();
+
+                                    // If we miss any part of a line, count it as missed.
+                                    let new = u32::min(old.0, count.0);
+
+                                    *old = Count(new);
+                                }
+                                Entry::Vacant(vacant) => {
+                                    vacant.insert(*count);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    Ok(source)
+}
+
+/// Returns the module offset of every instruction in `block`.
+///
+/// The block's bytes are decoded linearly as 64-bit x86 code, starting at the block
+/// offset. Decoding stops at the end of the data or at the first invalid instruction.
+///
+/// # Errors
+///
+/// Fails if the block's bytes cannot be read from `module`.
+fn instruction_offsets(module: &dyn Module, block: &Block) -> Result<BTreeSet<Offset>> {
+    use iced_x86::Decoder;
+    let data = module.read(block.offset, block.size)?;
+
+    let mut offsets: BTreeSet<Offset> = BTreeSet::default();
+
+    let mut decoder = Decoder::new(64, data, 0);
+    decoder.set_ip(block.offset.0);
+
+    while decoder.can_decode() {
+        let inst = decoder.decode();
+
+        if inst.is_invalid() {
+            break;
+        }
+
+        // `ip()` is the address of the instruction just decoded, not of the next one.
+        offsets.insert(Offset(inst.ip()));
+    }
+
+    Ok(offsets)
+}
diff --git a/src/agent/coverage/src/timer.rs b/src/agent/coverage/src/timer.rs
new file mode 100644
index 0000000000..24c8b44c51
--- /dev/null
+++ b/src/agent/coverage/src/timer.rs
@@ -0,0 +1,37 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+use std::sync::mpsc;
+use std::thread;
+use std::time::Duration;
+
+use anyhow::{bail, Result};
+
+/// Runs `function` on a new thread and waits up to `timeout` for its result.
+///
+/// The worker thread is neither joined nor cancelled. If the timeout elapses, the worker
+/// keeps running detached until `function` returns, and its result is discarded.
+///
+/// # Errors
+///
+/// Fails if `function` does not finish within `timeout`, or if the worker thread exits
+/// without producing a result (that is, `function` panicked).
+pub fn timed<F, T>(timeout: Duration, function: F) -> Result<T>
+where
+    T: Send + 'static,
+    F: FnOnce() -> T + Send + 'static,
+{
+    let (sender, receiver) = mpsc::channel();
+
+    let _worker = thread::spawn(move || {
+        // After a timeout the receiver is gone and nobody is waiting for the result, so a
+        // failed send is expected and must not panic the worker.
+        let _ = sender.send(function());
+    });
+
+    match receiver.recv_timeout(timeout) {
+        Ok(out) => Ok(out),
+        Err(mpsc::RecvTimeoutError::Timeout) => {
+            bail!("function exceeded timeout of {:?}", timeout)
+        }
+        // The sender was dropped without sending: the worker unwound before finishing.
+        Err(mpsc::RecvTimeoutError::Disconnected) => {
+            bail!("function panicked before producing a result")
+        }
+    }
+}