From a018d34e01110a1d4fa438b06962a83550f24870 Mon Sep 17 00:00:00 2001 From: Alex Franchuk Date: Thu, 21 Mar 2024 05:32:31 -0400 Subject: [PATCH] Send SIGSTOP to linux/android processes before doing other procfs/ptrace things. (#108) * Send SIGSTOP to linux/android processes before doing other procfs/ptrace things. If this fails, we continue as we used to. This is an attempt to get a consistent/static process state. Closes #28. * Fix mac * Fix warning about unused doc comment * Allow user customization of timeout Not sure this is needed, but better to allow users to customize this rather than rely on a hardcoded value * Update nix --------- Co-authored-by: Jake Shadle --- Cargo.lock | 12 ++++- Cargo.toml | 4 +- src/bin/test.rs | 26 +++++------ src/linux/minidump_writer.rs | 21 ++++++++- src/linux/ptrace_dumper.rs | 82 ++++++++++++++++++++++++++++++--- src/mac/mach.rs | 3 +- src/mac/streams/exception.rs | 8 ++-- src/mac/streams/thread_names.rs | 4 +- src/windows/minidump_writer.rs | 14 +++--- tests/ptrace_dumper.rs | 21 ++++----- 10 files changed, 146 insertions(+), 49 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2f961eae..c11316d3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -263,6 +263,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfg_aliases" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" + [[package]] name = "circular" version = "0.3.0" @@ -1226,6 +1232,7 @@ dependencies = [ "futures", "goblin 0.8.0", "libc", + "log", "mach2", "memmap2", "memoffset", @@ -1294,12 +1301,13 @@ checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" [[package]] name = "nix" -version = "0.27.1" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053" +checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4" dependencies = [ "bitflags 2.4.2", "cfg-if", + "cfg_aliases", "libc", ] diff --git a/Cargo.toml b/Cargo.toml index e0ff69c8..36f4cb1b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ bitflags = "2.4" byteorder = "1.4" cfg-if = "1.0" crash-context = "0.6" +log = "0.4" memoffset = "0.9" minidump-common = "0.21" scroll = "0.12" @@ -25,10 +26,11 @@ goblin = "0.8" memmap2 = "0.9" [target.'cfg(any(target_os = "linux", target_os = "android"))'.dependencies] -nix = { version = "0.27", default-features = false, features = [ +nix = { version = "0.28", default-features = false, features = [ "mman", "process", "ptrace", + "signal", "user", ] } # Used for parsing procfs info. diff --git a/src/bin/test.rs b/src/bin/test.rs index 85b6fa6a..df39b286 100644 --- a/src/bin/test.rs +++ b/src/bin/test.rs @@ -8,14 +8,14 @@ pub type Result = std::result::Result; mod linux { use super::*; use minidump_writer::{ + minidump_writer::STOP_TIMEOUT, ptrace_dumper::{PtraceDumper, AT_SYSINFO_EHDR}, LINUX_GATE_LIBRARY_NAME, }; use nix::{ - sys::mman::{mmap, MapFlags, ProtFlags}, + sys::mman::{mmap_anonymous, MapFlags, ProtFlags}, unistd::getppid, }; - use std::os::fd::BorrowedFd; macro_rules! test { ($x:expr, $errmsg:expr) => { @@ -29,13 +29,13 @@ mod linux { fn test_setup() -> Result<()> { let ppid = getppid(); - PtraceDumper::new(ppid.as_raw())?; + PtraceDumper::new(ppid.as_raw(), STOP_TIMEOUT)?; Ok(()) } fn test_thread_list() -> Result<()> { let ppid = getppid(); - let dumper = PtraceDumper::new(ppid.as_raw())?; + let dumper = PtraceDumper::new(ppid.as_raw(), STOP_TIMEOUT)?; test!(!dumper.threads.is_empty(), "No threads")?; test!( dumper @@ -51,7 +51,7 @@ mod linux { fn test_copy_from_process(stack_var: usize, heap_var: usize) -> Result<()> { let ppid = getppid().as_raw(); - let mut dumper = PtraceDumper::new(ppid)?; + let mut dumper = PtraceDumper::new(ppid, STOP_TIMEOUT)?; dumper.suspend_threads()?; let stack_res = PtraceDumper::copy_from_process(ppid, stack_var as *mut libc::c_void, 1)?; @@ -73,7 +73,7 @@ mod linux { fn test_find_mappings(addr1: usize, addr2: usize) -> Result<()> { let ppid = getppid(); - let dumper = PtraceDumper::new(ppid.as_raw())?; + let dumper = PtraceDumper::new(ppid.as_raw(), STOP_TIMEOUT)?; dumper .find_mapping(addr1) .ok_or("No mapping for addr1 found")?; @@ -90,7 +90,7 @@ mod linux { let ppid = getppid().as_raw(); let exe_link = format!("/proc/{}/exe", ppid); let exe_name = std::fs::read_link(exe_link)?.into_os_string(); - let mut dumper = PtraceDumper::new(getppid().as_raw())?; + let mut dumper = PtraceDumper::new(getppid().as_raw(), STOP_TIMEOUT)?; let mut found_exe = None; for (idx, mapping) in dumper.mappings.iter().enumerate() { if mapping.name.as_ref().map(|x| x.into()).as_ref() == Some(&exe_name) { @@ -107,7 +107,7 @@ mod linux { fn test_merged_mappings(path: String, mapped_mem: usize, mem_size: usize) -> Result<()> { // Now check that PtraceDumper interpreted the mappings properly. - let dumper = PtraceDumper::new(getppid().as_raw())?; + let dumper = PtraceDumper::new(getppid().as_raw(), STOP_TIMEOUT)?; let mut mapping_count = 0; for map in &dumper.mappings { if map @@ -129,7 +129,7 @@ mod linux { fn test_linux_gate_mapping_id() -> Result<()> { let ppid = getppid().as_raw(); - let mut dumper = PtraceDumper::new(ppid)?; + let mut dumper = PtraceDumper::new(ppid, STOP_TIMEOUT)?; let mut found_linux_gate = false; for mut mapping in dumper.mappings.clone() { if mapping.name == Some(LINUX_GATE_LIBRARY_NAME.into()) { @@ -148,7 +148,7 @@ mod linux { fn test_mappings_include_linux_gate() -> Result<()> { let ppid = getppid().as_raw(); - let dumper = PtraceDumper::new(ppid)?; + let dumper = PtraceDumper::new(ppid, STOP_TIMEOUT)?; let linux_gate_loc = dumper.auxv[&AT_SYSINFO_EHDR]; test!(linux_gate_loc != 0, "linux_gate_loc == 0")?; let mut found_linux_gate = false; @@ -215,18 +215,16 @@ mod linux { let memory_size = std::num::NonZeroUsize::new(page_size.unwrap() as usize).unwrap(); // Get some memory to be mapped by the child-process let mapped_mem = unsafe { - mmap::( + mmap_anonymous( None, memory_size, ProtFlags::PROT_READ | ProtFlags::PROT_WRITE, MapFlags::MAP_PRIVATE | MapFlags::MAP_ANON, - None, - 0, ) .unwrap() }; - println!("{} {}", mapped_mem as usize, memory_size); + println!("{} {}", mapped_mem.as_ptr() as usize, memory_size); loop { std::thread::park(); } diff --git a/src/linux/minidump_writer.rs b/src/linux/minidump_writer.rs index 408a8d6f..c83308f5 100644 --- a/src/linux/minidump_writer.rs +++ b/src/linux/minidump_writer.rs @@ -13,7 +13,10 @@ use crate::{ mem_writer::{Buffer, MemoryArrayWriter, MemoryWriter, MemoryWriterError}, minidump_format::*, }; -use std::io::{Seek, Write}; +use std::{ + io::{Seek, Write}, + time::Duration, +}; pub enum CrashingThreadContext { None, @@ -21,6 +24,10 @@ pub enum CrashingThreadContext { CrashContextPlusAddress((MDLocationDescriptor, usize)), } +/// The default timeout after a `SIGSTOP` after which minidump writing proceeds +/// regardless of the process state +pub const STOP_TIMEOUT: Duration = Duration::from_millis(100); + pub struct MinidumpWriter { pub process_id: Pid, pub blamed_thread: Pid, @@ -34,6 +41,7 @@ pub struct MinidumpWriter { pub sanitize_stack: bool, pub crash_context: Option, pub crashing_thread_context: CrashingThreadContext, + pub stop_timeout: Duration, } // This doesn't work yet: @@ -62,6 +70,7 @@ impl MinidumpWriter { sanitize_stack: false, crash_context: None, crashing_thread_context: CrashingThreadContext::None, + stop_timeout: STOP_TIMEOUT, } } @@ -100,10 +109,18 @@ impl MinidumpWriter { self } + /// Sets the timeout after `SIGSTOP` is sent to the process, if the process + /// has not stopped by the time the timeout has reached, we proceed with + /// minidump generation + pub fn stop_timeout(&mut self, duration: Duration) -> &mut Self { + self.stop_timeout = duration; + self + } + /// Generates a minidump and writes to the destination provided. Returns the in-memory /// version of the minidump as well. pub fn dump(&mut self, destination: &mut (impl Write + Seek)) -> Result> { - let mut dumper = PtraceDumper::new(self.process_id)?; + let mut dumper = PtraceDumper::new(self.process_id, self.stop_timeout)?; dumper.suspend_threads()?; dumper.late_init()?; diff --git a/src/linux/ptrace_dumper.rs b/src/linux/ptrace_dumper.rs index 510c3397..9de95a65 100644 --- a/src/linux/ptrace_dumper.rs +++ b/src/linux/ptrace_dumper.rs @@ -15,10 +15,20 @@ use crate::{ use goblin::elf; use nix::{ errno::Errno, - sys::{ptrace, wait}, + sys::{ptrace, signal, wait}, +}; +use procfs_core::{ + process::{MMPermissions, ProcState, Stat}, + FromRead, ProcError, +}; +use std::{ + collections::HashMap, + ffi::c_void, + io::BufReader, + path, + result::Result, + time::{Duration, Instant}, }; -use procfs_core::process::MMPermissions; -use std::{collections::HashMap, ffi::c_void, io::BufReader, path, result::Result}; #[derive(Debug, Clone)] pub struct Thread { @@ -29,6 +39,7 @@ pub struct Thread { #[derive(Debug)] pub struct PtraceDumper { pub pid: Pid, + process_stopped: bool, threads_suspended: bool, pub threads: Vec, pub auxv: HashMap, @@ -45,9 +56,27 @@ impl Drop for PtraceDumper { fn drop(&mut self) { // Always try to resume all threads (e.g. in case of error) let _ = self.resume_threads(); + // Always allow the process to continue. + let _ = self.continue_process(); } } +#[derive(Debug, thiserror::Error)] +enum StopProcessError { + #[error("Failed to stop the process")] + Stop(#[from] Errno), + #[error("Failed to get the process state")] + State(#[from] ProcError), + #[error("Timeout waiting for process to stop")] + Timeout, +} + +#[derive(Debug, thiserror::Error)] +enum ContinueProcessError { + #[error("Failed to continue the process")] + Continue(#[from] Errno), +} + /// PTRACE_DETACH the given pid. /// /// This handles special errno cases (ESRCH) which we won't consider errors. @@ -67,21 +96,26 @@ fn ptrace_detach(child: Pid) -> Result<(), DumperError> { impl PtraceDumper { /// Constructs a dumper for extracting information of a given process /// with a process ID of |pid|. - pub fn new(pid: Pid) -> Result { + pub fn new(pid: Pid, stop_timeout: Duration) -> Result { let mut dumper = PtraceDumper { pid, + process_stopped: false, threads_suspended: false, threads: Vec::new(), auxv: HashMap::new(), mappings: Vec::new(), page_size: 0, }; - dumper.init()?; + dumper.init(stop_timeout)?; Ok(dumper) } // TODO: late_init for chromeos and android - pub fn init(&mut self) -> Result<(), InitError> { + pub fn init(&mut self, stop_timeout: Duration) -> Result<(), InitError> { + // Stopping the process is best-effort. + if let Err(e) = self.stop_process(stop_timeout) { + log::warn!("failed to stop process {}: {e}", self.pid); + } self.read_auxv()?; self.enumerate_threads()?; self.enumerate_mappings()?; @@ -207,6 +241,42 @@ impl PtraceDumper { result } + /// Send SIGSTOP to the process so that we can get a consistent state. + /// + /// This will block waiting for the process to stop until `timeout` has passed. + fn stop_process(&mut self, timeout: Duration) -> Result<(), StopProcessError> { + signal::kill(nix::unistd::Pid::from_raw(self.pid), Some(signal::SIGSTOP))?; + + // Something like waitpid for non-child processes would be better, but we have no such + // tool, so we poll the status. + const POLL_INTERVAL: Duration = Duration::from_millis(1); + let proc_file = format!("/proc/{}/stat", self.pid); + let end = Instant::now() + timeout; + + loop { + if let Ok(ProcState::Stopped) = Stat::from_file(&proc_file)?.state() { + self.process_stopped = true; + return Ok(()); + } + + std::thread::sleep(POLL_INTERVAL); + if Instant::now() > end { + return Err(StopProcessError::Timeout); + } + } + } + + /// Send SIGCONT to the process to continue. + /// + /// Unlike `stop_process`, this function does not wait for the process to continue. + fn continue_process(&mut self) -> Result<(), ContinueProcessError> { + if self.process_stopped { + signal::kill(nix::unistd::Pid::from_raw(self.pid), Some(signal::SIGCONT))?; + } + self.process_stopped = false; + Ok(()) + } + /// Parse /proc/$pid/task to list all the threads of the process identified by /// pid. fn enumerate_threads(&mut self) -> Result<(), InitError> { diff --git a/src/mac/mach.rs b/src/mac/mach.rs index f95211dc..9b0179fa 100644 --- a/src/mac/mach.rs +++ b/src/mac/mach.rs @@ -590,7 +590,8 @@ pub fn sysctl_by_name(name: &[u8]) -> T { 0, ) != 0 { - // log? + // TODO convert to ascii characters when logging? + log::warn!("failed to get sysctl for {name:?}"); T::default() } else { out diff --git a/src/mac/streams/exception.rs b/src/mac/streams/exception.rs index e594dd8d..7dd7f8fa 100644 --- a/src/mac/streams/exception.rs +++ b/src/mac/streams/exception.rs @@ -69,9 +69,11 @@ impl MinidumpWriter { } else { // For all other exceptions types, the value in the code // _should_ never exceed 32 bits, crashpad does an actual - // range check here, but since we don't really log anything - // else at the moment I'll punt that for now - // TODO: log/do something if exc.code > u32::MAX + // range check here. + if code > u32::MAX.into() { + // TODO: do something more than logging? + log::warn!("exception code {code:#018x} exceeds the expected 32 bits"); + } code as u32 }; diff --git a/src/mac/streams/thread_names.rs b/src/mac/streams/thread_names.rs index 42242a63..016dd48e 100644 --- a/src/mac/streams/thread_names.rs +++ b/src/mac/streams/thread_names.rs @@ -25,8 +25,8 @@ impl MinidumpWriter { // not a critical failure let name_loc = match Self::write_thread_name(buffer, dumper, tid) { Ok(loc) => loc, - Err(_err) => { - // TODO: log error + Err(err) => { + log::warn!("failed to write thread name for thread {tid}: {err}"); write_string_to_location(buffer, "")? } }; diff --git a/src/windows/minidump_writer.rs b/src/windows/minidump_writer.rs index 70cc420e..21753689 100644 --- a/src/windows/minidump_writer.rs +++ b/src/windows/minidump_writer.rs @@ -185,13 +185,13 @@ impl MinidumpWriter { // This is a mut pointer for some reason...I don't _think_ it is // actually mut in practice...? ExceptionPointers: crash_context.exception_pointers as *mut _, - /// The `EXCEPTION_POINTERS` contained in crash context is a pointer into the - /// memory of the process that crashed, as it contains an `EXCEPTION_RECORD` - /// record which is an internally linked list, so in the case that we are - /// dumping a process other than the current one, we need to tell - /// `MiniDumpWriteDump` that the pointers come from an external process so that - /// it can use eg `ReadProcessMemory` to get the contextual information from - /// the crash, rather than from the current process + // The `EXCEPTION_POINTERS` contained in crash context is a pointer into the + // memory of the process that crashed, as it contains an `EXCEPTION_RECORD` + // record which is an internally linked list, so in the case that we are + // dumping a process other than the current one, we need to tell + // `MiniDumpWriteDump` that the pointers come from an external process so that + // it can use eg `ReadProcessMemory` to get the contextual information from + // the crash, rather than from the current process ClientPointers: i32::from(is_external_process), }, ); diff --git a/tests/ptrace_dumper.rs b/tests/ptrace_dumper.rs index 1be27f08..6b62a4f6 100644 --- a/tests/ptrace_dumper.rs +++ b/tests/ptrace_dumper.rs @@ -7,7 +7,6 @@ use nix::sys::signal::Signal; use std::convert::TryInto; use std::io::{BufRead, BufReader}; use std::mem::size_of; -use std::os::unix::io::AsFd; use std::os::unix::process::ExitStatusExt; mod common; @@ -29,7 +28,8 @@ fn test_thread_list_from_parent() { let num_of_threads = 5; let mut child = start_child_and_wait_for_threads(num_of_threads); let pid = child.id() as i32; - let mut dumper = PtraceDumper::new(pid).expect("Couldn't init dumper"); + let mut dumper = PtraceDumper::new(pid, minidump_writer::minidump_writer::STOP_TIMEOUT) + .expect("Couldn't init dumper"); assert_eq!(dumper.threads.len(), num_of_threads); dumper.suspend_threads().expect("Could not suspend threads"); @@ -129,20 +129,22 @@ fn test_merged_mappings() { map_size, ProtFlags::PROT_READ, MapFlags::MAP_SHARED, - Some(file.as_fd()), + &file, 0, ) .unwrap() }; + let mapped = mapped_mem.as_ptr() as usize; + // Carve a page out of the first mapping with different permissions. let _inside_mapping = unsafe { mmap( - std::num::NonZeroUsize::new(mapped_mem as usize + 2 * page_size.get()), + std::num::NonZeroUsize::new(mapped + 2 * page_size.get()), page_size, ProtFlags::PROT_NONE, MapFlags::MAP_SHARED | MapFlags::MAP_FIXED, - Some(file.as_fd()), + &file, // Map a different offset just to // better test real-world conditions. page_size.get().try_into().unwrap(), // try_into() in order to work for 32 and 64 bit @@ -151,11 +153,7 @@ fn test_merged_mappings() { spawn_child( "merged_mappings", - &[ - path, - &format!("{}", mapped_mem as usize), - &format!("{map_size}"), - ], + &[path, &format!("{mapped}"), &format!("{map_size}")], ); } @@ -209,7 +207,8 @@ fn test_sanitize_stack_copy() { let heap_addr = usize::from_str_radix(output.next().unwrap().trim_start_matches("0x"), 16) .expect("unable to parse mmap_addr"); - let mut dumper = PtraceDumper::new(pid).expect("Couldn't init dumper"); + let mut dumper = PtraceDumper::new(pid, minidump_writer::minidump_writer::STOP_TIMEOUT) + .expect("Couldn't init dumper"); assert_eq!(dumper.threads.len(), num_of_threads); dumper.suspend_threads().expect("Could not suspend threads"); let thread_info = dumper