diff --git a/Cargo.lock b/Cargo.lock index d87d8d9fbc43..dbc8c502e10e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3469,6 +3469,7 @@ dependencies = [ "lazy_static", "libc", "log", + "mach", "memoffset", "more-asserts", "psm", diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index ced90fe07979..2c3776fbf7bb 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -475,6 +475,15 @@ impl ABIMachineSpec for AArch64MachineDeps { fn gen_prologue_frame_setup(flags: &settings::Flags) -> SmallInstVec { let mut insts = SmallVec::new(); + + if flags.unwind_info() { + insts.push(Inst::Unwind { + inst: UnwindInst::Aarch64SetPointerAuth { + return_addresses: false, + }, + }); + } + // stp fp (x29), lr (x30), [sp, #-16]! insts.push(Inst::StoreP64 { rt: fp_reg(), diff --git a/cranelift/codegen/src/isa/unwind.rs b/cranelift/codegen/src/isa/unwind.rs index 9ea32a035e16..7c9718a5700c 100644 --- a/cranelift/codegen/src/isa/unwind.rs +++ b/cranelift/codegen/src/isa/unwind.rs @@ -69,6 +69,11 @@ pub mod input { RememberState, /// Restores the state. RestoreState, + /// On aarch64 ARMv8.3+ devices, enables or disables pointer authentication. + Aarch64SetPointerAuth { + /// Whether return addresses (hold in LR) contain a pointer-authentication code. + return_addresses: bool, + }, } /// Unwind information as generated by a backend. @@ -234,4 +239,10 @@ pub enum UnwindInst { /// The saved register. reg: RealReg, }, + /// Defines if the aarch64-specific pointer authentication available for ARM v8.3+ devices + /// is enabled for certain pointers or not. + Aarch64SetPointerAuth { + /// Whether return addresses (hold in LR) contain a pointer-authentication code. 
+ return_addresses: bool, + }, } diff --git a/cranelift/codegen/src/isa/unwind/systemv.rs b/cranelift/codegen/src/isa/unwind/systemv.rs index 4d3a00947a7f..965603d4e144 100644 --- a/cranelift/codegen/src/isa/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/unwind/systemv.rs @@ -44,6 +44,11 @@ pub(crate) enum CallFrameInstruction { RememberState, RestoreState, ArgsSize(u32), + /// Enables or disables pointer authentication on aarch64 platforms post ARMv8.3. This + /// particular item maps to gimli::ValExpression(RA_SIGN_STATE, lit0/lit1). + Aarch64SetPointerAuth { + return_addresses: bool, + }, } impl From for CallFrameInstruction { @@ -75,7 +80,7 @@ impl From for CallFrameInstruction { impl Into for CallFrameInstruction { fn into(self) -> gimli::write::CallFrameInstruction { - use gimli::{write::CallFrameInstruction, Register}; + use gimli::{write::CallFrameInstruction, write::Expression, Register}; match self { Self::Cfa(reg, offset) => CallFrameInstruction::Cfa(Register(reg), offset), @@ -92,6 +97,21 @@ impl Into for CallFrameInstruction { Self::RememberState => CallFrameInstruction::RememberState, Self::RestoreState => CallFrameInstruction::RestoreState, Self::ArgsSize(size) => CallFrameInstruction::ArgsSize(size), + Self::Aarch64SetPointerAuth { return_addresses } => { + // To enable pointer authentication for return addresses in dwarf directives, we + // use a small dwarf expression that sets the value of the pseudo-register + // RA_SIGN_STATE (RA stands for return address) to 0 or 1. This behavior is + // documented in + // https://github.com/ARM-software/abi-aa/blob/master/aadwarf64/aadwarf64.rst#41dwarf-register-names. 
+ let mut expr = Expression::new(); + expr.op(if return_addresses { + gimli::DW_OP_lit1 + } else { + gimli::DW_OP_lit0 + }); + const RA_SIGN_STATE: Register = Register(34); + CallFrameInstruction::ValExpression(RA_SIGN_STATE, expr) + } } } } @@ -187,6 +207,12 @@ pub(crate) fn create_unwind_info_from_insts>( let off = (clobber_offset as i32) - (clobber_offset_to_cfa as i32); instructions.push((instruction_offset, CallFrameInstruction::Offset(reg, off))); } + &UnwindInst::Aarch64SetPointerAuth { return_addresses } => { + instructions.push(( + instruction_offset, + CallFrameInstruction::Aarch64SetPointerAuth { return_addresses }, + )); + } } } @@ -245,6 +271,9 @@ impl UnwindInfo { UnwindCode::RestoreState => { builder.restore_state(*offset); } + UnwindCode::Aarch64SetPointerAuth { return_addresses } => { + builder.set_aarch64_pauth(*offset, *return_addresses); + } } } @@ -399,4 +428,11 @@ impl<'a, Reg: PartialEq + Copy> InstructionBuilder<'a, Reg> { self.instructions .push((offset, CallFrameInstruction::RestoreState)); } + + fn set_aarch64_pauth(&mut self, offset: u32, return_addresses: bool) { + self.instructions.push(( + offset, + CallFrameInstruction::Aarch64SetPointerAuth { return_addresses }, + )); + } } diff --git a/cranelift/codegen/src/isa/unwind/winx64.rs b/cranelift/codegen/src/isa/unwind/winx64.rs index 43c659b6d782..1308eb3bdc8b 100644 --- a/cranelift/codegen/src/isa/unwind/winx64.rs +++ b/cranelift/codegen/src/isa/unwind/winx64.rs @@ -379,6 +379,9 @@ pub(crate) fn create_unwind_info_from_insts>( }); } }, + &UnwindInst::Aarch64SetPointerAuth { .. } => { + unreachable!("no aarch64 on x64"); + } } max_unwind_offset = instruction_offset; } diff --git a/crates/fiber/src/arch/aarch64.S b/crates/fiber/src/arch/aarch64.S index f38be926b260..37e9bc57fb19 100644 --- a/crates/fiber/src/arch/aarch64.S +++ b/crates/fiber/src/arch/aarch64.S @@ -107,9 +107,9 @@ FUNCTION(wasmtime_fiber_start): // ... and then we call the function! 
Note that this is a function call so // our frame stays on the stack to backtrace through. blr x20 - // .. technically we shouldn't get here, and I would like to write in an - // instruction which just aborts, but I don't know such an instruction in - // aarch64 land. + // Unreachable, here for safety. This should help catch unexpected behaviors. + // Use a noticeable payload so one can grep for it in the codebase. + brk 0xf1b3 .cfi_endproc SIZE(wasmtime_fiber_start) diff --git a/crates/runtime/Cargo.toml b/crates/runtime/Cargo.toml index 6bfdcd66945a..4cf2a8d2cd6a 100644 --- a/crates/runtime/Cargo.toml +++ b/crates/runtime/Cargo.toml @@ -27,6 +27,9 @@ psm = "0.1.11" rand = "0.7.3" anyhow = "1.0.38" +[target.'cfg(target_os = "macos")'.dependencies] +mach = "0.3.2" + [target.'cfg(target_os = "windows")'.dependencies] winapi = { version = "0.3.7", features = ["winbase", "memoryapi", "errhandlingapi"] } diff --git a/crates/runtime/src/traphandlers.rs b/crates/runtime/src/traphandlers.rs index 0a8d290f06bc..d9fed4c17e3d 100644 --- a/crates/runtime/src/traphandlers.rs +++ b/crates/runtime/src/traphandlers.rs @@ -6,7 +6,6 @@ use backtrace::Backtrace; use std::any::Any; use std::cell::Cell; use std::error::Error; -use std::io; use std::ptr; use std::sync::atomic::{AtomicUsize, Ordering::SeqCst}; use std::sync::Once; @@ -24,312 +23,23 @@ extern "C" { } cfg_if::cfg_if! { - if #[cfg(unix)] { - use std::mem::{self, MaybeUninit}; - - /// Function which may handle custom signals while processing traps. 
- pub type SignalHandler<'a> = dyn Fn(libc::c_int, *const libc::siginfo_t, *const libc::c_void) -> bool + 'a; - - static mut PREV_SIGSEGV: MaybeUninit = MaybeUninit::uninit(); - static mut PREV_SIGBUS: MaybeUninit = MaybeUninit::uninit(); - static mut PREV_SIGILL: MaybeUninit = MaybeUninit::uninit(); - static mut PREV_SIGFPE: MaybeUninit = MaybeUninit::uninit(); - - unsafe fn platform_init() { - let register = |slot: &mut MaybeUninit, signal: i32| { - let mut handler: libc::sigaction = mem::zeroed(); - // The flags here are relatively careful, and they are... - // - // SA_SIGINFO gives us access to information like the program - // counter from where the fault happened. - // - // SA_ONSTACK allows us to handle signals on an alternate stack, - // so that the handler can run in response to running out of - // stack space on the main stack. Rust installs an alternate - // stack with sigaltstack, so we rely on that. - // - // SA_NODEFER allows us to reenter the signal handler if we - // crash while handling the signal, and fall through to the - // Breakpad handler by testing handlingSegFault. - handler.sa_flags = libc::SA_SIGINFO | libc::SA_NODEFER | libc::SA_ONSTACK; - handler.sa_sigaction = trap_handler as usize; - libc::sigemptyset(&mut handler.sa_mask); - if libc::sigaction(signal, &handler, slot.as_mut_ptr()) != 0 { - panic!( - "unable to install signal handler: {}", - io::Error::last_os_error(), - ); - } - }; - - // Allow handling OOB with signals on all architectures - register(&mut PREV_SIGSEGV, libc::SIGSEGV); - - // Handle `unreachable` instructions which execute `ud2` right now - register(&mut PREV_SIGILL, libc::SIGILL); - - // x86 uses SIGFPE to report division by zero - if cfg!(target_arch = "x86") || cfg!(target_arch = "x86_64") { - register(&mut PREV_SIGFPE, libc::SIGFPE); - } - - // On ARM, handle Unaligned Accesses. - // On Darwin, guard page accesses are raised as SIGBUS. 
- if cfg!(target_arch = "arm") || cfg!(target_os = "macos") || cfg!(target_os = "freebsd") { - register(&mut PREV_SIGBUS, libc::SIGBUS); - } - } - - unsafe extern "C" fn trap_handler( - signum: libc::c_int, - siginfo: *mut libc::siginfo_t, - context: *mut libc::c_void, - ) { - let previous = match signum { - libc::SIGSEGV => &PREV_SIGSEGV, - libc::SIGBUS => &PREV_SIGBUS, - libc::SIGFPE => &PREV_SIGFPE, - libc::SIGILL => &PREV_SIGILL, - _ => panic!("unknown signal: {}", signum), - }; - let handled = tls::with(|info| { - // If no wasm code is executing, we don't handle this as a wasm - // trap. - let info = match info { - Some(info) => info, - None => return false, - }; - - // If we hit an exception while handling a previous trap, that's - // quite bad, so bail out and let the system handle this - // recursive segfault. - // - // Otherwise flag ourselves as handling a trap, do the trap - // handling, and reset our trap handling flag. Then we figure - // out what to do based on the result of the trap handling. - let jmp_buf = info.handle_trap( - get_pc(context), - |handler| handler(signum, siginfo, context), - ); - - // Figure out what to do based on the result of this handling of - // the trap. Note that our sentinel value of 1 means that the - // exception was handled by a custom exception handler, so we - // keep executing. - if jmp_buf.is_null() { - return false; - } else if jmp_buf as usize == 1 { - return true; - - // on macOS this is a bit special, unfortunately. If we were to - // `siglongjmp` out of the signal handler that notably does - // *not* reset the sigaltstack state of our signal handler. This - // seems to trick the kernel into thinking that the sigaltstack - // is still in use upon delivery of the next signal, meaning - // that the sigaltstack is not ever used again if we immediately - // call `Unwind` here. - // - // Note that if we use `longjmp` instead of `siglongjmp` then - // the problem is fixed. 
The problem with that, however, is that - // `setjmp` is much slower than `sigsetjmp` due to the - // preservation of the proceses signal mask. The reason - // `longjmp` appears to work is that it seems to call a function - // (according to published macOS sources) called - // `_sigunaltstack` which updates the kernel to say the - // sigaltstack is no longer in use. We ideally want to call that - // here but I don't think there's a stable way for us to call - // that. - // - // Given all that, on macOS only, we do the next best thing. We - // return from the signal handler after updating the register - // context. This will cause control to return to our - // `unwind_shim` function defined here which will perform the - // `Unwind` (`siglongjmp`) for us. The reason this works is that - // by returning from the signal handler we'll trigger all the - // normal machinery for "the signal handler is done running" - // which will clear the sigaltstack flag and allow reusing it - // for the next signal. Then upon resuming in our custom code we - // blow away the stack anyway with a longjmp. - } else if cfg!(target_os = "macos") { - unsafe extern "C" fn unwind_shim(jmp_buf: *const u8) { - Unwind(jmp_buf) - } - set_pc(context, unwind_shim as usize, jmp_buf as usize); - return true; - } else { - Unwind(jmp_buf) - } - }); - - if handled { - return; - } - - // This signal is not for any compiled wasm code we expect, so we - // need to forward the signal to the next handler. If there is no - // next handler (SIG_IGN or SIG_DFL), then it's time to crash. To do - // this, we set the signal back to its original disposition and - // return. This will cause the faulting op to be re-executed which - // will crash in the normal way. If there is a next handler, call - // it. It will either crash synchronously, fix up the instruction - // so that execution can continue and return, or trigger a crash by - // returning the signal to it's original disposition and returning. 
- let previous = &*previous.as_ptr(); - if previous.sa_flags & libc::SA_SIGINFO != 0 { - mem::transmute::< - usize, - extern "C" fn(libc::c_int, *mut libc::siginfo_t, *mut libc::c_void), - >(previous.sa_sigaction)(signum, siginfo, context) - } else if previous.sa_sigaction == libc::SIG_DFL || - previous.sa_sigaction == libc::SIG_IGN - { - libc::sigaction(signum, previous, ptr::null_mut()); - } else { - mem::transmute::( - previous.sa_sigaction - )(signum) - } - } - - unsafe fn get_pc(cx: *mut libc::c_void) -> *const u8 { - cfg_if::cfg_if! { - if #[cfg(all(target_os = "linux", target_arch = "x86_64"))] { - let cx = &*(cx as *const libc::ucontext_t); - cx.uc_mcontext.gregs[libc::REG_RIP as usize] as *const u8 - } else if #[cfg(all(target_os = "linux", target_arch = "x86"))] { - let cx = &*(cx as *const libc::ucontext_t); - cx.uc_mcontext.gregs[libc::REG_EIP as usize] as *const u8 - } else if #[cfg(all(any(target_os = "linux", target_os = "android"), target_arch = "aarch64"))] { - let cx = &*(cx as *const libc::ucontext_t); - cx.uc_mcontext.pc as *const u8 - } else if #[cfg(all(target_os = "macos", target_arch = "x86_64"))] { - let cx = &*(cx as *const libc::ucontext_t); - (*cx.uc_mcontext).__ss.__rip as *const u8 - } else if #[cfg(all(target_os = "macos", target_arch = "x86"))] { - let cx = &*(cx as *const libc::ucontext_t); - (*cx.uc_mcontext).__ss.__eip as *const u8 - } else if #[cfg(all(target_os = "macos", target_arch = "aarch64"))] { - let cx = &*(cx as *const libc::ucontext_t); - (*cx.uc_mcontext).__ss.__pc as *const u8 - } else if #[cfg(all(target_os = "freebsd", target_arch = "x86_64"))] { - let cx = &*(cx as *const libc::ucontext_t); - cx.uc_mcontext.mc_rip as *const u8 - } else { - compile_error!("unsupported platform"); - } - } - } - - // This is only used on macOS targets for calling an unwinding shim - // function to ensure that we return from the signal handler. - // - // See more comments above where this is called for what it's doing. 
- unsafe fn set_pc(cx: *mut libc::c_void, pc: usize, arg1: usize) { - cfg_if::cfg_if! { - if #[cfg(not(target_os = "macos"))] { - drop((cx, pc, arg1)); - unreachable!(); // not used on these platforms - } else if #[cfg(target_arch = "x86_64")] { - let cx = &mut *(cx as *mut libc::ucontext_t); - (*cx.uc_mcontext).__ss.__rip = pc as u64; - (*cx.uc_mcontext).__ss.__rdi = arg1 as u64; - // We're simulating a "pseudo-call" so we need to ensure - // stack alignment is properly respected, notably that on a - // `call` instruction the stack is 8/16-byte aligned, then - // the function adjusts itself to be 16-byte aligned. - // - // Most of the time the stack pointer is 16-byte aligned at - // the time of the trap but for more robust-ness with JIT - // code where it may ud2 in a prologue check before the - // stack is aligned we double-check here. - if (*cx.uc_mcontext).__ss.__rsp % 16 == 0 { - (*cx.uc_mcontext).__ss.__rsp -= 8; - } - } else if #[cfg(target_arch = "aarch64")] { - let cx = &mut *(cx as *mut libc::ucontext_t); - (*cx.uc_mcontext).__ss.__pc = pc as u64; - (*cx.uc_mcontext).__ss.__x[0] = arg1 as u64; - } else { - compile_error!("unsupported macos target architecture"); - } - } - } + if #[cfg(target_os = "macos")] { + mod macos; + use macos as sys; + } else if #[cfg(unix)] { + mod unix; + use unix as sys; } else if #[cfg(target_os = "windows")] { - use winapi::um::errhandlingapi::*; - use winapi::um::winnt::*; - use winapi::um::minwinbase::*; - use winapi::vc::excpt::*; - - /// Function which may handle custom signals while processing traps. - pub type SignalHandler<'a> = dyn Fn(winapi::um::winnt::PEXCEPTION_POINTERS) -> bool + 'a; - - unsafe fn platform_init() { - // our trap handler needs to go first, so that we can recover from - // wasm faults and continue execution, so pass `1` as a true value - // here. 
- if AddVectoredExceptionHandler(1, Some(exception_handler)).is_null() { - panic!("failed to add exception handler: {}", io::Error::last_os_error()); - } - } - - unsafe extern "system" fn exception_handler( - exception_info: PEXCEPTION_POINTERS - ) -> LONG { - // Check the kind of exception, since we only handle a subset within - // wasm code. If anything else happens we want to defer to whatever - // the rest of the system wants to do for this exception. - let record = &*(*exception_info).ExceptionRecord; - if record.ExceptionCode != EXCEPTION_ACCESS_VIOLATION && - record.ExceptionCode != EXCEPTION_ILLEGAL_INSTRUCTION && - record.ExceptionCode != EXCEPTION_INT_DIVIDE_BY_ZERO && - record.ExceptionCode != EXCEPTION_INT_OVERFLOW - { - return EXCEPTION_CONTINUE_SEARCH; - } - - // FIXME: this is what the previous C++ did to make sure that TLS - // works by the time we execute this trap handling code. This isn't - // exactly super easy to call from Rust though and it's not clear we - // necessarily need to do so. Leaving this here in case we need this - // in the future, but for now we can probably wait until we see a - // strange fault before figuring out how to reimplement this in - // Rust. - // - // if (!NtCurrentTeb()->Reserved1[sThreadLocalArrayPointerIndex]) { - // return EXCEPTION_CONTINUE_SEARCH; - // } - - // This is basically the same as the unix version above, only with a - // few parameters tweaked here and there. - tls::with(|info| { - let info = match info { - Some(info) => info, - None => return EXCEPTION_CONTINUE_SEARCH, - }; - cfg_if::cfg_if! 
{ - if #[cfg(target_arch = "x86_64")] { - let ip = (*(*exception_info).ContextRecord).Rip as *const u8; - } else if #[cfg(target_arch = "x86")] { - let ip = (*(*exception_info).ContextRecord).Eip as *const u8; - } else { - compile_error!("unsupported platform"); - } - } - let jmp_buf = info.handle_trap(ip, |handler| handler(exception_info)); - if jmp_buf.is_null() { - EXCEPTION_CONTINUE_SEARCH - } else if jmp_buf as usize == 1 { - EXCEPTION_CONTINUE_EXECUTION - } else { - Unwind(jmp_buf) - } - }) - } + mod windows; + use windows as sys; } } -/// This function performs the low-overhead signal handler initialization that -/// we want to do eagerly to ensure a more-deterministic global process state. +pub use sys::SignalHandler; + +/// This function performs the low-overhead platform-specific initialization +/// that we want to do eagerly to ensure a more-deterministic global process +/// state. /// /// This is especially relevant for signal handlers since handler ordering /// depends on installation order: the wasm signal handler must run *before* @@ -339,13 +49,7 @@ cfg_if::cfg_if! { /// times, having no effect after the first call. pub fn init_traps() { static INIT: Once = Once::new(); - INIT.call_once(real_init); -} - -fn real_init() { - unsafe { - platform_init(); - } + INIT.call_once(|| unsafe { sys::platform_init() }); } /// Raises a user-defined trap immediately. @@ -451,9 +155,7 @@ pub unsafe fn catch_traps(trap_info: &impl TrapInfo, mut closure: F) -> Resul where F: FnMut(), { - // Ensure that we have our sigaltstack installed. - #[cfg(unix)] - setup_unix_sigaltstack()?; + sys::lazy_per_thread_init()?; return CallThreadState::new(trap_info).with(|cx| { RegisterSetjmp( @@ -691,7 +393,7 @@ impl<'a> CallThreadState<'a> { /// instance, and the trap handler should quickly return. /// * a different pointer - a jmp_buf buffer to longjmp to, meaning that /// the wasm trap was succesfully handled. 
- fn handle_trap( + fn jmp_buf_if_trap( &self, pc: *const u8, call_handler: impl Fn(&SignalHandler) -> bool, @@ -723,22 +425,17 @@ impl<'a> CallThreadState<'a> { return ptr::null(); } - // TODO: stack overflow can happen at any random time (i.e. in malloc() - // in memory.grow) and it's really hard to determine if the cause was - // stack overflow and if it happened in WebAssembly module. - // - // So, let's assume that any untrusted code called from WebAssembly - // doesn't trap. Then, if we have called some WebAssembly code, it - // means the trap is stack overflow. - if self.jmp_buf.get().is_null() { - return ptr::null(); - } + // If all that passed then this is indeed a wasm trap, so return the + // `jmp_buf` passed to `Unwind` to resume. + self.jmp_buf.get() + } + + fn capture_backtrace(&self, pc: *const u8) { let backtrace = Backtrace::new_unresolved(); self.unwind.replace(UnwindReason::JitTrap { backtrace, pc: pc as usize, }); - self.jmp_buf.get() } } @@ -784,6 +481,10 @@ mod tls { #[inline(never)] // see module docs for why this is here pub fn replace(val: Ptr) -> Ptr { + // Mark the current thread as handling interrupts for this specific + // CallThreadState: may clobber the previous entry. + super::super::sys::register_tls(val); + PTR.with(|p| p.replace(val)) } @@ -820,13 +521,19 @@ mod tls { /// /// This is unsafe because it's intended to only be used within the /// context of stack switching within wasmtime. - pub unsafe fn replace(self) { + pub unsafe fn replace(self) -> Result<(), super::Trap> { + // When replacing to the previous value of TLS, we might have + // crossed a thread: make sure the trap-handling lazy initializer + // runs. + super::sys::lazy_per_thread_init()?; + // We need to configure our previous TLS pointer to whatever is in // TLS at this time, and then we set the current state to ourselves. 
let prev = raw::get(); assert!((*self.0).prev.get().is_null()); (*self.0).prev.set(prev); raw::replace(self.0); + Ok(()) } } @@ -861,112 +568,3 @@ mod tls { unsafe { closure(if p.is_null() { None } else { Some(&*p) }) } } } - -/// A module for registering a custom alternate signal stack (sigaltstack). -/// -/// Rust's libstd installs an alternate stack with size `SIGSTKSZ`, which is not -/// always large enough for our signal handling code. Override it by creating -/// and registering our own alternate stack that is large enough and has a guard -/// page. -#[cfg(unix)] -fn setup_unix_sigaltstack() -> Result<(), Trap> { - use std::cell::RefCell; - use std::convert::TryInto; - use std::ptr::null_mut; - - thread_local! { - /// Thread-local state is lazy-initialized on the first time it's used, - /// and dropped when the thread exits. - static TLS: RefCell = RefCell::new(Tls::None); - } - - /// The size of the sigaltstack (not including the guard, which will be - /// added). Make this large enough to run our signal handlers. - const MIN_STACK_SIZE: usize = 16 * 4096; - - enum Tls { - None, - Allocated { - mmap_ptr: *mut libc::c_void, - mmap_size: usize, - }, - BigEnough, - } - - return TLS.with(|slot| unsafe { - let mut slot = slot.borrow_mut(); - match *slot { - Tls::None => {} - // already checked - _ => return Ok(()), - } - - // Check to see if the existing sigaltstack, if it exists, is big - // enough. If so we don't need to allocate our own. - let mut old_stack = mem::zeroed(); - let r = libc::sigaltstack(ptr::null(), &mut old_stack); - assert_eq!(r, 0, "learning about sigaltstack failed"); - if old_stack.ss_flags & libc::SS_DISABLE == 0 && old_stack.ss_size >= MIN_STACK_SIZE { - *slot = Tls::BigEnough; - return Ok(()); - } - - // ... but failing that we need to allocate our own, so do all that - // here. 
- let page_size: usize = libc::sysconf(libc::_SC_PAGESIZE).try_into().unwrap(); - let guard_size = page_size; - let alloc_size = guard_size + MIN_STACK_SIZE; - - let ptr = libc::mmap( - null_mut(), - alloc_size, - libc::PROT_NONE, - libc::MAP_PRIVATE | libc::MAP_ANON, - -1, - 0, - ); - if ptr == libc::MAP_FAILED { - return Err(Trap::oom()); - } - - // Prepare the stack with readable/writable memory and then register it - // with `sigaltstack`. - let stack_ptr = (ptr as usize + guard_size) as *mut libc::c_void; - let r = libc::mprotect( - stack_ptr, - MIN_STACK_SIZE, - libc::PROT_READ | libc::PROT_WRITE, - ); - assert_eq!(r, 0, "mprotect to configure memory for sigaltstack failed"); - let new_stack = libc::stack_t { - ss_sp: stack_ptr, - ss_flags: 0, - ss_size: MIN_STACK_SIZE, - }; - let r = libc::sigaltstack(&new_stack, ptr::null_mut()); - assert_eq!(r, 0, "registering new sigaltstack failed"); - - *slot = Tls::Allocated { - mmap_ptr: ptr, - mmap_size: alloc_size, - }; - Ok(()) - }); - - impl Drop for Tls { - fn drop(&mut self) { - let (ptr, size) = match self { - Tls::Allocated { - mmap_ptr, - mmap_size, - } => (*mmap_ptr, *mmap_size), - _ => return, - }; - unsafe { - // Deallocate the stack memory. - let r = libc::munmap(ptr, size); - debug_assert_eq!(r, 0, "munmap failed during thread shutdown"); - } - } - } -} diff --git a/crates/runtime/src/traphandlers/macos.rs b/crates/runtime/src/traphandlers/macos.rs new file mode 100644 index 000000000000..a2902250424e --- /dev/null +++ b/crates/runtime/src/traphandlers/macos.rs @@ -0,0 +1,496 @@ +//! macOS-specific handling of handling exceptions +//! +//! Unlike other Unix platforms macOS here uses mach ports to handle exceptions +//! instead of signals. While macOS platforms could use signals (and +//! historically they did!) this is incompatible when Wasmtime is linked into a +//! project that is otherwise using mach ports for catching exceptions. This +//! 
came up #2456 notably when a project like Breakpad is integrated to blanket +//! catch exceptions and report them. +//! +//! Mach ports are somewhat obscure and not really heavily used in a ton of +//! places. Needless to say the original author of this file worked with mach +//! ports for the first time when writing this file. As such the exact specifics +//! here may not be super well documented. This file is 100% lifted from +//! SpiderMonkey and then adapted for Wasmtime's purposes. Credit for almost +//! all of this file goes to SpiderMonkey for figuring out all the fiddly bits. +//! See also https://searchfox.org/mozilla-central/source/js/src/wasm/WasmSignalHandlers.cpp for +//! the original code. +//! +//! The high-level overview is that when using mach ports a thread is blocked +//! when it generates an exception and then a message can be read from the +//! port. This means that, unlike signals, threads can't fix their own traps. +//! Instead a helper thread is spun up to service exception messages. This is +//! also in conflict with Wasmtime's exception handling currently which is to +//! use a thread-local to figure out whether a pc is a wasm pc or not on a +//! trap. To work around this we have a global map from mach thread numbers to +//! the state for that thread, updated on entry/exit from wasm. This is likely +//! slower than signals which do less updating on wasm entry/exit, but hopefully +//! by the time this is a problem we can figure out a better solution. +//! +//! Otherwise this file heavily uses the `mach` Rust crate for type and +//! function declarations. Many bits and pieces are copied or translated from +//! the SpiderMonkey implementation and it should pass all the tests! 
+ +#![allow(non_snake_case)] + +use crate::traphandlers::{tls, CallThreadState, Trap, Unwind}; +use mach::exception_types::*; +use mach::kern_return::*; +use mach::mach_init::*; +use mach::mach_port::*; +use mach::message::*; +use mach::port::*; +use mach::thread_act::*; +use mach::traps::*; +use std::cell::Cell; +use std::collections::HashMap; +use std::mem; +use std::ptr; +use std::sync::Mutex; +use std::thread; + +/// Other `mach` declarations awaiting https://github.com/fitzgen/mach/pull/64 to be merged. +mod mach_addons { + #![allow(non_camel_case_types)] + #![allow(non_upper_case_globals)] + #![allow(dead_code)] + + use mach::{ + exception_types::*, kern_return::*, mach_types::*, message::*, port::*, thread_status::*, + }; + use std::mem; + + #[repr(C)] + #[derive(Copy, Clone, Debug)] + #[allow(dead_code)] + pub struct NDR_record_t { + mig_vers: libc::c_uchar, + if_vers: libc::c_uchar, + reserved1: libc::c_uchar, + mig_encoding: libc::c_uchar, + int_rep: libc::c_uchar, + char_rep: libc::c_uchar, + float_rep: libc::c_uchar, + reserved32: libc::c_uchar, + } + + extern "C" { + pub static NDR_record: NDR_record_t; + } + + #[repr(C)] + #[allow(dead_code)] + #[derive(Copy, Clone, Debug)] + pub struct __Request__exception_raise_t { + pub Head: mach_msg_header_t, + /* start of the kernel processed data */ + pub msgh_body: mach_msg_body_t, + pub thread: mach_msg_port_descriptor_t, + pub task: mach_msg_port_descriptor_t, + /* end of the kernel processed data */ + pub NDR: NDR_record_t, + pub exception: exception_type_t, + pub codeCnt: mach_msg_type_number_t, + pub code: [i64; 2], + } + + #[repr(C)] + #[allow(dead_code)] + #[derive(Copy, Clone, Debug)] + pub struct __Reply__exception_raise_t { + pub Head: mach_msg_header_t, + pub NDR: NDR_record_t, + pub RetCode: kern_return_t, + } + + #[repr(C)] + #[derive(Copy, Clone, Debug, Default, Hash, PartialOrd, PartialEq, Eq, Ord)] + pub struct arm_thread_state64_t { + pub __x: [u64; 29], + pub __fp: u64, // frame pointer x29 + 
pub __lr: u64, // link register x30 + pub __sp: u64, // stack pointer x31 + pub __pc: u64, + pub __cpsr: u32, + pub __pad: u32, + } + + impl arm_thread_state64_t { + pub fn count() -> mach_msg_type_number_t { + (mem::size_of::() / mem::size_of::()) as mach_msg_type_number_t + } + } + + pub static ARM_THREAD_STATE64: thread_state_flavor_t = 6; + + #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] + pub static THREAD_STATE_NONE: thread_state_flavor_t = 13; + #[cfg(target_arch = "aarch64")] + pub static THREAD_STATE_NONE: thread_state_flavor_t = 5; + + extern "C" { + pub fn thread_set_state( + target_act: thread_port_t, + flavor: thread_state_flavor_t, + new_state: thread_state_t, + new_stateCnt: mach_msg_type_number_t, + ) -> kern_return_t; + + pub fn thread_set_exception_ports( + thread: thread_port_t, + exception_mask: exception_mask_t, + new_port: mach_port_t, + behavior: libc::c_uint, + new_flavor: thread_state_flavor_t, + ) -> kern_return_t; + } +} + +use mach_addons::*; + +/// Just used below +pub enum Void {} +/// For now this is basically unused, we don't expose this any more for +/// Wasmtime on macOS. +pub type SignalHandler<'a> = dyn Fn(Void) -> bool + 'a; + +/// Process-global map for mapping thread names to their state to figure out +/// whether a thread's trap is related to wasm or not. This is extremely +/// unsafe and caution must be used when accessing. Be sure to read +/// documentation below on this. +static mut MAP: *mut Mutex>> = + ptr::null_mut(); + +/// Process-global port that we use to route thread-level exceptions to. +static mut WASMTIME_PORT: mach_port_name_t = MACH_PORT_NULL; + +pub unsafe fn platform_init() { + // Initialize the process global map + MAP = Box::into_raw(Default::default()); + + // Allocate our WASMTIME_PORT and make sure that it can be sent to so we + // can receive exceptions. 
+ let me = mach_task_self(); + let kret = mach_port_allocate(me, MACH_PORT_RIGHT_RECEIVE, &mut WASMTIME_PORT); + assert_eq!(kret, KERN_SUCCESS, "failed to allocate port"); + let kret = mach_port_insert_right(me, WASMTIME_PORT, WASMTIME_PORT, MACH_MSG_TYPE_MAKE_SEND); + assert_eq!(kret, KERN_SUCCESS, "failed to insert right"); + + // Spin up our handler thread which will solely exist to service exceptions + // generated by other threads. Note that this is a background thread that + // we're not very interested in so it's detached here. + thread::spawn(|| handler_thread()); +} + +// This is largely just copied from SpiderMonkey. +#[repr(C)] +#[allow(dead_code)] +struct ExceptionRequest { + body: __Request__exception_raise_t, + trailer: mach_msg_trailer_t, +} + +unsafe fn handler_thread() { + // Taken from mach_exc in /usr/include/mach/mach_exc.defs. + const EXCEPTION_MSG_ID: mach_msg_id_t = 2405; + + loop { + // Block this thread reading a message from our port. This will block + // until some thread throws an exception. Note that messages are all + // expected to be exceptions here. + let mut request: ExceptionRequest = mem::zeroed(); + let kret = mach_msg( + &mut request.body.Head, + MACH_RCV_MSG, + 0, + mem::size_of_val(&request) as u32, + WASMTIME_PORT, + MACH_MSG_TIMEOUT_NONE, + MACH_PORT_NULL, + ); + if kret != KERN_SUCCESS { + eprintln!("mach_msg failed with {} ({0:x})", kret); + libc::abort(); + } + if request.body.Head.msgh_id != EXCEPTION_MSG_ID { + eprintln!("unexpected msg header id {}", request.body.Head.msgh_id); + libc::abort(); + } + + // Attempt to handle the exception below which will process the state + // of the request. + // + // We unconditionally need to send a message back on our port after + // this exception is received, and our reply code here dictates whether + // the thread crashes or whether we continue execution of the thread. 
+ let reply_code = if handle_exception(&mut request) { + KERN_SUCCESS + } else { + KERN_FAILURE + }; + + // This magic incantation to send a reply back to the kernel was + // derived from the exc_server generated by + // 'mig -v /usr/include/mach/mach_exc.defs'. + let mut reply: __Reply__exception_raise_t = mem::zeroed(); + reply.Head.msgh_bits = + MACH_MSGH_BITS(request.body.Head.msgh_bits & MACH_MSGH_BITS_REMOTE_MASK, 0); + reply.Head.msgh_size = mem::size_of_val(&reply) as u32; + reply.Head.msgh_remote_port = request.body.Head.msgh_remote_port; + reply.Head.msgh_local_port = MACH_PORT_NULL; + reply.Head.msgh_id = request.body.Head.msgh_id + 100; + reply.NDR = NDR_record; + reply.RetCode = reply_code; + mach_msg( + &mut reply.Head, + MACH_SEND_MSG, + mem::size_of_val(&reply) as u32, + 0, + MACH_PORT_NULL, + MACH_MSG_TIMEOUT_NONE, + MACH_PORT_NULL, + ); + } +} + +unsafe fn handle_exception(request: &mut ExceptionRequest) -> bool { + // First make sure that this exception is one that we actually expect to + // get raised by wasm code. All other exceptions we safely ignore. + match request.body.exception as u32 { + EXC_BAD_ACCESS | EXC_BAD_INSTRUCTION => {} + _ => return false, + } + + // Depending on the current architecture various bits and pieces of this + // will change. This is expected to get filled out for other macos + // platforms as necessary. + // + // The variables this needs to define are: + // + // * `ThreadState` - a structure read via `thread_get_state` to learn about + // the register state of the thread that trapped. + // * `thread_state_flavor` - used to read `ThreadState` + // * `get_pc` - a function from `&ThreadState` to a pointer to read the + // current program counter, used to test if it's an address we're + // catching wasm traps for. + // * `resume` - a function used to modify `ThreadState` to resume in the + // target thread in the `unwind` function below, passing the two + // parameters as the first two arguments. 
+ // * `thread_state` - a fresh instance of `ThreadState` to read into + // * `thread_state_count` - the size to pass to `mach_msg`. + cfg_if::cfg_if! { + if #[cfg(target_arch = "x86_64")] { + use mach::structs::x86_thread_state64_t; + use mach::thread_status::x86_THREAD_STATE64; + + type ThreadState = x86_thread_state64_t; + + let thread_state_flavor = x86_THREAD_STATE64; + + let get_pc = |state: &ThreadState| state.__rip as *const u8; + + let resume = |state: &mut ThreadState, pc: usize, jmp_buf: usize| { + // The x86_64 ABI requires a 16-byte stack alignment for + // functions, so typically we'll be 16-byte aligned. In this + // case we simulate a `call` instruction by decrementing the + // stack pointer and pushing the "return" address which in this + // case is the faulting address. This should help the native + // unwinder figure out how to find the precisely trapping + // function. + // + // Note, however, that if the stack is not 16-byte aligned then + // we don't do anything. Currently this only arises due to + // `ud2` in the prologue of functions when performing the + // initial stack check. In the old backend 0 stack manipulation + // happens until after the stack check passes, so if the stack + // check fails (hence we're running in this handler) then the + // stack is not 16-byte aligned due to the previous return + // address pushed by `call`. In this scenario we just blow away + // the stack frame by overwriting %rip. This technically loses + // the precise frame that was interrupted, but that's probably + // not the end of the world anyway. 
+ if state.__rsp % 16 == 0 { + state.__rsp -= 8; + *(state.__rsp as *mut u64) = state.__rip; + } + state.__rip = unwind as u64; + state.__rdi = pc as u64; + state.__rsi = jmp_buf as u64; + }; + let mut thread_state = ThreadState::new(); + } else if #[cfg(target_arch = "aarch64")] { + type ThreadState = arm_thread_state64_t; + + let thread_state_flavor = ARM_THREAD_STATE64; + + let get_pc = |state: &ThreadState| state.__pc as *const u8; + + let resume = |state: &mut ThreadState, pc: usize, jmp_buf: usize| { + // Clobber LR with the faulting PC, so unwinding resumes at the + // faulting instruction. The previous value of LR has been saved + // by the callee (in Cranelift generated code), so no need to + // stash it. + state.__lr = pc as u64; + + // Fill in the 2 arguments to unwind here, and set PC to it, so + // it looks like a call to unwind. + state.__pc = unwind as u64; + state.__x[0] = pc as u64; + state.__x[1] = jmp_buf as u64; + }; + let mut thread_state = mem::zeroed::(); + } else { + compile_error!("unsupported target architecture"); + } + } + + // First up read our origin thread's state into the area defined above. + let origin_thread = request.body.thread.name; + let mut thread_state_count = ThreadState::count(); + let kret = thread_get_state( + origin_thread, + thread_state_flavor, + &mut thread_state as *mut ThreadState as *mut u32, + &mut thread_state_count, + ); + if kret != KERN_SUCCESS { + return false; + } + + // Use our global map to determine if this program counter is indeed a wasm + // trap, loading the `jmp_buf` to unwind to if it is. + // + // Note that this is where things are pretty tricky. We're accessing + // non-`Send` state (`CallThreadState`) from the exception handling thread. + // While typically invalid we are guaranteed that the original thread is + // stopped while we're accessing it here so this should be safe. + // + // Note also that we access the `state` outside the lock of `MAP`. 
This + // again is safe because if `state` is `Some` then we're guaranteed the + // thread is stopped and won't be removing or invalidating its state. + // Finally our indirection with a pointer means that we can read the + // pointer value and if `MAP` changes happen after we read our entry that's + // ok since they won't invalidate our entry. + let pc = get_pc(&thread_state); + let state = (*MAP) + .lock() + .unwrap_or_else(|e| e.into_inner()) + .get(&origin_thread) + .copied(); + let jmp_buf = match state { + Some(state) => (*state).jmp_buf_if_trap(pc, |_| false), + None => ptr::null(), + }; + if jmp_buf.is_null() { + return false; + } + if jmp_buf as usize == 1 { + return false; + } + + // We have determined that this is a wasm trap and we need to actually + // force the thread itself to trap. The thread's register state is + // configured to resume in the `unwind` function below, we update the + // thread's register state, and then we're off to the races. + resume(&mut thread_state, pc as usize, jmp_buf as usize); + let kret = thread_set_state( + origin_thread, + thread_state_flavor, + &mut thread_state as *mut ThreadState as *mut u32, + thread_state_count, + ); + kret == KERN_SUCCESS +} + +/// This is a "landing pad" which is never called directly but is directly +/// resumed into from wasm-trapped threads. +/// +/// This is a small shim which primarily serves the purpose of simply capturing +/// a native backtrace once we've switched back to the thread itself. After +/// the backtrace is captured we can do the usual `longjmp` back to the source +/// of the wasm code. +unsafe extern "C" fn unwind(wasm_pc: *const u8, jmp_buf: *const u8) -> ! { + tls::with(|state| { + if let Some(state) = state { + state.capture_backtrace(wasm_pc); + } + }); + + Unwind(jmp_buf); +} + +thread_local! 
{ + static MY_PORT: ClosePort = ClosePort(unsafe { mach_thread_self() }); +} + +struct ClosePort(mach_port_name_t); + +impl Drop for ClosePort { + fn drop(&mut self) { + unsafe { + mach_port_deallocate(mach_task_self(), self.0); + } + } +} + +/// Exceptions on macOS can be delivered to either thread-level or task-level +/// exception ports. In wasmtime we choose to send the exceptions to +/// thread-level ports. This means that we need to, for each thread that can +/// generate an exception, register our thread's exception port as +/// `WASMTIME_PORT` above. +/// +/// Note that this choice is done because at the current time if we were to +/// implement a task-level (process-wide) port we'd have to figure out how to +/// forward exceptions that we're not interested in to the previously registered +/// port. At this time the author isn't sure how to even do that. SpiderMonkey +/// calls this forwarding "dark magic" as well, and since SpiderMonkey chooses +/// thread-level ports then I hope that's good enough for wasmtime. +/// +/// Also note that this choice of thread-level ports should be fine in that +/// unhandled thread-level exceptions get automatically forwarded to the +/// task-level port which is where we'd expect things like breakpad/crashpad +/// exception handlers to get registered. +pub fn lazy_per_thread_init() -> Result<(), Trap> { + thread_local! { + static PORTS_SET: Cell = Cell::new(false); + } + + PORTS_SET.with(|ports| { + if ports.replace(true) { + return; + } + + unsafe { + assert!(WASMTIME_PORT != MACH_PORT_NULL); + let kret = thread_set_exception_ports( + MY_PORT.with(|p| p.0), + EXC_MASK_BAD_ACCESS | EXC_MASK_BAD_INSTRUCTION, + WASMTIME_PORT, + EXCEPTION_DEFAULT | MACH_EXCEPTION_CODES, + mach_addons::THREAD_STATE_NONE, + ); + assert_eq!(kret, KERN_SUCCESS, "failed to set thread exception port"); + } + }); + Ok(()) +} + +/// This hook is invoked whenever TLS state for the current thread is updated +/// to the `ptr` specified.
+/// +/// The purpose for hooking this on macOS is we register in a process-global map +/// that our mach thread's state is `ptr` at this time. This allows the +/// exception handling thread to lookup in this map later if our thread +/// generates an exception. +/// +/// Note that in general this is quite unsafe since we're moving non-Send state +/// (`ptr`) which is also only valid for a short portion of the program (it +/// lives on the stack) into a global portion of the program. This needs to be +/// kept tightly in sync with `handle_exception` above where it's accessed in a +/// very limited fashion. +pub fn register_tls(ptr: *const CallThreadState<'static>) { + unsafe { + let me = MY_PORT.with(|p| p.0); + (*MAP).lock().unwrap().insert(me, ptr); + } +} diff --git a/crates/runtime/src/traphandlers/unix.rs b/crates/runtime/src/traphandlers/unix.rs new file mode 100644 index 000000000000..a97976f9caa2 --- /dev/null +++ b/crates/runtime/src/traphandlers/unix.rs @@ -0,0 +1,257 @@ +use crate::traphandlers::{tls, CallThreadState, Trap, Unwind}; +use std::cell::RefCell; +use std::convert::TryInto; +use std::io; +use std::mem::{self, MaybeUninit}; +use std::ptr::{self, null_mut}; + +/// Function which may handle custom signals while processing traps. +pub type SignalHandler<'a> = + dyn Fn(libc::c_int, *const libc::siginfo_t, *const libc::c_void) -> bool + 'a; + +static mut PREV_SIGSEGV: MaybeUninit = MaybeUninit::uninit(); +static mut PREV_SIGBUS: MaybeUninit = MaybeUninit::uninit(); +static mut PREV_SIGILL: MaybeUninit = MaybeUninit::uninit(); +static mut PREV_SIGFPE: MaybeUninit = MaybeUninit::uninit(); + +pub unsafe fn platform_init() { + let register = |slot: &mut MaybeUninit, signal: i32| { + let mut handler: libc::sigaction = mem::zeroed(); + // The flags here are relatively careful, and they are... + // + // SA_SIGINFO gives us access to information like the program + // counter from where the fault happened. 
+ // + // SA_ONSTACK allows us to handle signals on an alternate stack, + // so that the handler can run in response to running out of + // stack space on the main stack. Rust installs an alternate + // stack with sigaltstack, so we rely on that. + // + // SA_NODEFER allows us to reenter the signal handler if we + // crash while handling the signal, and fall through to the + // Breakpad handler by testing handlingSegFault. + handler.sa_flags = libc::SA_SIGINFO | libc::SA_NODEFER | libc::SA_ONSTACK; + handler.sa_sigaction = trap_handler as usize; + libc::sigemptyset(&mut handler.sa_mask); + if libc::sigaction(signal, &handler, slot.as_mut_ptr()) != 0 { + panic!( + "unable to install signal handler: {}", + io::Error::last_os_error(), + ); + } + }; + + // Allow handling OOB with signals on all architectures + register(&mut PREV_SIGSEGV, libc::SIGSEGV); + + // Handle `unreachable` instructions which execute `ud2` right now + register(&mut PREV_SIGILL, libc::SIGILL); + + // x86 uses SIGFPE to report division by zero + if cfg!(target_arch = "x86") || cfg!(target_arch = "x86_64") { + register(&mut PREV_SIGFPE, libc::SIGFPE); + } + + // On ARM, handle Unaligned Accesses. + if cfg!(target_arch = "arm") || cfg!(target_os = "freebsd") { + register(&mut PREV_SIGBUS, libc::SIGBUS); + } +} + +unsafe extern "C" fn trap_handler( + signum: libc::c_int, + siginfo: *mut libc::siginfo_t, + context: *mut libc::c_void, +) { + let previous = match signum { + libc::SIGSEGV => &PREV_SIGSEGV, + libc::SIGBUS => &PREV_SIGBUS, + libc::SIGFPE => &PREV_SIGFPE, + libc::SIGILL => &PREV_SIGILL, + _ => panic!("unknown signal: {}", signum), + }; + let handled = tls::with(|info| { + // If no wasm code is executing, we don't handle this as a wasm + // trap. + let info = match info { + Some(info) => info, + None => return false, + }; + + // If we hit an exception while handling a previous trap, that's + // quite bad, so bail out and let the system handle this + // recursive segfault. 
+ // + // Otherwise flag ourselves as handling a trap, do the trap + // handling, and reset our trap handling flag. Then we figure + // out what to do based on the result of the trap handling. + let pc = get_pc(context); + let jmp_buf = + info.jmp_buf_if_trap(pc, |handler| handler(signum, siginfo, context)); + + // Figure out what to do based on the result of this handling of + // the trap. Note that our sentinel value of 1 means that the + // exception was handled by a custom exception handler, so we + // keep executing. + if jmp_buf.is_null() { + return false; + } + if jmp_buf as usize == 1 { + return true; + } + info.capture_backtrace(pc); + Unwind(jmp_buf) + }); + + if handled { + return; + } + + // This signal is not for any compiled wasm code we expect, so we + // need to forward the signal to the next handler. If there is no + // next handler (SIG_IGN or SIG_DFL), then it's time to crash. To do + // this, we set the signal back to its original disposition and + // return. This will cause the faulting op to be re-executed which + // will crash in the normal way. If there is a next handler, call + // it. It will either crash synchronously, fix up the instruction + // so that execution can continue and return, or trigger a crash by + // returning the signal to its original disposition and returning. + let previous = &*previous.as_ptr(); + if previous.sa_flags & libc::SA_SIGINFO != 0 { + mem::transmute::( + previous.sa_sigaction, + )(signum, siginfo, context) + } else if previous.sa_sigaction == libc::SIG_DFL || previous.sa_sigaction == libc::SIG_IGN { + libc::sigaction(signum, previous, ptr::null_mut()); + } else { + mem::transmute::(previous.sa_sigaction)(signum) + } +} + +unsafe fn get_pc(cx: *mut libc::c_void) -> *const u8 { + cfg_if::cfg_if!
{ + if #[cfg(all(target_os = "linux", target_arch = "x86_64"))] { + let cx = &*(cx as *const libc::ucontext_t); + cx.uc_mcontext.gregs[libc::REG_RIP as usize] as *const u8 + } else if #[cfg(all(target_os = "linux", target_arch = "x86"))] { + let cx = &*(cx as *const libc::ucontext_t); + cx.uc_mcontext.gregs[libc::REG_EIP as usize] as *const u8 + } else if #[cfg(all(any(target_os = "linux", target_os = "android"), target_arch = "aarch64"))] { + let cx = &*(cx as *const libc::ucontext_t); + cx.uc_mcontext.pc as *const u8 + } else if #[cfg(all(target_os = "freebsd", target_arch = "x86_64"))] { + let cx = &*(cx as *const libc::ucontext_t); + cx.uc_mcontext.mc_rip as *const u8 + } else { + compile_error!("unsupported platform"); + } + } +} + +/// A function for registering a custom alternate signal stack (sigaltstack). +/// +/// Rust's libstd installs an alternate stack with size `SIGSTKSZ`, which is not +/// always large enough for our signal handling code. Override it by creating +/// and registering our own alternate stack that is large enough and has a guard +/// page. +pub fn lazy_per_thread_init() -> Result<(), Trap> { + thread_local! { + /// Thread-local state is lazy-initialized on the first time it's used, + /// and dropped when the thread exits. + static TLS: RefCell = RefCell::new(Tls::None); + } + + /// The size of the sigaltstack (not including the guard, which will be + /// added). Make this large enough to run our signal handlers. + const MIN_STACK_SIZE: usize = 16 * 4096; + + enum Tls { + None, + Allocated { + mmap_ptr: *mut libc::c_void, + mmap_size: usize, + }, + BigEnough, + } + + return TLS.with(|slot| unsafe { + let mut slot = slot.borrow_mut(); + match *slot { + Tls::None => {} + // already checked + _ => return Ok(()), + } + + // Check to see if the existing sigaltstack, if it exists, is big + // enough. If so we don't need to allocate our own. 
+ let mut old_stack = mem::zeroed(); + let r = libc::sigaltstack(ptr::null(), &mut old_stack); + assert_eq!(r, 0, "learning about sigaltstack failed"); + if old_stack.ss_flags & libc::SS_DISABLE == 0 && old_stack.ss_size >= MIN_STACK_SIZE { + *slot = Tls::BigEnough; + return Ok(()); + } + + // ... but failing that we need to allocate our own, so do all that + // here. + let page_size: usize = libc::sysconf(libc::_SC_PAGESIZE).try_into().unwrap(); + let guard_size = page_size; + let alloc_size = guard_size + MIN_STACK_SIZE; + + let ptr = libc::mmap( + null_mut(), + alloc_size, + libc::PROT_NONE, + libc::MAP_PRIVATE | libc::MAP_ANON, + -1, + 0, + ); + if ptr == libc::MAP_FAILED { + return Err(Trap::oom()); + } + + // Prepare the stack with readable/writable memory and then register it + // with `sigaltstack`. + let stack_ptr = (ptr as usize + guard_size) as *mut libc::c_void; + let r = libc::mprotect( + stack_ptr, + MIN_STACK_SIZE, + libc::PROT_READ | libc::PROT_WRITE, + ); + assert_eq!(r, 0, "mprotect to configure memory for sigaltstack failed"); + let new_stack = libc::stack_t { + ss_sp: stack_ptr, + ss_flags: 0, + ss_size: MIN_STACK_SIZE, + }; + let r = libc::sigaltstack(&new_stack, ptr::null_mut()); + assert_eq!(r, 0, "registering new sigaltstack failed"); + + *slot = Tls::Allocated { + mmap_ptr: ptr, + mmap_size: alloc_size, + }; + Ok(()) + }); + + impl Drop for Tls { + fn drop(&mut self) { + let (ptr, size) = match self { + Tls::Allocated { + mmap_ptr, + mmap_size, + } => (*mmap_ptr, *mmap_size), + _ => return, + }; + unsafe { + // Deallocate the stack memory. 
+ let r = libc::munmap(ptr, size); + debug_assert_eq!(r, 0, "munmap failed during thread shutdown"); + } + } + } +} + +pub fn register_tls(_: *const CallThreadState<'static>) { + // Unused on unix +} diff --git a/crates/runtime/src/traphandlers/windows.rs b/crates/runtime/src/traphandlers/windows.rs new file mode 100644 index 000000000000..9512c4b52cae --- /dev/null +++ b/crates/runtime/src/traphandlers/windows.rs @@ -0,0 +1,83 @@ +use crate::traphandlers::{tls, CallThreadState, Trap, Unwind}; +use std::io; +use winapi::um::errhandlingapi::*; +use winapi::um::minwinbase::*; +use winapi::um::winnt::*; +use winapi::vc::excpt::*; + +/// Function which may handle custom signals while processing traps. +pub type SignalHandler<'a> = dyn Fn(winapi::um::winnt::PEXCEPTION_POINTERS) -> bool + 'a; + +pub unsafe fn platform_init() { + // our trap handler needs to go first, so that we can recover from + // wasm faults and continue execution, so pass `1` as a true value + // here. + if AddVectoredExceptionHandler(1, Some(exception_handler)).is_null() { + panic!( + "failed to add exception handler: {}", + io::Error::last_os_error() + ); + } +} + +unsafe extern "system" fn exception_handler(exception_info: PEXCEPTION_POINTERS) -> LONG { + // Check the kind of exception, since we only handle a subset within + // wasm code. If anything else happens we want to defer to whatever + // the rest of the system wants to do for this exception. + let record = &*(*exception_info).ExceptionRecord; + if record.ExceptionCode != EXCEPTION_ACCESS_VIOLATION + && record.ExceptionCode != EXCEPTION_ILLEGAL_INSTRUCTION + && record.ExceptionCode != EXCEPTION_INT_DIVIDE_BY_ZERO + && record.ExceptionCode != EXCEPTION_INT_OVERFLOW + { + return EXCEPTION_CONTINUE_SEARCH; + } + + // FIXME: this is what the previous C++ did to make sure that TLS + // works by the time we execute this trap handling code. 
This isn't + // exactly super easy to call from Rust though and it's not clear we + // necessarily need to do so. Leaving this here in case we need this + // in the future, but for now we can probably wait until we see a + // strange fault before figuring out how to reimplement this in + // Rust. + // + // if (!NtCurrentTeb()->Reserved1[sThreadLocalArrayPointerIndex]) { + // return EXCEPTION_CONTINUE_SEARCH; + // } + + // This is basically the same as the unix version above, only with a + // few parameters tweaked here and there. + tls::with(|info| { + let info = match info { + Some(info) => info, + None => return EXCEPTION_CONTINUE_SEARCH, + }; + cfg_if::cfg_if! { + if #[cfg(target_arch = "x86_64")] { + let ip = (*(*exception_info).ContextRecord).Rip as *const u8; + } else if #[cfg(target_arch = "x86")] { + let ip = (*(*exception_info).ContextRecord).Eip as *const u8; + } else { + compile_error!("unsupported platform"); + } + } + let jmp_buf = info.jmp_buf_if_trap(ip, |handler| handler(exception_info)); + if jmp_buf.is_null() { + EXCEPTION_CONTINUE_SEARCH + } else if jmp_buf as usize == 1 { + EXCEPTION_CONTINUE_EXECUTION + } else { + info.capture_backtrace(ip); + Unwind(jmp_buf) + } + }) +} + +pub fn lazy_per_thread_init() -> Result<(), Trap> { + // Unused on Windows + Ok(()) +} + +pub fn register_tls(_: *const CallThreadState<'static>) { + // Unused on Windows +} diff --git a/crates/wasmtime/src/lib.rs b/crates/wasmtime/src/lib.rs index 6d4e847f4fc5..066d034cbcff 100644 --- a/crates/wasmtime/src/lib.rs +++ b/crates/wasmtime/src/lib.rs @@ -307,7 +307,9 @@ pub use crate::types::*; pub use crate::values::*; cfg_if::cfg_if! 
{ - if #[cfg(unix)] { + if #[cfg(target_os = "macos")] { + // no extensions for macOS at this time + } else if #[cfg(unix)] { pub mod unix; } else if #[cfg(windows)] { pub mod windows; diff --git a/crates/wasmtime/src/store.rs b/crates/wasmtime/src/store.rs index 07f528db9e2c..e90b394d26c9 100644 --- a/crates/wasmtime/src/store.rs +++ b/crates/wasmtime/src/store.rs @@ -395,6 +395,7 @@ impl Store { self.existing_instance_handle(InstanceHandle::from_vmctx(cx)) } + #[cfg_attr(not(target_os = "linux"), allow(dead_code))] // not used on all platforms pub(crate) fn set_signal_handler(&self, handler: Option>>) { *self.inner.signal_handler.borrow_mut() = handler; } @@ -713,10 +714,11 @@ impl Store { Poll::Ready(t) => break Ok(t), Poll::Pending => {} } + unsafe { let before = wasmtime_runtime::TlsRestore::take(); let res = (*suspend).suspend(()); - before.replace(); + before.replace().map_err(|e| Trap::from_runtime(self, e))?; res?; } } @@ -822,7 +824,7 @@ impl Store { let cx = unsafe { std::mem::transmute::<&mut Context<'_>, *mut Context<'static>>(cx) }; let prev = self.store.inner.current_poll_cx.replace(cx); - let _reste = Reset(&self.store.inner.current_poll_cx, prev); + let _reset = Reset(&self.store.inner.current_poll_cx, prev); // After that's set up we resume execution of the fiber, which // may also start the fiber for the first time. This either diff --git a/crates/wasmtime/src/unix.rs b/crates/wasmtime/src/unix.rs index 2e9c10d9c1bf..25969be2c678 100644 --- a/crates/wasmtime/src/unix.rs +++ b/crates/wasmtime/src/unix.rs @@ -1,9 +1,9 @@ //! Unix-specific extension for the `wasmtime` crate. //! -//! This module is only available on Unix targets, for example Linux and macOS. -//! It is not available on Windows, for example. Note that the import path for -//! this module is `wasmtime::unix::...`, which is intended to emphasize that it -//! is platform-specific. +//! This module is only available on Unix targets, for example Linux. Note that +//! 
this module is notably not available on macOS or Windows. Note that the +//! import path for this module is `wasmtime::unix::...`, which is intended to +//! emphasize that it is platform-specific. //! //! The traits contained in this module are intended to extend various types //! throughout the `wasmtime` crate with extra functionality that's only diff --git a/tests/all/custom_signal_handler.rs b/tests/all/custom_signal_handler.rs index 805f759935aa..2d4243123eff 100644 --- a/tests/all/custom_signal_handler.rs +++ b/tests/all/custom_signal_handler.rs @@ -1,4 +1,4 @@ -#[cfg(not(target_os = "windows"))] +#[cfg(target_os = "linux")] mod tests { use anyhow::Result; use std::rc::Rc;