From cd33960c5c468e12d09f8eaf6b696dc78a0d99a0 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 22 Feb 2021 13:57:51 -0800 Subject: [PATCH] Fix preservation of the sigaltstack on macOS This commit fixes an issue discovered in the wasmtime-go bindings when the Go runtime was crashing on macOS only when running wasm code that trapped. It turns out that our switch to `siglongjmp` from `longjmp` actually broke macOS! This breakage happens because all subsequent signals after the first signal are all delivered on the main stack, not the sigaltstack, even if the sigaltstack is configured. This causes the Go runtime to crash since it expects to run on the sigaltstack. The fix in this commit is to actually return from the signal handler to trigger the kernel's bookkeeping that marks the sigaltstack as no longer in use. Before we return, however, we configure the register context to return to some custom code which immediately does the unwind we would otherwise have done. This works around the issue on macOS hopefully without adding too many portability problems. Ideally this will all go away with #2632 as well. --- crates/runtime/src/traphandlers.rs | 70 ++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/crates/runtime/src/traphandlers.rs b/crates/runtime/src/traphandlers.rs index feaf2c4a9af4..a6d1d4ce01fe 100644 --- a/crates/runtime/src/traphandlers.rs +++ b/crates/runtime/src/traphandlers.rs @@ -118,6 +118,42 @@ cfg_if::cfg_if! { return false; } else if jmp_buf as usize == 1 { return true; + + // on macOS this is a bit special, unfortunately. If we were to + // `siglongjmp` out of the signal handler that notably does + // *not* reset the sigaltstack state of our signal handler. This + // seems to trick the kernel into thinking that the sigaltstack + // is still in use upon delivery of the next signal, meaning + // that the sigaltstack is not ever used again if we immediately + // call `Unwind` here. 
+ // + // Note that if we use `longjmp` instead of `siglongjmp` then + // the problem is fixed. The problem with that, however, is that + // `setjmp` is much slower than `sigsetjmp` due to the + // preservation of the process's signal mask. The reason + // `longjmp` appears to work is that it seems to call a function + // (according to published macOS sources) called + // `_sigunaltstack` which updates the kernel to say the + // sigaltstack is no longer in use. We ideally want to call that + // here but I don't think there's a stable way for us to call + // that. + // + // Given all that, on macOS only, we do the next best thing. We + // return from the signal handler after updating the register + // context. This will cause control to return to our + // `unwind_shim` function defined here which will perform the + // `Unwind` (`siglongjmp`) for us. The reason this works is that + // by returning from the signal handler we'll trigger all the + // normal machinery for "the signal handler is done running" + // which will clear the sigaltstack flag and allow reusing it + // for the next signal. Then upon resuming in our custom code we + // blow away the stack anyway with a longjmp. + } else if cfg!(target_os = "macos") { + unsafe extern "C" fn unwind_shim(jmp_buf: *const u8) { + Unwind(jmp_buf) + } + set_pc(context, unwind_shim as usize, jmp_buf as usize); + return true; + } else { + Unwind(jmp_buf) + } @@ -181,6 +217,40 @@ cfg_if::cfg_if! { } } } + + // This is only used on macOS targets for calling an unwinding shim + // function to ensure that we return from the signal handler. + // + // See more comments above where this is called for what it's doing. + unsafe fn set_pc(cx: *mut libc::c_void, pc: usize, arg1: usize) { + cfg_if::cfg_if! 
{ + if #[cfg(not(target_os = "macos"))] { + unreachable!(); // not used on these platforms + } else if #[cfg(target_arch = "x86_64")] { + let cx = &mut *(cx as *mut libc::ucontext_t); + (*cx.uc_mcontext).__ss.__rip = pc as u64; + (*cx.uc_mcontext).__ss.__rdi = arg1 as u64; + // We're simulating a "pseudo-call" so we need to ensure + // stack alignment is properly respected, notably that on a + // `call` instruction the stack is 8/16-byte aligned, then + // the function adjusts itself to be 16-byte aligned. + // + // Most of the time the stack pointer is 16-byte aligned at + // the time of the trap but for more robustness with JIT + // code where it may ud2 in a prologue check before the + // stack is aligned we double-check here. + if (*cx.uc_mcontext).__ss.__rsp % 16 == 0 { + (*cx.uc_mcontext).__ss.__rsp -= 8; + } + } else if #[cfg(target_arch = "aarch64")] { + let cx = &mut *(cx as *mut libc::ucontext_t); + (*cx.uc_mcontext).__ss.__pc = pc as u64; + (*cx.uc_mcontext).__ss.__x[0] = arg1 as u64; + } else { + compile_error!("unsupported macos target architecture"); + } + } + } } else if #[cfg(target_os = "windows")] { use winapi::um::errhandlingapi::*; use winapi::um::winnt::*;