More optimizations for calling into WebAssembly #2759

Merged 4 commits on Mar 24, 2021
Cargo.lock: 3 changes (2 additions, 1 deletion)

Some generated files are not rendered by default.

crates/runtime/Cargo.toml: 1 change (0 additions, 1 deletion)
@@ -24,7 +24,6 @@ more-asserts = "0.2.1"
cfg-if = "1.0"
backtrace = "0.3.55"
lazy_static = "1.3.0"
psm = "0.1.11"
rand = "0.8.3"
anyhow = "1.0.38"

crates/runtime/src/externref.rs: 86 changes (21 additions, 65 deletions)
@@ -522,13 +522,13 @@ pub struct VMExternRefActivationsTable {
/// than create a new hash set every GC.
precise_stack_roots: RefCell<HashSet<VMExternRefWithTraits>>,

/// A pointer to a `u8` on the youngest host stack frame before we called
/// A pointer to the youngest host stack frame before we called
/// into Wasm for the first time. When walking the stack in garbage
/// collection, if we don't find this frame, then we failed to walk every
/// Wasm stack frame, which means we failed to find all on-stack,
/// inside-a-Wasm-frame roots, and doing a GC could lead to freeing one of
/// those missed roots, and use after free.
stack_canary: Cell<Option<NonNull<u8>>>,
stack_canary: Cell<Option<usize>>,
}
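The canary only needs to be compared against frame addresses during the GC stack walk (see the `gc` hunk below), which is why a plain `usize` now suffices where a dereferenceable `NonNull<u8>` was stored before. A minimal, self-contained sketch of the idea, using toy types and a toy frame walk rather than wasmtime's real ones:

```rust
use std::cell::Cell;

// Toy stand-in for the table above: the canary records a stack address for
// the youngest host frame before the first call into Wasm.
struct CanaryTable {
    stack_canary: Cell<Option<usize>>,
}

impl CanaryTable {
    // `frame_sps` stands in for the stack-pointer values visited while
    // walking Wasm frames; the real walk uses a backtrace, not a slice.
    fn walk_reached_canary(&self, frame_sps: &[usize]) -> bool {
        match self.stack_canary.get() {
            // No canary: no Wasm frames are on the stack, so there are no
            // on-stack roots to miss.
            None => true,
            // Otherwise the walk must visit a frame at or past the canary;
            // if it never does, collecting now could free a live root.
            Some(canary) => frame_sps.iter().any(|&sp| sp >= canary),
        }
    }
}

fn main() {
    let table = CanaryTable { stack_canary: Cell::new(Some(0x7000_0000)) };
    assert!(!table.walk_reached_canary(&[0x6fff_ff00])); // walk fell short
    assert!(table.walk_reached_canary(&[0x6fff_ff00, 0x7000_0040]));
    table.stack_canary.set(None);
    assert!(table.walk_reached_canary(&[])); // no Wasm on the stack at all
}
```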

impl VMExternRefActivationsTable {
@@ -717,73 +717,29 @@ impl VMExternRefActivationsTable {
}
}

/// Set the stack canary around a call into Wasm.
/// Fetches the current value of this table's stack canary.
///
/// The return value should not be dropped until after the Wasm call has
/// returned.
/// This is typically used in conjunction with setting the stack canary
/// below when the returned value is `None`. This is called from RAII
/// guards in `wasmtime::func::invoke_wasm_and_catch_traps`.
///
/// While this method is always safe to call (or not call), it is unsafe to
/// call the `wasmtime_runtime::gc` function unless this method is called at
/// the proper times and its return value properly outlives its Wasm call.
///
/// For `gc` to be safe, this is only *strictly required* to surround the
/// oldest host-->Wasm stack frame transition on this thread, but repeatedly
/// calling it is idempotent and cheap, so it is recommended to call this
/// for every host-->Wasm call.
///
/// # Example
///
/// ```no_run
/// use wasmtime_runtime::*;
///
/// # let get_table_from_somewhere = || unimplemented!();
/// let table: &VMExternRefActivationsTable = get_table_from_somewhere();
///
/// // Set the canary before a Wasm call. The canary should always be a
/// // local on the stack.
/// let canary = 0;
/// let auto_reset_canary = table.set_stack_canary(&canary);
/// For more information on canaries see the gc functions below.
#[inline]
pub fn stack_canary(&self) -> Option<usize> {
self.stack_canary.get()
}

/// Sets the current value of the stack canary.
///
/// // Do the call into Wasm.
/// # let call_into_wasm = || unimplemented!();
/// call_into_wasm();
/// This is called from RAII guards in
/// `wasmtime::func::invoke_wasm_and_catch_traps`. This is used to update
/// the stack canary to a concrete value and then reset it back to `None`
/// when wasm is finished.
///
/// // Only drop the value returned by `set_stack_canary` after the Wasm
/// // call has returned.
/// drop(auto_reset_canary);
/// ```
/// For more information on canaries see the gc functions below.
#[inline]
pub fn set_stack_canary<'a>(&'a self, canary: &u8) -> impl Drop + 'a {
let should_reset = if self.stack_canary.get().is_none() {
let canary = canary as *const u8 as *mut u8;
self.stack_canary.set(Some(unsafe {
debug_assert!(!canary.is_null());
NonNull::new_unchecked(canary)
}));
true
} else {
false
};

return AutoResetCanary {
table: self,
should_reset,
};

struct AutoResetCanary<'a> {
table: &'a VMExternRefActivationsTable,
should_reset: bool,
}

impl Drop for AutoResetCanary<'_> {
#[inline]
fn drop(&mut self) {
if self.should_reset {
debug_assert!(self.table.stack_canary.get().is_some());
self.table.stack_canary.set(None);
}
}
}
pub fn set_stack_canary(&self, canary: Option<usize>) {
self.stack_canary.set(canary);
}
}
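The RAII guards mentioned above live outside this crate, so they are not part of this diff. A hedged sketch of the pattern the comments describe, with illustrative names (`CanaryGuard`, `enter_wasm`) rather than wasmtime's actual ones:

```rust
use std::cell::Cell;

// The outermost host->Wasm call records a stack address as the canary and
// resets it to `None` when the call returns; nested calls leave it alone.
struct CanaryGuard<'a> {
    canary: &'a Cell<Option<usize>>,
    should_reset: bool,
}

impl<'a> CanaryGuard<'a> {
    fn new(canary: &'a Cell<Option<usize>>, addr: usize) -> Self {
        let should_reset = canary.get().is_none();
        if should_reset {
            canary.set(Some(addr));
        }
        CanaryGuard { canary, should_reset }
    }
}

impl Drop for CanaryGuard<'_> {
    fn drop(&mut self) {
        if self.should_reset {
            self.canary.set(None);
        }
    }
}

fn enter_wasm(canary: &Cell<Option<usize>>, call: impl FnOnce()) {
    // A local on this frame; its address stays valid for the whole call.
    let canary_slot = 0u8;
    let _guard = CanaryGuard::new(canary, &canary_slot as *const u8 as usize);
    call(); // the Wasm call happens while the guard (and the slot) are live
}

fn main() {
    let canary = Cell::new(None);
    enter_wasm(&canary, || assert!(canary.get().is_some()));
    assert!(canary.get().is_none());
}
```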

@@ -1066,7 +1022,7 @@ pub unsafe fn gc(
log::debug!("end GC");
return;
}
Some(canary) => canary.as_ptr() as usize,
Some(canary) => canary,
};

// There is a stack canary, so there must be Wasm frames on the stack. The
crates/runtime/src/traphandlers.rs: 145 changes (27 additions, 118 deletions)
@@ -8,7 +8,7 @@ use std::cell::{Cell, UnsafeCell};
use std::error::Error;
use std::mem::MaybeUninit;
use std::ptr;
use std::sync::atomic::{AtomicUsize, Ordering::SeqCst};
use std::sync::atomic::Ordering::SeqCst;
use std::sync::Once;
use wasmtime_environ::ir;

@@ -38,19 +38,32 @@ cfg_if::cfg_if! {

pub use sys::SignalHandler;

/// This function performs the low-overhead platform-specific initialization
/// that we want to do eagerly to ensure a more-deterministic global process
/// state.
/// Globally-set callback to determine whether a program counter is actually a
/// wasm trap.
///
/// This is especially relevant for signal handlers since handler ordering
/// depends on installation order: the wasm signal handler must run *before*
/// the other crash handlers and since POSIX signal handlers work LIFO, this
/// function needs to be called at the end of the startup process, after other
/// handlers have been installed. This function can thus be called multiple
/// times, having no effect after the first call.
pub fn init_traps() -> Result<(), Trap> {
/// This is initialized during `init_traps` below. The definition lives within
/// `wasmtime` currently.
static mut IS_WASM_PC: fn(usize) -> bool = |_| false;

/// This function is required to be called before any WebAssembly is entered.
/// This will configure global state such as signal handlers to prepare the
/// process to receive wasm traps.
///
/// This function must not only be called once globally before entering
/// WebAssembly; it must also be called once per thread that enters
/// WebAssembly. Currently in wasmtime's integration this function is called on
/// creation of a `Store`.
///
/// The `is_wasm_pc` argument is used when a trap happens to determine if a
/// program counter is the pc of an actual wasm trap or not. This is then used
/// to disambiguate faults that happen due to wasm and faults that happen due to
/// bugs in Rust or elsewhere.
pub fn init_traps(is_wasm_pc: fn(usize) -> bool) -> Result<(), Trap> {
static INIT: Once = Once::new();
INIT.call_once(|| unsafe { sys::platform_init() });
INIT.call_once(|| unsafe {
IS_WASM_PC = is_wasm_pc;
sys::platform_init();
});
sys::lazy_per_thread_init()
}
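A hedged sketch of how a caller wires this up (per the docs above, wasmtime does this when a `Store` is created; the `pc_is_in_some_wasm_module` callback below is a placeholder, as the real check is defined in the `wasmtime` crate):

```rust
// Assumes a dependency on this crate (`wasmtime-runtime`).
use wasmtime_runtime::{init_traps, Trap};

// Placeholder callback: the real one decides whether `pc` points into
// compiled Wasm code that is allowed to trap.
fn pc_is_in_some_wasm_module(_pc: usize) -> bool {
    false
}

// Called once per thread that will execute WebAssembly. The global
// signal-handler setup inside `init_traps` runs only on the first call
// process-wide; the per-thread initialization runs every time.
fn prepare_thread_for_wasm() -> Result<(), Trap> {
    init_traps(pc_is_in_some_wasm_module)
}
```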

@@ -208,19 +221,11 @@ pub unsafe trait TrapInfo {
/// Converts this object into an `Any` to dynamically check its type.
fn as_any(&self) -> &dyn Any;

/// Returns whether the given program counter lies within wasm code,
/// indicating whether we should handle a trap or not.
fn is_wasm_trap(&self, pc: usize) -> bool;

/// Uses `call` to call a custom signal handler, if one is specified.
///
/// Returns `true` if `call` returns true, otherwise returns `false`.
fn custom_signal_handler(&self, call: &dyn Fn(&SignalHandler) -> bool) -> bool;

/// Returns the maximum size, in bytes, the wasm native stack is allowed to
/// grow to.
fn max_wasm_stack(&self) -> usize;

/// Callback invoked whenever WebAssembly has entirely consumed the fuel
/// that it was allotted.
///
@@ -251,7 +256,6 @@ impl<'a> CallThreadState<'a> {
}

fn with(self, closure: impl FnOnce(&CallThreadState) -> i32) -> Result<(), Trap> {
let _reset = self.update_stack_limit()?;
let ret = tls::set(&self, || closure(&self));
if ret != 0 {
return Ok(());
@@ -273,98 +277,6 @@ impl<'a> CallThreadState<'a> {
}
}

/// Checks and/or initializes the wasm native call stack limit.
///
/// This function will inspect the current state of the stack and calling
/// context to determine which of three buckets we're in:
///
/// 1. We are the first wasm call on the stack. This means that we need to
/// set up a stack limit beyond which, if the native wasm stack pointer
/// goes, a trap is forced. For now we simply reserve an arbitrary chunk
/// of bytes (1 MB from roughly the current native stack pointer). This
/// logic will likely get tweaked over time.
///
/// 2. We aren't the first wasm call on the stack. In this scenario the wasm
/// stack limit is already configured. In this case of wasm -> host -> wasm
/// we assume that the native stack consumed by the host is accounted for
/// in the initial stack limit calculation. That means that in this
/// scenario we do nothing.
///
/// 3. We were previously interrupted. In this case we consume the interrupt
/// here and return a trap, clearing the interrupt and allowing the next
/// wasm call to proceed.
///
/// The return value here is a trap for case 3, a noop destructor in case 2,
/// and a meaningful destructor in case 1.
///
/// For more information about interrupts and stack limits see
/// `crates/environ/src/cranelift.rs`.
///
/// Note that this function must be called with `self` on the stack, not the
/// heap/etc.
#[inline]
fn update_stack_limit(&self) -> Result<impl Drop + '_, Trap> {
// Determine the stack pointer where, after which, any wasm code will
// immediately trap. This is checked on the entry to all wasm functions.
//
// Note that this isn't 100% precise. We are requested to give wasm
// `max_wasm_stack` bytes, but what we're actually doing is giving wasm
// probably a little less than `max_wasm_stack` because we're
// calculating the limit relative to this function's approximate stack
// pointer. Wasm will be executed on a frame beneath this one (or next
// to it). In any case it's expected to be at most a few hundred bytes
// of slop one way or another. When wasm is typically given a MB or so
// (a million bytes) the slop shouldn't matter too much.
let wasm_stack_limit = psm::stack_pointer() as usize - self.trap_info.max_wasm_stack();

let interrupts = self.trap_info.interrupts();
let reset_stack_limit = match interrupts.stack_limit.compare_exchange(
usize::max_value(),
wasm_stack_limit,
SeqCst,
SeqCst,
) {
Ok(_) => {
// We're the first wasm on the stack so we've now reserved the
// `max_wasm_stack` bytes of native stack space for wasm.
// Nothing left to do here now except reset back when we're
// done.
true
}
Err(n) if n == wasmtime_environ::INTERRUPTED => {
// This means that an interrupt happened before we actually
// called this function, which means that we're now
// considered interrupted. Be sure to consume this interrupt
// as part of this process too.
interrupts.stack_limit.store(usize::max_value(), SeqCst);
return Err(Trap::Wasm {
trap_code: ir::TrapCode::Interrupt,
backtrace: Backtrace::new_unresolved(),
});
}
Err(_) => {
// The stack limit was previously set by a previous wasm
// call on the stack. We leave the original stack limit for
// wasm in place in that case, and don't reset the stack
// limit when we're done.
false
}
};

struct Reset<'a>(bool, &'a AtomicUsize);

impl Drop for Reset<'_> {
#[inline]
fn drop(&mut self) {
if self.0 {
self.1.store(usize::max_value(), SeqCst);
}
}
}

Ok(Reset(reset_stack_limit, &interrupts.stack_limit))
}
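The three cases above reduce to a compare-and-swap against a sentinel "no limit installed" value. A small, self-contained illustration of that protocol, with an illustrative `INTERRUPTED` constant standing in for `wasmtime_environ::INTERRUPTED` rather than its actual value:

```rust
use std::sync::atomic::{AtomicUsize, Ordering::SeqCst};

const NO_LIMIT: usize = usize::MAX;        // sentinel: no limit installed
const INTERRUPTED: usize = usize::MAX - 1; // illustrative stand-in value

enum Entry {
    FirstWasmCall, // case 1: we installed the limit and must reset it later
    NestedCall,    // case 2: an outer wasm call already installed the limit
    Interrupted,   // case 3: consume the interrupt and trap
}

fn try_install_limit(stack_limit: &AtomicUsize, new_limit: usize) -> Entry {
    match stack_limit.compare_exchange(NO_LIMIT, new_limit, SeqCst, SeqCst) {
        Ok(_) => Entry::FirstWasmCall,
        Err(n) if n == INTERRUPTED => {
            // Clear the interrupt so the next wasm call can proceed.
            stack_limit.store(NO_LIMIT, SeqCst);
            Entry::Interrupted
        }
        Err(_) => Entry::NestedCall,
    }
}
```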

fn unwind_with(&self, reason: UnwindReason) -> ! {
unsafe {
(*self.unwind.get()).as_mut_ptr().write(reason);
@@ -387,6 +299,7 @@ impl<'a> CallThreadState<'a> {
/// instance, and the trap handler should quickly return.
/// * a different pointer - a jmp_buf buffer to longjmp to, meaning that
/// the wasm trap was successfully handled.
#[cfg_attr(target_os = "macos", allow(dead_code))] // macOS is more raw and doesn't use this
fn jmp_buf_if_trap(
&self,
pc: *const u8,
@@ -415,7 +328,7 @@ impl<'a> CallThreadState<'a> {
}

// If this fault wasn't in wasm code, then it's not our problem
if !self.trap_info.is_wasm_trap(pc as usize) {
if unsafe { !IS_WASM_PC(pc as usize) } {
return ptr::null();
}

@@ -480,10 +393,6 @@ mod tls {

#[inline(never)] // see module docs for why this is here
pub fn replace(val: Ptr) -> Ptr {
// Mark the current thread as handling interrupts for this specific
// CallThreadState: may clobber the previous entry.
super::super::sys::register_tls(val);

PTR.with(|p| p.replace(val))
}
