Skip to content

Commit

Permalink
Optimize Func::call and its C API
Browse files Browse the repository at this point in the history
This commit is an alternative to #3298 which achieves effectively the
same goal of optimizing the `Func::call` API as well as its C API
sibling of `wasmtime_func_call`. The strategy taken here is different
than #3298 though where a new API isn't created, rather a small tweak to
an existing API is done. Specifically this commit handles the major
sources of slowness with `Func::call` with:

* Looking up the type of a function, to typecheck the arguments with and
  use to guide how the results should be loaded, no longer hits the
  rwlock in the `Engine` but instead each `Func` contains its own
  `FuncType`. This can be an unnecessary allocation for funcs not used
  with `Func::call`, so this is a downside of this implementation
  relative to #3298. A mitigating factor, though, is that instance
  exports are loaded lazily into the `Store` and in theory not too many
  funcs are active in the store as `Func` objects.

* Temporary storage is amortized with a long-lived `Vec` in the `Store`
  rather than allocating a new vector on each call. This is basically
  the same strategy as #3294 only applied to different types in
  different places. Specifically `wasmtime::Store` now retains a
  `Vec<u128>` for `Func::call`, and the C API retains a `Vec<Val>` for
  calling `Func::call`.

* Finally, an API breaking change is made to `Func::call` and its type
  signature (as well as `Func::call_async`). Instead of returning
  `Box<[Val]>` as it did before this function now takes a
  `results: &mut [Val]` parameter. This allows the caller to manage the
  allocation and we can amortize-remove it in `wasmtime_func_call` by
  using space after the parameters in the `Vec<Val>` we're passing in.
  This change is naturally a breaking change and we'll want to consider
  it carefully, but mitigating factors are that most embeddings are
  likely using `TypedFunc::call` instead and this signature taking a
  mutable slice better aligns with `Func::new` which receives a mutable
  slice for the results.

Overall this change, in the benchmark of "call a nop function from the C
API" is not quite as good as #3298. It's still a bit slower, on the
order of 15ns, because there's lots of capacity checks around vectors
and the type checks are slightly less optimized than before. Overall
though this is still significantly better than today because allocations
and the rwlock to acquire the type information are both avoided. I
personally feel that this change is the best to do because it has less
of an API impact than #3298.
  • Loading branch information
alexcrichton committed Sep 8, 2021
1 parent 8ebaaf9 commit 5f37933
Show file tree
Hide file tree
Showing 26 changed files with 398 additions and 253 deletions.
64 changes: 40 additions & 24 deletions crates/c-api/src/func.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ use crate::{
wasm_extern_t, wasm_functype_t, wasm_store_t, wasm_val_t, wasm_val_vec_t, wasmtime_error_t,
wasmtime_extern_t, wasmtime_val_t, wasmtime_val_union, CStoreContext, CStoreContextMut,
};
use anyhow::anyhow;
use std::ffi::c_void;
use std::mem::{self, MaybeUninit};
use std::panic::{self, AssertUnwindSafe};
Expand Down Expand Up @@ -109,6 +108,22 @@ pub unsafe extern "C" fn wasm_func_new_with_env(
})
}

/// Places the `args` into `dst` and additionally reserves space in `dst` for `results_size`
/// returns. The params/results slices are then returned separately.
fn translate_args<'a>(
dst: &'a mut Vec<Val>,
args: impl ExactSizeIterator<Item = Val>,
results_size: usize,
) -> (&'a [Val], &'a mut [Val]) {
debug_assert!(dst.is_empty());
let num_args = args.len();
dst.reserve(args.len() + results_size);
dst.extend(args);
dst.extend((0..results_size).map(|_| Val::null()));
let (a, b) = dst.split_at_mut(num_args);
(a, b)
}

#[no_mangle]
pub unsafe extern "C" fn wasm_func_call(
func: &mut wasm_func_t,
Expand All @@ -118,23 +133,20 @@ pub unsafe extern "C" fn wasm_func_call(
let f = func.func();
let results = (*results).as_uninit_slice();
let args = (*args).as_slice();
if results.len() != f.ty(func.ext.store.context()).results().len() {
return Box::into_raw(Box::new(wasm_trap_t::new(
anyhow!("wrong number of results provided").into(),
)));
}
let params = args.iter().map(|i| i.val()).collect::<Vec<_>>();
let mut dst = Vec::new();
let (wt_params, wt_results) =
translate_args(&mut dst, args.iter().map(|i| i.val()), results.len());

// We're calling arbitrary code here most of the time, and we in general
// want to try to insulate callers against bugs in wasmtime/wasi/etc if we
// can. As a result we catch panics here and transform them to traps to
// allow the caller to have any insulation possible against Rust panics.
let result = panic::catch_unwind(AssertUnwindSafe(|| {
f.call(func.ext.store.context_mut(), &params)
f.call(func.ext.store.context_mut(), wt_params, wt_results)
}));
match result {
Ok(Ok(out)) => {
for (slot, val) in results.iter_mut().zip(out.into_vec().into_iter()) {
Ok(Ok(())) => {
for (slot, val) in results.iter_mut().zip(wt_results.iter().cloned()) {
crate::initialize(slot, wasm_val_t::from_val(val));
}
ptr::null_mut()
Expand Down Expand Up @@ -261,35 +273,39 @@ pub(crate) unsafe fn c_callback_to_rust_fn(

#[no_mangle]
pub unsafe extern "C" fn wasmtime_func_call(
store: CStoreContextMut<'_>,
mut store: CStoreContextMut<'_>,
func: &Func,
args: *const wasmtime_val_t,
nargs: usize,
results: *mut MaybeUninit<wasmtime_val_t>,
nresults: usize,
trap_ret: &mut *mut wasm_trap_t,
) -> Option<Box<wasmtime_error_t>> {
if nresults != func.ty(&store).results().len() {
return Some(Box::new(wasmtime_error_t::from(anyhow!(
"wrong number of results provided"
))));
}
let params = crate::slice_from_raw_parts(args, nargs)
.iter()
.map(|i| i.to_val())
.collect::<Vec<_>>();
let mut store = store.as_context_mut();
let mut params = mem::take(&mut store.data_mut().wasm_val_storage);
let (wt_params, wt_results) = translate_args(
&mut params,
crate::slice_from_raw_parts(args, nargs)
.iter()
.map(|i| i.to_val()),
nresults,
);

// We're calling arbitrary code here most of the time, and we in general
// want to try to insulate callers against bugs in wasmtime/wasi/etc if we
// can. As a result we catch panics here and transform them to traps to
// allow the caller to have any insulation possible against Rust panics.
let result = panic::catch_unwind(AssertUnwindSafe(|| func.call(store, &params)));
let result = panic::catch_unwind(AssertUnwindSafe(|| {
func.call(&mut store, wt_params, wt_results)
}));
match result {
Ok(Ok(out)) => {
Ok(Ok(())) => {
let results = crate::slice_from_raw_parts_mut(results, nresults);
for (slot, val) in results.iter_mut().zip(out.into_vec().into_iter()) {
crate::initialize(slot, wasmtime_val_t::from_val(val));
for (slot, val) in results.iter_mut().zip(wt_results.iter()) {
crate::initialize(slot, wasmtime_val_t::from_val(val.clone()));
}
params.truncate(0);
store.data_mut().wasm_val_storage = params;
None
}
Ok(Err(trap)) => match trap.downcast::<Trap>() {
Expand Down
9 changes: 8 additions & 1 deletion crates/c-api/src/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ use crate::{wasm_engine_t, wasmtime_error_t, wasmtime_val_t, ForeignData};
use std::cell::UnsafeCell;
use std::ffi::c_void;
use std::sync::Arc;
use wasmtime::{AsContext, AsContextMut, InterruptHandle, Store, StoreContext, StoreContextMut};
use wasmtime::{
AsContext, AsContextMut, InterruptHandle, Store, StoreContext, StoreContextMut, Val,
};

/// This representation of a `Store` is used to implement the `wasm.h` API.
///
Expand Down Expand Up @@ -71,6 +73,10 @@ pub struct StoreData {
/// Temporary storage for usage during a wasm->host call to store values
/// in a slice we pass to the C API.
pub hostcall_val_storage: Vec<wasmtime_val_t>,

/// Temporary storage for usage during host->wasm calls, same as above but
/// for a different direction.
pub wasm_val_storage: Vec<Val>,
}

#[no_mangle]
Expand All @@ -90,6 +96,7 @@ pub extern "C" fn wasmtime_store_new(
#[cfg(feature = "wasi")]
wasi: None,
hostcall_val_storage: Vec::new(),
wasm_val_storage: Vec::new(),
},
),
})
Expand Down
27 changes: 17 additions & 10 deletions crates/fuzzing/src/oracles.rs
Original file line number Diff line number Diff line change
Expand Up @@ -297,8 +297,10 @@ pub fn differential_execution(

let ty = f.ty(&store);
let params = dummy::dummy_values(ty.params());
let mut results = vec![Val::I32(0); ty.results().len()];
let this_result = f
.call(&mut store, &params)
.call(&mut store, &params, &mut results)
.map(|()| results.into())
.map_err(|e| e.downcast::<Trap>().unwrap());

let existing_result = export_func_results
Expand All @@ -312,7 +314,7 @@ pub fn differential_execution(
match instance.get_export(&mut *store, "hangLimitInitializer") {
None => return,
Some(Extern::Func(f)) => {
f.call(store, &[])
f.call(store, &[], &mut [])
.expect("initializing the hang limit should not fail");
}
Some(_) => panic!("unexpected hangLimitInitializer export"),
Expand Down Expand Up @@ -478,7 +480,8 @@ pub fn make_api_calls(api: crate::generators::api::ApiCalls) {
let f = &funcs[nth];
let ty = f.ty(&store);
let params = dummy::dummy_values(ty.params());
let _ = f.call(store, &params);
let mut results = vec![Val::I32(0); ty.results().len()];
let _ = f.call(store, &params, &mut results);
}
}
}
Expand Down Expand Up @@ -549,7 +552,7 @@ pub fn table_ops(
let args: Vec<_> = (0..ops.num_params())
.map(|_| Val::ExternRef(Some(ExternRef::new(CountDrops(num_dropped.clone())))))
.collect();
let _ = run.call(&mut store, &args);
let _ = run.call(&mut store, &args, &mut []);
}

assert_eq!(num_dropped.load(SeqCst), ops.num_params() as usize);
Expand Down Expand Up @@ -653,7 +656,7 @@ pub fn differential_wasmi_execution(wasm: &[u8], config: &crate::generators::Con

// Introspect wasmtime module to find name of an exported function and of an
// exported memory.
let (func_name, _ty) = first_exported_function(&wasmtime_module)?;
let (func_name, ty) = first_exported_function(&wasmtime_module)?;
let memory_name = first_exported_memory(&wasmtime_module)?;

let wasmi_mem_export = wasmi_instance.export_by_name(memory_name).unwrap();
Expand All @@ -668,8 +671,10 @@ pub fn differential_wasmi_execution(wasm: &[u8], config: &crate::generators::Con
let wasmtime_main = wasmtime_instance
.get_func(&mut wasmtime_store, func_name)
.expect("function export is present");
let wasmtime_vals = wasmtime_main.call(&mut wasmtime_store, &[]);
let wasmtime_val = wasmtime_vals.map(|v| v.iter().next().cloned());
let mut wasmtime_results = vec![Val::I32(0); ty.results().len()];
let wasmtime_val = wasmtime_main
.call(&mut wasmtime_store, &[], &mut wasmtime_results)
.map(|()| wasmtime_results.get(0).cloned());

debug!(
"Successful execution: wasmi returned {:?}, wasmtime returned {:?}",
Expand Down Expand Up @@ -831,15 +836,17 @@ fn run_in_wasmtime(
.context("Wasmtime cannot instantiate module")?;

// Find the first exported function.
let (func_name, _ty) =
let (func_name, ty) =
first_exported_function(&wasmtime_module).context("Cannot find exported function")?;
let wasmtime_main = wasmtime_instance
.get_func(&mut wasmtime_store, &func_name[..])
.expect("function export is present");

// Execute the function and return the values.
let wasmtime_vals = wasmtime_main.call(&mut wasmtime_store, params);
wasmtime_vals.map(|v| v.to_vec())
let mut results = vec![Val::I32(0); ty.results().len()];
wasmtime_main
.call(&mut wasmtime_store, params, &mut results)
.map(|()| results)
}

// Introspect wasmtime module to find the name of the first exported function.
Expand Down
16 changes: 9 additions & 7 deletions crates/fuzzing/src/oracles/v8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,9 @@ pub fn differential_v8_execution(wasm: &[u8], config: &crate::generators::Config
let wasmtime_main = wasmtime_instance
.get_func(&mut wasmtime_store, func)
.expect("function export is present");
let wasmtime_vals = wasmtime_main.call(&mut wasmtime_store, &wasmtime_params);
let mut wasmtime_vals = vec![Val::I32(0); ty.results().len()];
let wasmtime_result =
wasmtime_main.call(&mut wasmtime_store, &wasmtime_params, &mut wasmtime_vals);
log::trace!("finished wasmtime invocation");

// V8: call the first exported func
Expand All @@ -112,23 +114,23 @@ pub fn differential_v8_execution(wasm: &[u8], config: &crate::generators::Config
log::trace!("finished v8 invocation");

// Verify V8 and wasmtime match
match (wasmtime_vals, v8_vals) {
(Ok(wasmtime), Ok(v8)) => {
match (wasmtime_result, v8_vals) {
(Ok(()), Ok(v8)) => {
log::trace!("both executed successfully");
match wasmtime.len() {
match wasmtime_vals.len() {
0 => assert!(v8.is_undefined()),
1 => assert_val_match(&wasmtime[0], &v8, &mut scope),
1 => assert_val_match(&wasmtime_vals[0], &v8, &mut scope),
_ => {
let array = v8::Local::<'_, v8::Array>::try_from(v8).unwrap();
for (i, wasmtime) in wasmtime.iter().enumerate() {
for (i, wasmtime) in wasmtime_vals.iter().enumerate() {
let v8 = array.get_index(&mut scope, i as u32).unwrap();
assert_val_match(wasmtime, &v8, &mut scope);
// ..
}
}
}
}
(Ok(_), Err(msg)) => {
(Ok(()), Err(msg)) => {
panic!("wasmtime succeeded at invocation, v8 failed: {}", msg)
}
(Err(err), Ok(_)) => {
Expand Down
Loading

0 comments on commit 5f37933

Please sign in to comment.