Skip to content

Commit

Permalink
Don't copy VMBuiltinFunctionsArray into each VMContext
Browse files Browse the repository at this point in the history
This is another PR along the lines of "let's squeeze all possible
performance we can out of instantiation". Before this PR we would copy,
by value, the contents of `VMBuiltinFunctionsArray` into each
`VMContext` allocated. This array of function pointers is modestly-sized
but growing over time as we add various intrinsics. Additionally it's
the exact same for all `VMContext` allocations.

This PR attempts to speed up instantiation slightly by instead storing
an indirection to the function array. This means that calling a builtin
intrinsic is slightly slower, since it now requires two loads instead of
one (one to load the base pointer, another to load the actual function address).
Otherwise though `VMContext` initialization is now simply setting one
pointer instead of doing a `memcpy` from one location to another.

With some macro magic, this commit also replaces the previous
implementation with one that's more `const`-friendly, which also gives us
compile-time type-checking of libcalls as well as compile-time
verification that all libcalls are defined.

Overall, as with bytecodealliance#3739, the win is very modest here. Locally I measured
a reduction from 1.9us to 1.7us in the time taken to instantiate an empty
module with one function. While small at these scales, it's still a 10% improvement!
  • Loading branch information
alexcrichton committed Jan 28, 2022
1 parent 34537a3 commit 30bc61d
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 109 deletions.
11 changes: 8 additions & 3 deletions crates/cranelift/src/func_environ.rs
Original file line number Diff line number Diff line change
Expand Up @@ -281,10 +281,15 @@ impl<'module_environment> FuncEnvironment<'module_environment> {
let mut mem_flags = ir::MemFlags::trusted();
mem_flags.set_readonly();

// Load the base of the array of builtin functions
let array_offset = i32::try_from(self.offsets.vmctx_builtin_functions()).unwrap();
let array_addr = pos.ins().load(pointer_type, mem_flags, base, array_offset);

// Load the callee address.
let body_offset =
i32::try_from(self.offsets.vmctx_builtin_function(callee_func_idx)).unwrap();
let func_addr = pos.ins().load(pointer_type, mem_flags, base, body_offset);
let body_offset = i32::try_from(callee_func_idx.index() * pointer_type.bytes()).unwrap();
let func_addr = pos
.ins()
.load(pointer_type, mem_flags, array_addr, body_offset);

(base, func_addr)
}
Expand Down
20 changes: 5 additions & 15 deletions crates/environ/src/vmoffsets.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@
// memories: [VMMemoryDefinition; module.num_defined_memories],
// globals: [VMGlobalDefinition; module.num_defined_globals],
// anyfuncs: [VMCallerCheckedAnyfunc; module.num_imported_functions + module.num_defined_functions],
// builtins: VMBuiltinFunctionsArray,
// builtins: *mut VMBuiltinFunctionsArray,
// }

use crate::{
BuiltinFunctionIndex, DefinedGlobalIndex, DefinedMemoryIndex, DefinedTableIndex, FuncIndex,
GlobalIndex, MemoryIndex, Module, TableIndex, TypeIndex,
DefinedGlobalIndex, DefinedMemoryIndex, DefinedTableIndex, FuncIndex, GlobalIndex, MemoryIndex,
Module, TableIndex, TypeIndex,
};
use more_asserts::assert_lt;
use std::convert::TryFrom;
Expand Down Expand Up @@ -287,11 +287,7 @@ impl<P: PtrSize> From<VMOffsetsFields<P>> for VMOffsets<P> {
.unwrap();
ret.size = ret
.builtin_functions
.checked_add(
BuiltinFunctionIndex::builtin_functions_total_number()
.checked_mul(u32::from(ret.pointer_size()))
.unwrap(),
)
.checked_add(u32::from(ret.pointer_size()))
.unwrap();

return ret;
Expand Down Expand Up @@ -597,7 +593,7 @@ impl<P: PtrSize> VMOffsets<P> {

/// The offset of the builtin functions array.
#[inline]
pub fn vmctx_builtin_functions_begin(&self) -> u32 {
pub fn vmctx_builtin_functions(&self) -> u32 {
self.builtin_functions
}

Expand Down Expand Up @@ -739,12 +735,6 @@ impl<P: PtrSize> VMOffsets<P> {
pub fn vmctx_vmglobal_import_from(&self, index: GlobalIndex) -> u32 {
self.vmctx_vmglobal_import(index) + u32::from(self.vmglobal_import_from())
}

/// Return the offset to builtin function in `VMBuiltinFunctionsArray` index `index`.
#[inline]
pub fn vmctx_builtin_function(&self, index: BuiltinFunctionIndex) -> u32 {
self.vmctx_builtin_functions_begin() + index.index() * u32::from(self.pointer_size())
}
}

/// Offsets for `VMExternData`.
Expand Down
6 changes: 2 additions & 4 deletions crates/runtime/src/instance/allocator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -481,10 +481,8 @@ unsafe fn initialize_vmcontext(instance: &mut Instance, req: InstanceAllocationR
}

// Initialize the built-in functions
ptr::write(
instance.vmctx_plus_offset(instance.offsets.vmctx_builtin_functions_begin()),
VMBuiltinFunctionsArray::initialized(),
);
*instance.vmctx_plus_offset(instance.offsets.vmctx_builtin_functions()) =
VMBuiltinFunctionsArray::new();

// Initialize the imports
debug_assert_eq!(req.imports.functions.len(), module.num_imported_funcs);
Expand Down
69 changes: 38 additions & 31 deletions crates/runtime/src/libcalls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,14 +185,14 @@ pub extern "C" fn wasmtime_f64_nearest(x: f64) -> f64 {
}

/// Implementation of memory.grow for locally-defined 32-bit memories.
pub unsafe extern "C" fn wasmtime_memory32_grow(
pub unsafe extern "C" fn memory32_grow(
vmctx: *mut VMContext,
delta: u64,
memory_index: u32,
) -> usize {
) -> *mut u8 {
// Memory grow can invoke user code provided in a ResourceLimiter{,Async},
// so we need to catch a possible panic
match std::panic::catch_unwind(|| {
let ret = match std::panic::catch_unwind(|| {
let instance = (*vmctx).instance_mut();
let memory_index = MemoryIndex::from_u32(memory_index);
instance.memory_grow(memory_index, delta)
Expand All @@ -201,11 +201,12 @@ pub unsafe extern "C" fn wasmtime_memory32_grow(
Ok(Ok(None)) => usize::max_value(),
Ok(Err(err)) => crate::traphandlers::raise_user_trap(err),
Err(p) => resume_panic(p),
}
};
ret as *mut u8
}

/// Implementation of `table.grow`.
pub unsafe extern "C" fn wasmtime_table_grow(
pub unsafe extern "C" fn table_grow(
vmctx: *mut VMContext,
table_index: u32,
delta: u32,
Expand Down Expand Up @@ -238,8 +239,11 @@ pub unsafe extern "C" fn wasmtime_table_grow(
}
}

pub use table_grow as table_grow_funcref;
pub use table_grow as table_grow_externref;

/// Implementation of `table.fill`.
pub unsafe extern "C" fn wasmtime_table_fill(
pub unsafe extern "C" fn table_fill(
vmctx: *mut VMContext,
table_index: u32,
dst: u32,
Expand Down Expand Up @@ -272,8 +276,11 @@ pub unsafe extern "C" fn wasmtime_table_fill(
}
}

pub use table_fill as table_fill_funcref;
pub use table_fill as table_fill_externref;

/// Implementation of `table.copy`.
pub unsafe extern "C" fn wasmtime_table_copy(
pub unsafe extern "C" fn table_copy(
vmctx: *mut VMContext,
dst_table_index: u32,
src_table_index: u32,
Expand All @@ -295,7 +302,7 @@ pub unsafe extern "C" fn wasmtime_table_copy(
}

/// Implementation of `table.init`.
pub unsafe extern "C" fn wasmtime_table_init(
pub unsafe extern "C" fn table_init(
vmctx: *mut VMContext,
table_index: u32,
elem_index: u32,
Expand All @@ -315,14 +322,14 @@ pub unsafe extern "C" fn wasmtime_table_init(
}

/// Implementation of `elem.drop`.
pub unsafe extern "C" fn wasmtime_elem_drop(vmctx: *mut VMContext, elem_index: u32) {
pub unsafe extern "C" fn elem_drop(vmctx: *mut VMContext, elem_index: u32) {
let elem_index = ElemIndex::from_u32(elem_index);
let instance = (*vmctx).instance_mut();
instance.elem_drop(elem_index);
}

/// Implementation of `memory.copy` for locally defined memories.
pub unsafe extern "C" fn wasmtime_memory_copy(
pub unsafe extern "C" fn memory_copy(
vmctx: *mut VMContext,
dst_index: u32,
dst: u64,
Expand All @@ -342,7 +349,7 @@ pub unsafe extern "C" fn wasmtime_memory_copy(
}

/// Implementation of `memory.fill` for locally defined memories.
pub unsafe extern "C" fn wasmtime_memory_fill(
pub unsafe extern "C" fn memory_fill(
vmctx: *mut VMContext,
memory_index: u32,
dst: u64,
Expand All @@ -360,7 +367,7 @@ pub unsafe extern "C" fn wasmtime_memory_fill(
}

/// Implementation of `memory.init`.
pub unsafe extern "C" fn wasmtime_memory_init(
pub unsafe extern "C" fn memory_init(
vmctx: *mut VMContext,
memory_index: u32,
data_index: u32,
Expand All @@ -380,22 +387,22 @@ pub unsafe extern "C" fn wasmtime_memory_init(
}

/// Implementation of `data.drop`.
pub unsafe extern "C" fn wasmtime_data_drop(vmctx: *mut VMContext, data_index: u32) {
pub unsafe extern "C" fn data_drop(vmctx: *mut VMContext, data_index: u32) {
let data_index = DataIndex::from_u32(data_index);
let instance = (*vmctx).instance_mut();
instance.data_drop(data_index)
}

/// Drop a `VMExternRef`.
pub unsafe extern "C" fn wasmtime_drop_externref(externref: *mut u8) {
pub unsafe extern "C" fn drop_externref(externref: *mut u8) {
let externref = externref as *mut crate::externref::VMExternData;
let externref = NonNull::new(externref).unwrap();
crate::externref::VMExternData::drop_and_dealloc(externref);
}

/// Do a GC and insert the given `externref` into the
/// `VMExternRefActivationsTable`.
pub unsafe extern "C" fn wasmtime_activations_table_insert_with_gc(
pub unsafe extern "C" fn activations_table_insert_with_gc(
vmctx: *mut VMContext,
externref: *mut u8,
) {
Expand All @@ -416,10 +423,7 @@ pub unsafe extern "C" fn wasmtime_activations_table_insert_with_gc(
}

/// Perform a Wasm `global.get` for `externref` globals.
pub unsafe extern "C" fn wasmtime_externref_global_get(
vmctx: *mut VMContext,
index: u32,
) -> *mut u8 {
pub unsafe extern "C" fn externref_global_get(vmctx: *mut VMContext, index: u32) -> *mut u8 {
let index = GlobalIndex::from_u32(index);
let instance = (*vmctx).instance();
let global = instance.defined_or_imported_global_ptr(index);
Expand All @@ -436,7 +440,7 @@ pub unsafe extern "C" fn wasmtime_externref_global_get(
}

/// Perform a Wasm `global.set` for `externref` globals.
pub unsafe extern "C" fn wasmtime_externref_global_set(
pub unsafe extern "C" fn externref_global_set(
vmctx: *mut VMContext,
index: u32,
externref: *mut u8,
Expand All @@ -460,13 +464,14 @@ pub unsafe extern "C" fn wasmtime_externref_global_set(
}

/// Implementation of `memory.atomic.notify` for locally defined memories.
pub unsafe extern "C" fn wasmtime_memory_atomic_notify(
pub unsafe extern "C" fn memory_atomic_notify(
vmctx: *mut VMContext,
memory_index: u32,
addr: usize,
addr: *mut u8,
_count: u32,
) -> u32 {
let result = {
let addr = addr as usize;
let memory = MemoryIndex::from_u32(memory_index);
let instance = (*vmctx).instance();
// this should never overflow since addr + 4 either hits a guard page
Expand All @@ -475,7 +480,7 @@ pub unsafe extern "C" fn wasmtime_memory_atomic_notify(
let addr_to_check = addr.checked_add(4).unwrap();
validate_atomic_addr(instance, memory, addr_to_check).and_then(|()| {
Err(Trap::User(anyhow::anyhow!(
"unimplemented: wasm atomics (fn wasmtime_memory_atomic_notify) unsupported",
"unimplemented: wasm atomics (fn memory_atomic_notify) unsupported",
)))
})
};
Expand All @@ -486,22 +491,23 @@ pub unsafe extern "C" fn wasmtime_memory_atomic_notify(
}

/// Implementation of `memory.atomic.wait32` for locally defined memories.
pub unsafe extern "C" fn wasmtime_memory_atomic_wait32(
pub unsafe extern "C" fn memory_atomic_wait32(
vmctx: *mut VMContext,
memory_index: u32,
addr: usize,
addr: *mut u8,
_expected: u32,
_timeout: u64,
) -> u32 {
let result = {
let addr = addr as usize;
let memory = MemoryIndex::from_u32(memory_index);
let instance = (*vmctx).instance();
// see wasmtime_memory_atomic_notify for why this shouldn't overflow
// but we still double-check
let addr_to_check = addr.checked_add(4).unwrap();
validate_atomic_addr(instance, memory, addr_to_check).and_then(|()| {
Err(Trap::User(anyhow::anyhow!(
"unimplemented: wasm atomics (fn wasmtime_memory_atomic_wait32) unsupported",
"unimplemented: wasm atomics (fn memory_atomic_wait32) unsupported",
)))
})
};
Expand All @@ -512,22 +518,23 @@ pub unsafe extern "C" fn wasmtime_memory_atomic_wait32(
}

/// Implementation of `memory.atomic.wait64` for locally defined memories.
pub unsafe extern "C" fn wasmtime_memory_atomic_wait64(
pub unsafe extern "C" fn memory_atomic_wait64(
vmctx: *mut VMContext,
memory_index: u32,
addr: usize,
addr: *mut u8,
_expected: u64,
_timeout: u64,
) -> u32 {
let result = {
let addr = addr as usize;
let memory = MemoryIndex::from_u32(memory_index);
let instance = (*vmctx).instance();
// see wasmtime_memory_atomic_notify for why this shouldn't overflow
// but we still double-check
let addr_to_check = addr.checked_add(8).unwrap();
validate_atomic_addr(instance, memory, addr_to_check).and_then(|()| {
Err(Trap::User(anyhow::anyhow!(
"unimplemented: wasm atomics (fn wasmtime_memory_atomic_wait64) unsupported",
"unimplemented: wasm atomics (fn memory_atomic_wait64) unsupported",
)))
})
};
Expand Down Expand Up @@ -561,15 +568,15 @@ unsafe fn validate_atomic_addr(
}

/// Hook for when an instance runs out of fuel.
pub unsafe extern "C" fn wasmtime_out_of_gas(vmctx: *mut VMContext) {
pub unsafe extern "C" fn out_of_gas(vmctx: *mut VMContext) {
match (*(*vmctx).instance().store()).out_of_gas() {
Ok(()) => {}
Err(err) => crate::traphandlers::raise_user_trap(err),
}
}

/// Hook for when an instance observes that the epoch has changed.
pub unsafe extern "C" fn wasmtime_new_epoch(vmctx: *mut VMContext) -> u64 {
pub unsafe extern "C" fn new_epoch(vmctx: *mut VMContext) -> u64 {
match (*(*vmctx).instance().store()).new_epoch() {
Ok(new_deadline) => new_deadline,
Err(err) => crate::traphandlers::raise_user_trap(err),
Expand Down
Loading

0 comments on commit 30bc61d

Please sign in to comment.