From daa6de315d086d2f449d5b15a3408838387321c9 Mon Sep 17 00:00:00 2001 From: Robin Freyler Date: Wed, 28 Sep 2022 11:55:58 +0200 Subject: [PATCH] Add `InstructionPtr` abstraction (take 2) (#482) * add InstructionPtr abstraction * remove lifetime from InstructionPtr for now We actually really want to have the lifetime here but it would complicate the Stack datastructure too much so we remove it for now. * make use of the new InstructionPtr in FuncFrame * add better test getter API to CodeMap * remove unnecessary end field from InstructionRef * inline some important hot path methods * fix doc links --- crates/wasmi/src/engine/code_map.rs | 95 +++++++++++++++---------- crates/wasmi/src/engine/executor.rs | 51 ++++++++----- crates/wasmi/src/engine/mod.rs | 11 +-- crates/wasmi/src/engine/stack/frames.rs | 48 +++++-------- crates/wasmi/src/engine/stack/mod.rs | 11 +-- 5 files changed, 117 insertions(+), 99 deletions(-) diff --git a/crates/wasmi/src/engine/code_map.rs b/crates/wasmi/src/engine/code_map.rs index 60e43fc9f3..1a3b22c9a4 100644 --- a/crates/wasmi/src/engine/code_map.rs +++ b/crates/wasmi/src/engine/code_map.rs @@ -2,6 +2,7 @@ use super::{super::Index, Instruction}; use alloc::vec::Vec; +use core::ptr::NonNull; /// A reference to a Wasm function body stored in the [`CodeMap`]. #[derive(Debug, Copy, Clone)] @@ -17,13 +18,11 @@ impl Index for FuncBody { } } -/// A reference to the [`Instructions`] of a [`FuncBody`]. +/// A reference to the instructions of a compiled Wasm function. #[derive(Debug, Copy, Clone)] pub struct InstructionsRef { /// The start index in the instructions array. start: usize, - /// The end index in the instructions array. - end: usize, } /// Meta information about a compiled function. @@ -88,8 +87,7 @@ impl CodeMap { { let start = self.insts.len(); self.insts.extend(insts); - let end = self.insts.len(); - let iref = InstructionsRef { start, end }; + let iref = InstructionsRef { start }; let header = FuncHeader { iref, len_locals, @@ -100,56 +98,77 @@ impl CodeMap { FuncBody(header_index) } - /// Resolves the instructions given an [`InstructionsRef`]. - pub fn insts(&self, iref: InstructionsRef) -> Instructions { - Instructions { - insts: &self.insts[iref.start..iref.end], - } + /// Returns an [`InstructionPtr`] to the instruction at [`InstructionsRef`]. + #[inline] + pub fn instr_ptr(&self, iref: InstructionsRef) -> InstructionPtr { + InstructionPtr::new(&self.insts[iref.start]) } /// Returns the [`FuncHeader`] of the [`FuncBody`]. pub fn header(&self, func_body: FuncBody) -> &FuncHeader { &self.headers[func_body.0] } + + /// Resolves the instruction at `index` of the compiled [`FuncBody`]. + #[cfg(test)] + pub fn get_instr(&self, func_body: FuncBody, index: usize) -> Option<&Instruction> { + let header = self.header(func_body); + let start = header.iref.start; + let end = self.instr_end(func_body); + let instrs = &self.insts[start..end]; + instrs.get(index) + } + + /// Returns the `end` index of the instructions of [`FuncBody`]. + /// + /// This is important to synthesize how many instructions there are in + /// the function referred to by [`FuncBody`]. + #[cfg(test)] + pub fn instr_end(&self, func_body: FuncBody) -> usize { + self.headers + .get(func_body.0 + 1) + .map(|header| header.iref.start) + .unwrap_or(self.insts.len()) + } } -/// The instructions of a resolved [`FuncBody`]. +/// The instruction pointer to the instruction of a function on the call stack. #[derive(Debug, Copy, Clone)] -pub struct Instructions<'a> { - insts: &'a [Instruction], +pub struct InstructionPtr { + /// The pointer to the instruction. + ptr: NonNull, } -impl<'a> Instructions<'a> { - /// Returns the instruction at the given index. +impl InstructionPtr { + /// Creates a new [`InstructionPtr`] for `instr`. + pub fn new(instr: &Instruction) -> Self { + Self { + ptr: NonNull::from(instr), + } + } + + /// Offset the [`InstructionPtr`] by the given value. /// - /// # Panics + /// # Safety /// - /// If there is no instruction at the given index. - #[cfg(test)] - pub fn get(&self, index: usize) -> Option<&Instruction> { - self.insts.get(index) + /// The caller is responsible for calling this method only with valid + /// offset values so that the [`InstructionPtr`] never points out of valid + /// bounds of the instructions of the same compiled Wasm function. + #[inline(always)] + pub unsafe fn offset(&mut self, by: isize) { + let new_ptr = &*self.ptr.as_ptr().offset(by); + self.ptr = NonNull::from(new_ptr); } - /// Returns a shared reference to the instruction at the given `pc`. + /// Returns a shared reference to the currently pointed at [`Instruction`]. /// - /// # Panics (Debug) + /// # Safety /// - /// Panics in debug mode if the `pc` is invalid for the [`Instructions`]. + /// The caller is responsible for calling this method only when it is + /// guaranteed that the [`InstructionPtr`] is validly pointing inside + /// the boundaries of its associated compiled Wasm function. #[inline(always)] - pub unsafe fn get_release_unchecked(&self, pc: usize) -> &'a Instruction { - debug_assert!( - self.insts.get(pc).is_some(), - "unexpectedly missing instruction at index {pc}", - ); - // # Safety - // - // This access is safe since all possible accesses have already been - // checked during Wasm validation. Functions and their instructions including - // jump addresses are immutable after Wasm function compilation and validation - // and therefore this bounds check can be safely eliminated. - // - // Note that eliminating this bounds check is extremely valuable since this - // part of the `wasmi` interpreter is part of the interpreter's hot path. - self.insts.get_unchecked(pc) + pub unsafe fn get(&self) -> &Instruction { + self.ptr.as_ref() } } diff --git a/crates/wasmi/src/engine/executor.rs b/crates/wasmi/src/engine/executor.rs index 8274289180..707eed1032 100644 --- a/crates/wasmi/src/engine/executor.rs +++ b/crates/wasmi/src/engine/executor.rs @@ -2,7 +2,7 @@ use super::{ super::{Memory, Table}, bytecode::{BranchParams, FuncIdx, GlobalIdx, Instruction, LocalDepth, Offset, SignatureIdx}, cache::InstanceCache, - code_map::Instructions, + code_map::InstructionPtr, stack::ValueStackRef, AsContextMut, CallOutcome, @@ -33,22 +33,19 @@ use wasmi_core::{memory_units::Pages, ExtendInto, LittleEndianConvert, UntypedVa pub fn execute_frame<'engine>( mut ctx: impl AsContextMut, value_stack: &'engine mut ValueStack, - instrs: Instructions<'engine>, cache: &'engine mut InstanceCache, frame: &mut FuncFrame, ) -> Result { - Executor::new(value_stack, instrs, ctx.as_context_mut(), cache, frame).execute() + Executor::new(value_stack, ctx.as_context_mut(), cache, frame).execute() } /// An execution context for executing a `wasmi` function frame. #[derive(Debug)] struct Executor<'ctx, 'engine, 'func, HostData> { - /// The program counter. - pc: usize, + /// The pointer to the currently executed instruction. + ip: InstructionPtr, /// Stores the value stack of live values on the Wasm stack. value_stack: ValueStackRef<'engine>, - /// The instructions of the executed function frame. - instrs: Instructions<'engine>, /// A mutable [`Store`] context. /// /// [`Store`]: [`crate::v1::Store`] @@ -64,18 +61,16 @@ impl<'ctx, 'engine, 'func, HostData> Executor<'ctx, 'engine, 'func, HostData> { #[inline(always)] pub fn new( value_stack: &'engine mut ValueStack, - instrs: Instructions<'engine>, ctx: StoreContextMut<'ctx, HostData>, cache: &'engine mut InstanceCache, frame: &'func mut FuncFrame, ) -> Self { cache.update_instance(frame.instance()); - let pc = frame.pc(); + let ip = frame.ip(); let value_stack = ValueStackRef::new(value_stack); Self { - pc, + ip, value_stack, - instrs, ctx, cache, frame, @@ -276,11 +271,11 @@ impl<'ctx, 'engine, 'func, HostData> Executor<'ctx, 'engine, 'func, HostData> { /// Returns the [`Instruction`] at the current program counter. #[inline(always)] - fn instr(&self) -> &'engine Instruction { + fn instr(&self) -> &Instruction { // # Safety // // Properly constructed `wasmi` bytecode can never produce invalid `pc`. - unsafe { self.instrs.get_release_unchecked(self.pc) } + unsafe { self.ip.get() } } /// Returns the default linear memory. @@ -478,17 +473,33 @@ impl<'ctx, 'engine, 'func, HostData> Executor<'ctx, 'engine, 'func, HostData> { } fn try_next_instr(&mut self) -> Result<(), TrapCode> { - self.pc += 1; + self.next_instr(); Ok(()) } fn next_instr(&mut self) { - self.pc += 1; + // Safety: This is safe since we carefully constructed the `wasmi` + // bytecode in conjunction with Wasm validation so that the + // offsets of the instruction pointer within the sequence of + // instructions never make the instruction pointer point out + // of bounds of the instructions that belong to the function + // that is currently executed. + unsafe { + self.ip.offset(1); + }; } fn branch_to(&mut self, target: BranchParams) { self.value_stack.drop_keep(target.drop_keep()); - self.pc = (self.pc as isize + target.offset().into_i32() as isize) as usize; + // Safety: This is safe since we carefully constructed the `wasmi` + // bytecode in conjunction with Wasm validation so that the + // offsets of the instruction pointer within the sequence of + // instructions never make the instruction pointer point out + // of bounds of the instructions that belong to the function + // that is currently executed. + unsafe { + self.ip.offset(target.offset().into_i32() as isize); + } } fn sync_stack_ptr(&mut self) { @@ -496,8 +507,8 @@ impl<'ctx, 'engine, 'func, HostData> Executor<'ctx, 'engine, 'func, HostData> { } fn call_func(&mut self, func: Func) -> Result { - self.pc += 1; - self.frame.update_pc(self.pc); + self.next_instr(); + self.frame.update_ip(self.ip); self.sync_stack_ptr(); Ok(CallOutcome::NestedCall(func)) } @@ -559,7 +570,9 @@ impl<'ctx, 'engine, 'func, HostData> Executor<'ctx, 'engine, 'func, HostData> { // A normalized index will always yield a target without panicking. let normalized_index = cmp::min(index as usize, max_index); // Update `pc`: - self.pc += normalized_index + 1; + unsafe { + self.ip.offset((normalized_index + 1) as isize); + } } fn visit_ret(&mut self, drop_keep: DropKeep) -> Result { diff --git a/crates/wasmi/src/engine/mod.rs b/crates/wasmi/src/engine/mod.rs index ada155a51e..973e36e9f4 100644 --- a/crates/wasmi/src/engine/mod.rs +++ b/crates/wasmi/src/engine/mod.rs @@ -173,9 +173,11 @@ impl Engine { /// If the [`FuncBody`] is invalid for the [`Engine`]. #[cfg(test)] pub(crate) fn resolve_inst(&self, func_body: FuncBody, index: usize) -> Option { - let this = self.inner.lock(); - let iref = this.code_map.header(func_body).iref(); - this.code_map.insts(iref).get(index).copied() + self.inner + .lock() + .code_map + .get_instr(func_body, index) + .copied() } /// Executes the given [`Func`] using the given arguments `params` and stores the result into `results`. @@ -412,7 +414,6 @@ impl EngineInner { } let value_stack = &mut self.stack.values; - let instrs = self.code_map.insts(frame.iref()); - execute_frame(ctx, value_stack, instrs, cache, frame).map_err(make_trap) + execute_frame(ctx, value_stack, cache, frame).map_err(make_trap) } } diff --git a/crates/wasmi/src/engine/stack/frames.rs b/crates/wasmi/src/engine/stack/frames.rs index dedf2520ee..59a948e333 100644 --- a/crates/wasmi/src/engine/stack/frames.rs +++ b/crates/wasmi/src/engine/stack/frames.rs @@ -1,15 +1,15 @@ //! Data structures to represent the Wasm call stack during execution. use super::{err_stack_overflow, DEFAULT_MAX_RECURSION_DEPTH}; -use crate::{core::TrapCode, engine::code_map::InstructionsRef, Instance}; +use crate::{core::TrapCode, engine::code_map::InstructionPtr, Instance}; use alloc::vec::Vec; use core::mem::replace; /// A function frame of a function on the call stack. #[derive(Debug, Copy, Clone)] pub struct FuncFrame { - /// The reference to the instructions of the function frame. - iref: InstructionsRef, + /// The pointer to the currently executed instruction. + ip: InstructionPtr, /// The instance in which the function has been defined. /// /// # Note @@ -18,44 +18,28 @@ pub struct FuncFrame { /// non-local to the function such as linear memories, global variables /// and tables. instance: Instance, - /// The current value of the program counter. - /// - /// # Note - /// - /// The program counter always points to the instruction - /// that is going to executed next. - pc: usize, } impl FuncFrame { - /// Returns the program counter. - pub fn pc(&self) -> usize { - self.pc + /// Creates a new [`FuncFrame`]. + pub fn new(ip: InstructionPtr, instance: Instance) -> Self { + Self { ip, instance } } - /// Updates the program counter. - pub fn update_pc(&mut self, new_pc: usize) { - self.pc = new_pc; + /// Returns the current instruction pointer. + pub fn ip(&self) -> InstructionPtr { + self.ip } - /// Creates a new [`FuncFrame`]. - pub fn new(iref: InstructionsRef, instance: Instance) -> Self { - Self { - iref, - instance, - pc: 0, - } + /// Updates the instruction pointer. + pub fn update_ip(&mut self, new_ip: InstructionPtr) { + self.ip = new_ip; } /// Returns the instance of the [`FuncFrame`]. pub fn instance(&self) -> Instance { self.instance } - - /// Returns a reference to the instructions of the [`FuncFrame`]. - pub fn iref(&self) -> InstructionsRef { - self.iref - } } /// The live function call stack storing the live function activation frames. @@ -83,22 +67,22 @@ impl CallStack { } /// Initializes the [`CallStack`] given the Wasm function. - pub(crate) fn init(&mut self, iref: InstructionsRef, instance: Instance) -> FuncFrame { + pub(crate) fn init(&mut self, ip: InstructionPtr, instance: Instance) -> FuncFrame { self.clear(); - FuncFrame::new(iref, instance) + FuncFrame::new(ip, instance) } /// Pushes a Wasm function onto the [`CallStack`]. pub(crate) fn push( &mut self, caller: &mut FuncFrame, - iref: InstructionsRef, + ip: InstructionPtr, instance: Instance, ) -> Result { if self.len() == self.recursion_limit { return Err(err_stack_overflow()); } - let frame = FuncFrame::new(iref, instance); + let frame = FuncFrame::new(ip, instance); let caller = replace(caller, frame); self.frames.push(caller); Ok(frame) diff --git a/crates/wasmi/src/engine/stack/mod.rs b/crates/wasmi/src/engine/stack/mod.rs index ecb26c4948..362b6dd921 100644 --- a/crates/wasmi/src/engine/stack/mod.rs +++ b/crates/wasmi/src/engine/stack/mod.rs @@ -6,7 +6,7 @@ pub use self::{ values::{ValueStack, ValueStackRef}, }; use super::{ - code_map::{CodeMap, InstructionsRef}, + code_map::{CodeMap, InstructionPtr}, func_types::FuncTypeRegistry, FuncParams, }; @@ -141,9 +141,9 @@ impl Stack { wasm_func: &WasmFuncEntity, code_map: &'engine CodeMap, ) -> Result { - let iref = self.call_wasm_impl(wasm_func, code_map)?; + let ip = self.call_wasm_impl(wasm_func, code_map)?; let instance = wasm_func.instance(); - let frame = self.frames.push(caller, iref, instance)?; + let frame = self.frames.push(caller, ip, instance)?; Ok(frame) } @@ -152,7 +152,7 @@ impl Stack { &mut self, wasm_func: &WasmFuncEntity, code_map: &'engine CodeMap, - ) -> Result { + ) -> Result { let header = code_map.header(wasm_func.func_body()); let max_stack_height = header.max_stack_height(); self.values.reserve(max_stack_height)?; @@ -161,7 +161,8 @@ impl Stack { .extend_zeros(len_locals) .expect("stack overflow is unexpected due to previous stack reserve"); let iref = header.iref(); - Ok(iref) + let ip = code_map.instr_ptr(iref); + Ok(ip) } /// Signals the [`Stack`] to return the last Wasm function call.