From 48a7fa4a9771bf3e7e21928b1295608f2259b28e Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 19 Apr 2021 12:50:40 -0700 Subject: [PATCH 1/2] Optimize `table.init` instruction and instantiation This commit optimizes table initialization as part of instance instantiation and also applies the same optimization to the `table.init` instruction. One part of this commit is to remove some preexisting duplication between instance instantiation and the `table.init` instruction itself, after this the actual implementation of `table.init` is optimized to effectively have fewer bounds checks in fewer places and have a much tighter loop for instantiation. A big fallout from this change is that memory/table initializer offsets are now stored as `u32` instead of `usize` to remove a few casts in a few places. This ended up requiring moving some overflow checks that happened in parsing to later in code itself because otherwise the wrong spec test errors are emitted during testing. I've tried to trace where these can possibly overflow but I think that I managed to get everything. In a local synthetic test where an empty module with a single 80,000 element initializer this improves total instantiation time by 4x (562us => 141us) --- cranelift/wasm/src/environ/dummy.rs | 4 +- cranelift/wasm/src/environ/spec.rs | 4 +- cranelift/wasm/src/sections_translator.rs | 16 +--- crates/environ/src/module.rs | 19 ++-- crates/environ/src/module_environ.rs | 4 +- crates/runtime/src/instance.rs | 88 ++++++++++++------- crates/runtime/src/instance/allocator.rs | 100 +++++++--------------- crates/runtime/src/table.rs | 29 ++++++- 8 files changed, 138 insertions(+), 126 deletions(-) diff --git a/cranelift/wasm/src/environ/dummy.rs b/cranelift/wasm/src/environ/dummy.rs index ea9157e55249..6ac82b73f00d 100644 --- a/cranelift/wasm/src/environ/dummy.rs +++ b/cranelift/wasm/src/environ/dummy.rs @@ -747,7 +747,7 @@ impl<'data> ModuleEnvironment<'data> for DummyEnvironment { &mut self, _table_index: TableIndex, _base: Option, - _offset: usize, + _offset: u32, _elements: Box<[FuncIndex]>, ) -> WasmResult<()> { // We do nothing @@ -792,7 +792,7 @@ impl<'data> ModuleEnvironment<'data> for DummyEnvironment { &mut self, _memory_index: MemoryIndex, _base: Option, - _offset: usize, + _offset: u32, _data: &'data [u8], ) -> WasmResult<()> { // We do nothing diff --git a/cranelift/wasm/src/environ/spec.rs b/cranelift/wasm/src/environ/spec.rs index 3a2389c44d8e..ab7974f576dc 100644 --- a/cranelift/wasm/src/environ/spec.rs +++ b/cranelift/wasm/src/environ/spec.rs @@ -937,7 +937,7 @@ pub trait ModuleEnvironment<'data>: TargetEnvironment { &mut self, table_index: TableIndex, base: Option, - offset: usize, + offset: u32, elements: Box<[FuncIndex]>, ) -> WasmResult<()>; @@ -984,7 +984,7 @@ pub trait ModuleEnvironment<'data>: TargetEnvironment { &mut self, memory_index: MemoryIndex, base: Option, - offset: usize, + offset: u32, data: &'data [u8], ) -> WasmResult<()>; diff --git a/cranelift/wasm/src/sections_translator.rs b/cranelift/wasm/src/sections_translator.rs index c617c9943c89..3ce00e61de39 100644 --- a/cranelift/wasm/src/sections_translator.rs +++ b/cranelift/wasm/src/sections_translator.rs @@ -377,7 +377,7 @@ pub fn parse_element_section<'data>( } => { let mut init_expr_reader = init_expr.get_binary_reader(); let (base, offset) = match init_expr_reader.read_operator()? { - Operator::I32Const { value } => (None, value as u32 as usize), + Operator::I32Const { value } => (None, value as u32), Operator::GlobalGet { global_index } => { (Some(GlobalIndex::from_u32(global_index)), 0) } @@ -388,12 +388,6 @@ pub fn parse_element_section<'data>( )); } }; - // Check for offset + len overflow - if offset.checked_add(segments.len()).is_none() { - return Err(wasm_unsupported!( - "element segment offset and length overflows" - )); - } environ.declare_table_elements( TableIndex::from_u32(table_index), base, @@ -429,7 +423,7 @@ pub fn parse_data_section<'data>( } => { let mut init_expr_reader = init_expr.get_binary_reader(); let (base, offset) = match init_expr_reader.read_operator()? { - Operator::I32Const { value } => (None, value as u32 as usize), + Operator::I32Const { value } => (None, value as u32), Operator::GlobalGet { global_index } => { (Some(GlobalIndex::from_u32(global_index)), 0) } @@ -440,12 +434,6 @@ pub fn parse_data_section<'data>( )) } }; - // Check for offset + len overflow - if offset.checked_add(data.len()).is_none() { - return Err(wasm_unsupported!( - "data segment offset and length overflows" - )); - } environ.declare_data_initialization( MemoryIndex::from_u32(memory_index), base, diff --git a/crates/environ/src/module.rs b/crates/environ/src/module.rs index dd984841184f..38a429382515 100644 --- a/crates/environ/src/module.rs +++ b/crates/environ/src/module.rs @@ -7,6 +7,7 @@ use cranelift_wasm::*; use indexmap::IndexMap; use serde::{Deserialize, Serialize}; use std::collections::{HashMap, HashSet}; +use std::convert::TryFrom; use std::sync::Arc; /// Implemenation styles for WebAssembly linear memory. @@ -86,7 +87,7 @@ pub struct MemoryInitializer { /// Optionally, a global variable giving a base index. pub base: Option, /// The offset to add to the base. - pub offset: usize, + pub offset: u32, /// The data to write into the linear memory. pub data: Box<[u8]>, } @@ -168,7 +169,15 @@ impl MemoryInitialization { // Perform a bounds check on the segment // As this segment is referencing a defined memory without a global base, the last byte // written to by the segment cannot exceed the memory's initial minimum size - if (initializer.offset + initializer.data.len()) + let offset = usize::try_from(initializer.offset).unwrap(); + let end = match offset.checked_add(initializer.data.len()) { + Some(end) => end, + None => { + out_of_bounds = true; + continue; + } + }; + if end > ((module.memory_plans[initializer.memory_index].memory.minimum as usize) * WASM_PAGE_SIZE) @@ -178,8 +187,8 @@ impl MemoryInitialization { } let pages = &mut map[index]; - let mut page_index = initializer.offset / WASM_PAGE_SIZE; - let mut page_offset = initializer.offset % WASM_PAGE_SIZE; + let mut page_index = offset / WASM_PAGE_SIZE; + let mut page_offset = offset % WASM_PAGE_SIZE; let mut data_offset = 0; let mut data_remaining = initializer.data.len(); @@ -268,7 +277,7 @@ pub struct TableInitializer { /// Optionally, a global variable giving a base index. pub base: Option, /// The offset to add to the base. - pub offset: usize, + pub offset: u32, /// The values to write into the table elements. pub elements: Box<[FuncIndex]>, } diff --git a/crates/environ/src/module_environ.rs b/crates/environ/src/module_environ.rs index 8106c1a3fb72..393a83975efe 100644 --- a/crates/environ/src/module_environ.rs +++ b/crates/environ/src/module_environ.rs @@ -705,7 +705,7 @@ impl<'data> cranelift_wasm::ModuleEnvironment<'data> for ModuleEnvironment<'data &mut self, table_index: TableIndex, base: Option, - offset: usize, + offset: u32, elements: Box<[FuncIndex]>, ) -> WasmResult<()> { for element in elements.iter() { @@ -794,7 +794,7 @@ impl<'data> cranelift_wasm::ModuleEnvironment<'data> for ModuleEnvironment<'data &mut self, memory_index: MemoryIndex, base: Option, - offset: usize, + offset: u32, data: &'data [u8], ) -> WasmResult<()> { match &mut self.result.module.memory_initialization { diff --git a/crates/runtime/src/instance.rs b/crates/runtime/src/instance.rs index 916aa90f26c8..f2143681fa47 100644 --- a/crates/runtime/src/instance.rs +++ b/crates/runtime/src/instance.rs @@ -573,11 +573,11 @@ impl Instance { return None; } - Some(unsafe { &*self.anyfunc_ptr(index) }) + unsafe { Some(&*self.vmctx_plus_offset(self.offsets.vmctx_anyfunc(index))) } } - unsafe fn anyfunc_ptr(&self, index: FuncIndex) -> *mut VMCallerCheckedAnyfunc { - self.vmctx_plus_offset(self.offsets.vmctx_anyfunc(index)) + unsafe fn anyfunc_base(&self) -> *mut VMCallerCheckedAnyfunc { + self.vmctx_plus_offset(self.offsets.vmctx_anyfuncs_begin()) } fn find_passive_segment<'a, I, D, T>( @@ -611,38 +611,56 @@ impl Instance { src: u32, len: u32, ) -> Result<(), Trap> { - // https://webassembly.github.io/bulk-memory-operations/core/exec/instructions.html#exec-table-init - - let table = self.get_table(table_index); - let elements = Self::find_passive_segment( elem_index, &self.module.passive_elements_map, &self.module.passive_elements, &self.dropped_elements, ); + self.table_init_segment(table_index, elements, dst, src, len) + } - if src - .checked_add(len) - .map_or(true, |n| n as usize > elements.len()) - || dst.checked_add(len).map_or(true, |m| m > table.size()) - { - return Err(Trap::wasm(ir::TrapCode::TableOutOfBounds)); - } + pub(crate) fn table_init_segment( + &self, + table_index: TableIndex, + elements: &[FuncIndex], + dst: u32, + src: u32, + len: u32, + ) -> Result<(), Trap> { + // https://webassembly.github.io/bulk-memory-operations/core/exec/instructions.html#exec-table-init - // TODO(#983): investigate replacing this get/set loop with a `memcpy`. - for (dst, src) in (dst..dst + len).zip(src..src + len) { - let elem = self - .get_caller_checked_anyfunc(elements[src as usize]) - .map_or(ptr::null_mut(), |f: &VMCallerCheckedAnyfunc| { - f as *const VMCallerCheckedAnyfunc as *mut _ - }); - - table - .set(dst, TableElement::FuncRef(elem)) - .expect("should never panic because we already did the bounds check above"); - } + let table = self.get_table(table_index); + let elements = match elements + .get(usize::try_from(src).unwrap()..) + .and_then(|s| s.get(..usize::try_from(len).unwrap())) + { + Some(elements) => elements, + None => return Err(Trap::wasm(ir::TrapCode::TableOutOfBounds)), + }; + + match table.element_type() { + TableElementType::Func => unsafe { + let base = self.anyfunc_base(); + table.init_funcs( + dst, + elements.iter().map(|idx| { + if *idx == FuncIndex::reserved_value() { + ptr::null_mut() + } else { + debug_assert!(idx.as_u32() < self.offsets.num_defined_functions); + base.add(usize::try_from(idx.as_u32()).unwrap()) + } + }), + )?; + }, + + TableElementType::Val(_) => { + debug_assert!(elements.iter().all(|e| *e == FuncIndex::reserved_value())); + table.fill(dst, TableElement::ExternRef(None), len)?; + } + } Ok(()) } @@ -773,16 +791,26 @@ impl Instance { src: u32, len: u32, ) -> Result<(), Trap> { - // https://webassembly.github.io/bulk-memory-operations/core/exec/instructions.html#exec-memory-init - - let memory = self.get_memory(memory_index); - let data = Self::find_passive_segment( data_index, &self.module.passive_data_map, &self.module.passive_data, &self.dropped_data, ); + self.memory_init_segment(memory_index, &data, dst, src, len) + } + + pub(crate) fn memory_init_segment( + &self, + memory_index: MemoryIndex, + data: &[u8], + dst: u32, + src: u32, + len: u32, + ) -> Result<(), Trap> { + // https://webassembly.github.io/bulk-memory-operations/core/exec/instructions.html#exec-memory-init + + let memory = self.get_memory(memory_index); if src .checked_add(len) diff --git a/crates/runtime/src/instance/allocator.rs b/crates/runtime/src/instance/allocator.rs index 1e4d8008774d..c91f89a74ff4 100644 --- a/crates/runtime/src/instance/allocator.rs +++ b/crates/runtime/src/instance/allocator.rs @@ -2,7 +2,7 @@ use crate::externref::{ModuleInfoLookup, VMExternRefActivationsTable, EMPTY_MODU use crate::imports::Imports; use crate::instance::{Instance, InstanceHandle, ResourceLimiter, RuntimeMemoryCreator}; use crate::memory::{DefaultMemoryCreator, Memory}; -use crate::table::{Table, TableElement}; +use crate::table::Table; use crate::traphandlers::Trap; use crate::vmcontext::{ VMBuiltinFunctionsArray, VMCallerCheckedAnyfunc, VMContext, VMFunctionBody, VMFunctionImport, @@ -19,10 +19,9 @@ use std::rc::Rc; use std::slice; use std::sync::Arc; use thiserror::Error; -use wasmtime_environ::entity::{packed_option::ReservedValue, EntityRef, EntitySet, PrimaryMap}; +use wasmtime_environ::entity::{EntityRef, EntitySet, PrimaryMap}; use wasmtime_environ::wasm::{ - DefinedFuncIndex, DefinedMemoryIndex, DefinedTableIndex, FuncIndex, GlobalInit, SignatureIndex, - TableElementType, WasmType, + DefinedFuncIndex, DefinedMemoryIndex, DefinedTableIndex, GlobalInit, SignatureIndex, WasmType, }; use wasmtime_environ::{ ir, MemoryInitialization, MemoryInitializer, Module, ModuleType, TableInitializer, VMOffsets, @@ -212,7 +211,7 @@ impl<'a> From<&'a PrimaryMap> for Shared fn get_table_init_start( init: &TableInitializer, instance: &Instance, -) -> Result { +) -> Result { match init.base { Some(base) => { let val = unsafe { @@ -223,7 +222,7 @@ fn get_table_init_start( } }; - init.offset.checked_add(val as usize).ok_or_else(|| { + init.offset.checked_add(val).ok_or_else(|| { InstantiationError::Link(LinkError( "element segment global base overflows".to_owned(), )) @@ -237,6 +236,7 @@ fn check_table_init_bounds(instance: &Instance) -> Result<(), InstantiationError for init in &instance.module.table_initializers { let table = instance.get_table(init.table_index); let start = get_table_init_start(init, instance)?; + let start = usize::try_from(start).unwrap(); let end = start.checked_add(init.elements.len()); match end { @@ -256,34 +256,15 @@ fn check_table_init_bounds(instance: &Instance) -> Result<(), InstantiationError fn initialize_tables(instance: &Instance) -> Result<(), InstantiationError> { for init in &instance.module.table_initializers { - let table = instance.get_table(init.table_index); - let start = get_table_init_start(init, instance)?; - let end = start.checked_add(init.elements.len()); - - match end { - Some(end) if end <= table.size() as usize => { - for (i, func_idx) in init.elements.iter().enumerate() { - let item = match table.element_type() { - TableElementType::Func => instance - .get_caller_checked_anyfunc(*func_idx) - .map_or(ptr::null_mut(), |f: &VMCallerCheckedAnyfunc| { - f as *const VMCallerCheckedAnyfunc as *mut VMCallerCheckedAnyfunc - }) - .into(), - TableElementType::Val(_) => { - assert!(*func_idx == FuncIndex::reserved_value()); - TableElement::ExternRef(None) - } - }; - table.set(u32::try_from(start + i).unwrap(), item).unwrap(); - } - } - _ => { - return Err(InstantiationError::Trap(Trap::wasm( - ir::TrapCode::TableOutOfBounds, - ))) - } - } + instance + .table_init_segment( + init.table_index, + &init.elements, + get_table_init_start(init, instance)?, + 0, + init.elements.len() as u32, + ) + .map_err(InstantiationError::Trap)?; } Ok(()) @@ -292,7 +273,7 @@ fn initialize_tables(instance: &Instance) -> Result<(), InstantiationError> { fn get_memory_init_start( init: &MemoryInitializer, instance: &Instance, -) -> Result { +) -> Result { match init.base { Some(base) => { let val = unsafe { @@ -303,7 +284,7 @@ fn get_memory_init_start( } }; - init.offset.checked_add(val as usize).ok_or_else(|| { + init.offset.checked_add(val).ok_or_else(|| { InstantiationError::Link(LinkError("data segment global base overflows".to_owned())) }) } @@ -311,24 +292,6 @@ fn get_memory_init_start( } } -unsafe fn get_memory_slice<'instance>( - init: &MemoryInitializer, - instance: &'instance Instance, -) -> &'instance mut [u8] { - let memory = if let Some(defined_memory_index) = - instance.module.defined_memory_index(init.memory_index) - { - instance.memory(defined_memory_index) - } else { - let import = instance.imported_memory(init.memory_index); - let foreign_instance = (&mut *(import).vmctx).instance(); - let foreign_memory = &mut *(import).from; - let foreign_index = foreign_instance.memory_index(foreign_memory); - foreign_instance.memory(foreign_index) - }; - &mut *ptr::slice_from_raw_parts_mut(memory.base, memory.current_length) -} - fn check_memory_init_bounds( instance: &Instance, initializers: &[MemoryInitializer], @@ -336,6 +299,7 @@ fn check_memory_init_bounds( for init in initializers { let memory = instance.get_memory(init.memory_index); let start = get_memory_init_start(init, instance)?; + let start = usize::try_from(start).unwrap(); let end = start.checked_add(init.data.len()); match end { @@ -358,21 +322,15 @@ fn initialize_memories( initializers: &[MemoryInitializer], ) -> Result<(), InstantiationError> { for init in initializers { - let memory = instance.get_memory(init.memory_index); - let start = get_memory_init_start(init, instance)?; - let end = start.checked_add(init.data.len()); - - match end { - Some(end) if end <= memory.current_length => { - let mem_slice = unsafe { get_memory_slice(init, instance) }; - mem_slice[start..end].copy_from_slice(&init.data); - } - _ => { - return Err(InstantiationError::Trap(Trap::wasm( - ir::TrapCode::HeapOutOfBounds, - ))) - } - } + instance + .memory_init_segment( + init.memory_index, + &init.data, + get_memory_init_start(init, instance)?, + 0, + init.data.len() as u32, + ) + .map_err(InstantiationError::Trap)?; } Ok(()) @@ -496,6 +454,7 @@ unsafe fn initialize_vmcontext(instance: &Instance, req: InstanceAllocationReque ); // Initialize the functions + let mut base = instance.anyfunc_base(); for (index, sig) in instance.module.functions.iter() { let type_index = req.shared_signatures.lookup(*sig); @@ -510,13 +469,14 @@ unsafe fn initialize_vmcontext(instance: &Instance, req: InstanceAllocationReque }; ptr::write( - instance.anyfunc_ptr(index), + base, VMCallerCheckedAnyfunc { func_ptr, type_index, vmctx, }, ); + base = base.add(1); } // Initialize the defined tables diff --git a/crates/runtime/src/table.rs b/crates/runtime/src/table.rs index 34cd654da24b..73aa67a79caf 100644 --- a/crates/runtime/src/table.rs +++ b/crates/runtime/src/table.rs @@ -7,7 +7,7 @@ use crate::{ResourceLimiter, Trap, VMExternRef}; use anyhow::{bail, Result}; use std::cell::{Cell, RefCell}; use std::cmp::min; -use std::convert::TryInto; +use std::convert::{TryFrom, TryInto}; use std::ops::Range; use std::ptr; use std::rc::Rc; @@ -212,6 +212,33 @@ impl Table { } } + /// Fill `table[dst..]` with values from `items` + /// + /// Returns a trap error on out-of-bounds accesses. + pub fn init_funcs( + &self, + dst: u32, + items: impl ExactSizeIterator, + ) -> Result<(), Trap> { + let ty = TableElementType::Func; + assert!(ty == TableElementType::Func); + + self.with_elements_mut(|elements| { + let elements = match elements + .get_mut(usize::try_from(dst).unwrap()..) + .and_then(|s| s.get_mut(..items.len())) + { + Some(elements) => elements, + None => return Err(Trap::wasm(ir::TrapCode::TableOutOfBounds)), + }; + + for (item, slot) in items.zip(elements) { + Self::set_raw(ty, slot, item.into()) + } + Ok(()) + }) + } + /// Fill `table[dst..dst + len]` with `val`. /// /// Returns a trap error on out-of-bounds accesses. From 1decec3612d7eb2dd76eaaa6aea8c71c566388a7 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 19 Apr 2021 15:25:05 -0700 Subject: [PATCH 2/2] Review comments --- crates/runtime/src/table.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crates/runtime/src/table.rs b/crates/runtime/src/table.rs index 73aa67a79caf..0ad9c98e5740 100644 --- a/crates/runtime/src/table.rs +++ b/crates/runtime/src/table.rs @@ -220,8 +220,7 @@ impl Table { dst: u32, items: impl ExactSizeIterator, ) -> Result<(), Trap> { - let ty = TableElementType::Func; - assert!(ty == TableElementType::Func); + assert!(self.element_type() == TableElementType::Func); self.with_elements_mut(|elements| { let elements = match elements @@ -233,7 +232,7 @@ impl Table { }; for (item, slot) in items.zip(elements) { - Self::set_raw(ty, slot, item.into()) + *slot = item as *mut u8; } Ok(()) })