diff --git a/crates/cli-flags/src/lib.rs b/crates/cli-flags/src/lib.rs
index 89f1472ba4a2..a45e3fffa540 100644
--- a/crates/cli-flags/src/lib.rs
+++ b/crates/cli-flags/src/lib.rs
@@ -115,17 +115,6 @@ wasmtime_option_group! {
         /// The maximum runtime size of each linear memory in the pooling
         /// allocator, in bytes.
         pub pooling_max_memory_size: Option<usize>,
-
-        /// Whether to enable call-indirect caching.
-        pub cache_call_indirects: Option<bool>,
-
-        /// The maximum call-indirect cache slot count.
-        ///
-        /// One slot is allocated per indirect callsite; if the module
-        /// has more indirect callsites than this limit, then the
-        /// first callsites in linear order in the code section, up to
-        /// the limit, will receive a cache slot.
-        pub max_call_indirect_cache_slots: Option<usize>,
     }
 
     enum Optimize {
@@ -576,12 +565,6 @@ impl CommonOptions {
         if let Some(enable) = self.opts.memory_init_cow {
             config.memory_init_cow(enable);
         }
-        if let Some(enable) = self.opts.cache_call_indirects {
-            config.cache_call_indirects(enable);
-        }
-        if let Some(max) = self.opts.max_call_indirect_cache_slots {
-            config.max_call_indirect_cache_slots(max);
-        }
 
         match_feature! {
             ["pooling-allocator" : self.opts.pooling_allocator.or(pooling_allocator_default)]
diff --git a/crates/cranelift/src/compiler.rs b/crates/cranelift/src/compiler.rs
index 80075c648735..c720cf18e401 100644
--- a/crates/cranelift/src/compiler.rs
+++ b/crates/cranelift/src/compiler.rs
@@ -147,14 +147,8 @@ impl wasmtime_environ::Compiler for Compiler {
             context.func.collect_debug_info();
         }
 
-        let mut func_env = FuncEnvironment::new(
-            isa,
-            translation,
-            types,
-            &self.tunables,
-            self.wmemcheck,
-            input.call_indirect_start,
-        );
+        let mut func_env =
+            FuncEnvironment::new(isa, translation, types, &self.tunables, self.wmemcheck);
 
         // The `stack_limit` global value below is the implementation of stack
         // overflow checks in Wasmtime.
@@ -206,11 +200,7 @@ impl wasmtime_environ::Compiler for Compiler {
             flags: MemFlags::trusted(),
         });
         context.func.stack_limit = Some(stack_limit);
-        let FunctionBodyData {
-            validator,
-            body,
-            call_indirect_start: _,
-        } = input;
+        let FunctionBodyData { validator, body } = input;
         let mut validator =
             validator.into_validator(mem::take(&mut compiler.cx.validator_allocations));
         compiler.cx.func_translator.translate_body(
diff --git a/crates/cranelift/src/func_environ.rs b/crates/cranelift/src/func_environ.rs
index 6ade601af03f..8ec068b39522 100644
--- a/crates/cranelift/src/func_environ.rs
+++ b/crates/cranelift/src/func_environ.rs
@@ -12,9 +12,9 @@ use cranelift_entity::{EntityRef, PrimaryMap, SecondaryMap};
 use cranelift_frontend::FunctionBuilder;
 use cranelift_frontend::Variable;
 use cranelift_wasm::{
-    CallIndirectSiteIndex, EngineOrModuleTypeIndex, FuncIndex, FuncTranslationState, GlobalIndex,
-    GlobalVariable, Heap, HeapData, HeapStyle, MemoryIndex, TableData, TableIndex, TableSize,
-    TargetEnvironment, TypeIndex, WasmHeapTopType, WasmHeapType, WasmResult,
+    EngineOrModuleTypeIndex, FuncIndex, FuncTranslationState, GlobalIndex, GlobalVariable, Heap,
+    HeapData, HeapStyle, MemoryIndex, TableData, TableIndex, TableSize, TargetEnvironment,
+    TypeIndex, WasmHeapTopType, WasmHeapType, WasmResult,
 };
 use std::mem;
 use wasmparser::Operator;
@@ -138,9 +138,6 @@ pub struct FuncEnvironment<'module_environment> {
 
     #[cfg(feature = "wmemcheck")]
     wmemcheck: bool,
-
-    /// The current call-indirect-cache index.
-    pub call_indirect_index: usize,
 }
 
 impl<'module_environment> FuncEnvironment<'module_environment> {
@@ -150,7 +147,6 @@ impl<'module_environment> FuncEnvironment<'module_environment> {
         types: &'module_environment ModuleTypesBuilder,
         tunables: &'module_environment Tunables,
         wmemcheck: bool,
-        call_indirect_start: usize,
     ) -> Self {
         let builtin_functions = BuiltinFunctions::new(isa);
 
@@ -178,8 +174,6 @@ impl<'module_environment> FuncEnvironment<'module_environment> {
             // functions should consume at least some fuel.
             fuel_consumed: 1,
 
-            call_indirect_index: call_indirect_start,
-
             #[cfg(feature = "wmemcheck")]
             wmemcheck,
             #[cfg(feature = "wmemcheck")]
@@ -1030,37 +1024,6 @@ impl<'module_environment> FuncEnvironment<'module_environment> {
             _ => unreachable!(),
         }
     }
-
-    /// Allocate the next CallIndirectSiteIndex for indirect-target
-    /// caching purposes, if slots remain below the slot-count limit.
-    fn alloc_call_indirect_index(&mut self) -> Option<CallIndirectSiteIndex> {
-        // We need to check to see if we have reached the cache-slot
-        // limit.
-        //
-        // There are two kinds of limit behavior:
-        //
-        // 1. Our function's start-index is below the limit, but we
-        //    hit the limit in the middle of the function. We will
-        //    allocate slots up to the limit, then stop exactly when we
-        //    hit it.
-        //
-        // 2. Our function is beyond the limit-count of
-        //    `call_indirect`s. The counting prescan in
-        //    `ModuleEnvironment` that assigns start indices will
-        //    saturate at the limit, and this function's start index
-        //    will be exactly the limit, so we get zero slots and exit
-        //    immediately at every call to this function.
-        if self.call_indirect_index >= self.tunables.max_call_indirect_cache_slots {
-            return None;
-        }
-
-        let idx = CallIndirectSiteIndex::from_u32(
-            u32::try_from(self.call_indirect_index)
-                .expect("More than 2^32 callsites; should be limited by impl limits"),
-        );
-        self.call_indirect_index += 1;
-        Some(idx)
-    }
 }
 
 struct Call<'a, 'func, 'module_env> {
@@ -1172,68 +1135,6 @@ impl<'a, 'func, 'module_env> Call<'a, 'func, 'module_env> {
         Ok(self.indirect_call_inst(sig_ref, func_addr, &real_call_args))
     }
 
-    /// Get the address of the call-indirect cache slot for a given callsite.
-    pub fn call_indirect_cache_slot_addr(
-        &mut self,
-        call_site: CallIndirectSiteIndex,
-        vmctx: ir::Value,
-    ) -> ir::Value {
-        let offset = self.env.offsets.vmctx_call_indirect_cache(call_site);
-        self.builder.ins().iadd_imm(vmctx, i64::from(offset))
-    }
-
-    /// Load the cached index and code pointer for an indirect call.
-    ///
-    /// Generates IR like:
-    ///
-    /// ```ignore
-    ///  v1 = load.i64 cache_ptr+0  ;; cached index (cache key)
-    ///  v2 = load.i64 cache_ptr+8  ;; cached raw code pointer (cache value)
-    /// ```
-    ///
-    /// and returns `(index, code_ptr)` (e.g. from above, `(v1, v2)`).
-    fn load_cached_indirect_index_and_code_ptr(
-        &mut self,
-        cache_ptr: ir::Value,
-    ) -> (ir::Value, ir::Value) {
-        let cached_index = self.builder.ins().load(
-            I32,
-            MemFlags::trusted(),
-            cache_ptr,
-            Offset32::from(self.env.offsets.ptr.vmcall_indirect_cache_index()),
-        );
-        let cached_code_ptr = self.builder.ins().load(
-            self.env.pointer_type(),
-            MemFlags::trusted(),
-            cache_ptr,
-            Offset32::from(self.env.offsets.ptr.vmcall_indirect_cache_wasm_call()),
-        );
-
-        (cached_index, cached_code_ptr)
-    }
-
-    /// Update the indirect-call cache: store a new index and raw code
-    /// pointer in the slot for a given callsite.
-    fn store_cached_indirect_index_and_code_ptr(
-        &mut self,
-        cache_ptr: ir::Value,
-        index: ir::Value,
-        code_ptr: ir::Value,
-    ) {
-        self.builder.ins().store(
-            MemFlags::trusted(),
-            index,
-            cache_ptr,
-            Offset32::from(self.env.offsets.ptr.vmcall_indirect_cache_index()),
-        );
-        self.builder.ins().store(
-            MemFlags::trusted(),
-            code_ptr,
-            cache_ptr,
-            Offset32::from(self.env.offsets.ptr.vmcall_indirect_cache_wasm_call()),
-        );
-    }
-
     /// Do an indirect call through the given funcref table.
     pub fn indirect_call(
         mut self,
@@ -1243,126 +1144,14 @@ impl<'a, 'func, 'module_env> Call<'a, 'func, 'module_env> {
         callee: ir::Value,
         call_args: &[ir::Value],
     ) -> WasmResult<Option<ir::Inst>> {
-        // If we are performing call-indirect caching with this table, check the cache.
-        let caching = if self.env.tunables.cache_call_indirects {
-            let plan = &self.env.module.table_plans[table_index];
-            // We can do the indirect call caching optimization only
-            // if table elements will not change (no opcodes exist
-            // that could write the table, and table not exported),
-            // and if we can use the zero-index as a sentinel for "no
-            // cache entry" (initial zeroed vmctx state).
-            !plan.written && !plan.non_null_zero
-        } else {
-            false
-        };
-
-        // Allocate a call-indirect cache slot if caching is
-        // enabled. Note that this still may be `None` if we run out
-        // of slots.
-        let call_site = if caching {
-            self.env.alloc_call_indirect_index()
-        } else {
-            None
-        };
-
-        let (code_ptr, callee_vmctx) = if let Some(call_site) = call_site {
-            // Get a local copy of `vmctx`.
-            let vmctx = self.env.vmctx(self.builder.func);
-            let vmctx = self
-                .builder
-                .ins()
-                .global_value(self.env.pointer_type(), vmctx);
-
-            // Get the address of the cache slot in the VMContext
-            // struct.
-            let slot = self.call_indirect_cache_slot_addr(call_site, vmctx);
-
-            // Create the following CFG and generate code with the following outline:
-            //
-            // (load cached index and code pointer)
-            // hit = icmp eq (cached index), (callee)
-            // brif hit, call_block((cached code ptr), vmctx), miss_block
-            //
-            // miss_block:
-            //   (compute actual code pointer, with checks)
-            //   same_instance = icmp eq (callee vmctx), (vmctx)
-            //   brif same_instance update_block, call_block((actual code ptr), (callee vmctx))
-            //
-            // update_block:
-            //   (store actual index and actual code pointer)
-            //   jump call_block((actual code ptr), (callee vmctx))
-            //
-            // call_block(code_ptr, callee_vmctx):
-            //   (unchecked call-indirect sequence)
-
-            // Create two-level conditionals with CFG.
-            let current_block = self.builder.current_block().unwrap();
-            let miss_block = self.builder.create_block();
-            let call_block = self.builder.create_block();
-            let update_block = self.builder.create_block();
-
-            self.builder.insert_block_after(miss_block, current_block);
-            self.builder.insert_block_after(update_block, miss_block);
-            self.builder.insert_block_after(call_block, update_block);
-            self.builder.set_cold_block(miss_block);
-            self.builder.set_cold_block(update_block);
-
-            // Load cached values, check for hit, branch to
-            // call block or miss block.
-
-            let (cached_index, cached_code_ptr) =
-                self.load_cached_indirect_index_and_code_ptr(slot);
-            let hit = self.builder.ins().icmp(IntCC::Equal, cached_index, callee);
-            self.builder
-                .ins()
-                .brif(hit, call_block, &[cached_code_ptr, vmctx], miss_block, &[]);
-
-            // Miss block: compute actual callee code pointer and
-            // vmctx, and update cache if same-instance.
-
-            self.builder.seal_block(miss_block);
-            self.builder.switch_to_block(miss_block);
-
-            if let Some((code_ptr, callee_vmctx)) =
-                self.check_and_load_code_and_callee_vmctx(table_index, ty_index, callee, true)?
-            {
-                // If callee vmctx is equal to ours, update the cache.
-                let same_instance = self.builder.ins().icmp(IntCC::Equal, callee_vmctx, vmctx);
-
-                self.builder.ins().brif(
-                    same_instance,
-                    update_block,
-                    &[],
-                    call_block,
-                    &[code_ptr, callee_vmctx],
-                );
-
-                self.builder.seal_block(update_block);
-                self.builder.switch_to_block(update_block);
-
-                self.store_cached_indirect_index_and_code_ptr(slot, callee, code_ptr);
-                self.builder
-                    .ins()
-                    .jump(call_block, &[code_ptr, callee_vmctx]);
-            }
-
-            // Call block: do the call.
-
-            self.builder.seal_block(call_block);
-            self.builder.switch_to_block(call_block);
-
-            let code_ptr = self
-                .builder
-                .append_block_param(call_block, self.env.pointer_type());
-            let callee_vmctx = self
-                .builder
-                .append_block_param(call_block, self.env.pointer_type());
-            (code_ptr, callee_vmctx)
-        } else {
-            match self.check_and_load_code_and_callee_vmctx(table_index, ty_index, callee, false)? {
-                Some(pair) => pair,
-                None => return Ok(None),
-            }
+        let (code_ptr, callee_vmctx) = match self.check_and_load_code_and_callee_vmctx(
+            table_index,
+            ty_index,
+            callee,
+            false,
+        )? {
+            Some(pair) => pair,
+            None => return Ok(None),
         };
 
         self.unchecked_call_impl(sig_ref, code_ptr, callee_vmctx, call_args)
diff --git a/crates/environ/src/compile/module_environ.rs b/crates/environ/src/compile/module_environ.rs
index 4548095b8946..f8edcc8455a7 100644
--- a/crates/environ/src/compile/module_environ.rs
+++ b/crates/environ/src/compile/module_environ.rs
@@ -11,7 +11,6 @@ use crate::{
 };
 use anyhow::{bail, Result};
 use cranelift_entity::packed_option::ReservedValue;
-use cranelift_entity::EntityRef;
 use std::borrow::Cow;
 use std::collections::HashMap;
 use std::mem;
@@ -19,8 +18,8 @@ use std::path::PathBuf;
 use std::sync::Arc;
 use wasmparser::{
     types::Types, CustomSectionReader, DataKind, ElementItems, ElementKind, Encoding, ExternalKind,
-    FuncToValidate, FunctionBody, KnownCustom, NameSectionReader, Naming, Operator, Parser,
-    Payload, TypeRef, Validator, ValidatorResources,
+    FuncToValidate, FunctionBody, KnownCustom, NameSectionReader, Naming, Parser, Payload, TypeRef,
+    Validator, ValidatorResources,
 };
 use wasmtime_types::{ConstExpr, ConstOp, ModuleInternedTypeIndex, SizeOverflow, WasmHeapTopType};
@@ -116,8 +115,6 @@ pub struct FunctionBodyData<'a> {
     pub body: FunctionBody<'a>,
     /// Validator for the function body
     pub validator: FuncToValidate<ValidatorResources>,
-    /// The start index for call-indirects in this body.
-    pub call_indirect_start: usize,
 }
 
 #[derive(Debug, Default)]
@@ -436,9 +433,6 @@ impl<'a, 'data> ModuleEnvironment<'a, 'data> {
                         // this never gets past validation
                         ExternalKind::Tag => unreachable!(),
                     };
-                    if let EntityIndex::Table(table) = entity {
-                        self.flag_written_table(table);
-                    }
                     self.result
                         .module
                         .exports
@@ -504,10 +498,6 @@ impl<'a, 'data> ModuleEnvironment<'a, 'data> {
                             let (offset, escaped) = ConstExpr::from_wasmparser(offset_expr)?;
                             debug_assert!(escaped.is_empty());
 
-                            if !offset.provably_nonzero_i32() {
-                                self.flag_table_possibly_non_null_zero_element(table_index);
-                            }
-
                             self.result
                                 .module
                                 .table_initialization
@@ -547,8 +537,6 @@ impl<'a, 'data> ModuleEnvironment<'a, 'data> {
                     self.result.code_index + self.result.module.num_imported_funcs as u32;
                 let func_index = FuncIndex::from_u32(func_index);
 
-                let call_indirect_start = self.result.module.num_call_indirect_caches;
-
                 if self.tunables.generate_native_debuginfo {
                     let sig_index = self.result.module.functions[func_index].signature;
                     let sig = self.types[sig_index].unwrap_func();
@@ -567,12 +555,9 @@ impl<'a, 'data> ModuleEnvironment<'a, 'data> {
                         params: sig.params().into(),
                     });
                }
-                self.prescan_code_section(&body)?;
-                self.result.function_body_inputs.push(FunctionBodyData {
-                    validator,
-                    body,
-                    call_indirect_start,
-                });
+                self.result
+                    .function_body_inputs
+                    .push(FunctionBodyData { validator, body });
                 self.result.code_index += 1;
             }
 
@@ -684,83 +669,6 @@ and for re-adding support for interface types you can see this issue:
         Ok(())
     }
 
-    /// Check various properties in function bodies in a "pre-pass" as
-    /// needed, before we actually generate code. Currently this is
-    /// used for:
-    ///
-    /// - Call-indirect caching: we need to know whether a table is
-    ///   "immutable", i.e., whether there are opcodes that could update
-    ///   its entries. If there are, then the optimization isn't
-    ///   applicable. We can check this by simply scanning all functions
-    ///   for the relevant opcodes.
-    ///
-    ///   We also need to know how many `call_indirect` opcodes are in
-    ///   the whole module so that we know how large a `vmctx` struct
-    ///   to reserve and what its layout will be; and the starting
-    ///   index in this count for each function, so we can generate
-    ///   its code (with accesses to its own `call_indirect` callsite
-    ///   caches) in parallel.
-    fn prescan_code_section(&mut self, body: &FunctionBody<'_>) -> Result<()> {
-        if self.tunables.cache_call_indirects {
-            for op in body.get_operators_reader()? {
-                let op = op?;
-                match op {
-                    // Check whether a table may be mutated by any
-                    // opcode. (Note that we separately check for
-                    // table exports so we can detect mutations from
-                    // the outside; here we are only concerned with
-                    // mutations by our own module's code.)
-                    Operator::TableSet { table }
-                    | Operator::TableFill { table }
-                    | Operator::TableInit { table, .. }
-                    | Operator::TableCopy {
-                        dst_table: table, ..
-                    } => {
-                        // We haven't yet validated the body during
-                        // this pre-scan, so we need to check that
-                        // `dst_table` is in bounds. Ignore if not:
-                        // we'll catch the error later.
-                        let table = TableIndex::from_u32(table);
-                        if table.index() < self.result.module.table_plans.len() {
-                            self.flag_written_table(table);
-                        }
-                    }
-                    // Count the `call_indirect` sites so we can
-                    // assign them unique slots.
-                    //
-                    // We record the value of this counter as a
-                    // start-index as we start to scan each function,
-                    // and that function's compilation (which is
-                    // normally a separate parallel task) counts on
-                    // its own from that start index.
-                    Operator::CallIndirect { .. } => {
-                        self.result.module.num_call_indirect_caches += 1;
-
-                        // Cap the `num_call_indirect_caches` counter
-                        // at `max_call_indirect_cache_slots` so that
-                        // we don't allocate more than that amount of
-                        // space in the VMContext struct.
-                        //
-                        // Note that we also separately check against
-                        // this limit when emitting code for each
-                        // individual slot because we may cross the
-                        // limit in the middle of a function; also
-                        // once we hit the limit, the start-index for
-                        // each subsequent function will be saturated
-                        // at the limit.
-                        self.result.module.num_call_indirect_caches = core::cmp::min(
-                            self.result.module.num_call_indirect_caches,
-                            self.tunables.max_call_indirect_cache_slots,
-                        );
-                    }
-
-                    _ => {}
-                }
-            }
-        }
-        Ok(())
-    }
-
     fn register_custom_section(&mut self, section: &CustomSectionReader<'data>) {
         match section.as_known() {
             KnownCustom::Name(name) => {
@@ -886,14 +794,6 @@ and for re-adding support for interface types you can see this issue:
         self.result.module.num_escaped_funcs += 1;
     }
 
-    fn flag_written_table(&mut self, table: TableIndex) {
-        self.result.module.table_plans[table].written = true;
-    }
-
-    fn flag_table_possibly_non_null_zero_element(&mut self, table: TableIndex) {
-        self.result.module.table_plans[table].non_null_zero = true;
-    }
-
     /// Parses the Name section of the wasm module.
     fn name_section(&mut self, names: NameSectionReader<'data>) -> WasmResult<()> {
         for subsection in names {
diff --git a/crates/environ/src/module.rs b/crates/environ/src/module.rs
index 724d0aad5578..e983333c3326 100644
--- a/crates/environ/src/module.rs
+++ b/crates/environ/src/module.rs
@@ -338,24 +338,13 @@ pub struct TablePlan {
     pub table: Table,
     /// Our chosen implementation style.
     pub style: TableStyle,
-    /// Whether the table is observed to be written or possibly
-    /// written: either by some opcode present in the code section, or
-    /// by the fact that the table is exported.
-    pub written: bool,
-    /// Whether this table may have a non-null zero element.
-    pub non_null_zero: bool,
 }
 
 impl TablePlan {
     /// Draw up a plan for implementing a `Table`.
     pub fn for_table(table: Table, tunables: &Tunables) -> Self {
         let style = TableStyle::for_table(table, tunables);
-        Self {
-            table,
-            style,
-            written: false,
-            non_null_zero: false,
-        }
+        Self { table, style }
     }
 }
diff --git a/crates/environ/src/tunables.rs b/crates/environ/src/tunables.rs
index d32cd5bb72c1..c6fb03aa19b3 100644
--- a/crates/environ/src/tunables.rs
+++ b/crates/environ/src/tunables.rs
@@ -65,14 +65,6 @@ pub struct Tunables {
 
     /// Whether or not Wasm functions target the winch abi.
     pub winch_callable: bool,
-
-    /// Whether we implement a one-entry cache at each call_indirect
-    /// site.
-    pub cache_call_indirects: bool,
-
-    /// The maximum number of call-indirect cache slots that we will
-    /// allocate for one instance.
-    pub max_call_indirect_cache_slots: usize,
 }
 
 impl Tunables {
@@ -125,8 +117,6 @@ impl Tunables {
             relaxed_simd_deterministic: false,
             tail_callable: false,
             winch_callable: false,
-            cache_call_indirects: false,
-            max_call_indirect_cache_slots: 50_000,
         }
     }
 
diff --git a/crates/environ/src/vmoffsets.rs b/crates/environ/src/vmoffsets.rs
index d913f2d62c6e..852c76d094e7 100644
--- a/crates/environ/src/vmoffsets.rs
+++ b/crates/environ/src/vmoffsets.rs
@@ -24,12 +24,11 @@
 //      owned_memories: [VMMemoryDefinition; module.num_owned_memories],
 //      globals: [VMGlobalDefinition; module.num_defined_globals],
 //      func_refs: [VMFuncRef; module.num_escaped_funcs],
-//      call_indirect_caches: [VMCallIndirectCache; module.num_call_indirect_caches],
 // }
 
 use crate::{
-    CallIndirectSiteIndex, DefinedGlobalIndex, DefinedMemoryIndex, DefinedTableIndex, FuncIndex,
-    FuncRefIndex, GlobalIndex, MemoryIndex, Module, TableIndex,
+    DefinedGlobalIndex, DefinedMemoryIndex, DefinedTableIndex, FuncIndex, FuncRefIndex,
+    GlobalIndex, MemoryIndex, Module, TableIndex,
 };
 use cranelift_entity::packed_option::ReservedValue;
 use wasmtime_types::OwnedMemoryIndex;
@@ -74,8 +73,6 @@ pub struct VMOffsets<P> {
     /// The number of escaped functions in the module, the size of the func_refs
     /// array.
     pub num_escaped_funcs: u32,
-    /// The number of call_indirect cache entries in the cache array.
-    pub num_call_indirect_caches: u32,
 
     // precalculated offsets of various member fields
     imported_functions: u32,
@@ -87,7 +84,6 @@ pub struct VMOffsets<P> {
     owned_memories: u32,
     defined_globals: u32,
     defined_func_refs: u32,
-    call_indirect_caches: u32,
     size: u32,
 }
 
@@ -219,23 +215,6 @@ pub trait PtrSize {
             .unwrap()
     }
 
-    // Offsets within `VMCallIndirectCache`.
-
-    /// Return the offset of `VMCallIndirectCache::wasm_call`.
-    fn vmcall_indirect_cache_wasm_call(&self) -> u8 {
-        0
-    }
-
-    /// Return the offset of `VMCallIndirectCache::index`.
-    fn vmcall_indirect_cache_index(&self) -> u8 {
-        self.size()
-    }
-
-    /// Return the size of a `VMCallIndirectCache`.
-    fn size_of_vmcall_indirect_cache(&self) -> u8 {
-        2 * self.size()
-    }
-
     /// Return the offset to the `magic` value in this `VMContext`.
     #[inline]
     fn vmctx_magic(&self) -> u8 {
@@ -354,8 +333,6 @@ pub struct VMOffsetsFields<P> {
     /// The number of escaped functions in the module, the size of the function
     /// references array.
     pub num_escaped_funcs: u32,
-    /// The number of call_indirect cache entries in the cache array.
-    pub num_call_indirect_caches: u32,
 }
 
 impl<P: PtrSize> VMOffsets<P> {
@@ -382,7 +359,6 @@ impl<P: PtrSize> VMOffsets<P> {
             num_owned_memories,
             num_defined_globals: cast_to_u32(module.globals.len() - module.num_imported_globals),
             num_escaped_funcs: cast_to_u32(module.num_escaped_funcs),
-            num_call_indirect_caches: cast_to_u32(module.num_call_indirect_caches),
         })
    }
 
@@ -412,7 +388,6 @@ impl<P: PtrSize> VMOffsets<P> {
             num_defined_memories: _,
             num_owned_memories: _,
             num_escaped_funcs: _,
-            num_call_indirect_caches: _,
 
             // used as the initial size below
             size,
@@ -441,7 +416,6 @@ impl<P: PtrSize> VMOffsets<P> {
     }
 
     calculate_sizes! {
-        call_indirect_caches: "call_indirect caches",
         defined_func_refs: "module functions",
         defined_globals: "defined globals",
         owned_memories: "owned memories",
@@ -468,7 +442,6 @@ impl<P: PtrSize> From<VMOffsetsFields<P>> for VMOffsets<P> {
             num_owned_memories: fields.num_owned_memories,
             num_defined_globals: fields.num_defined_globals,
             num_escaped_funcs: fields.num_escaped_funcs,
-            num_call_indirect_caches: fields.num_call_indirect_caches,
             imported_functions: 0,
             imported_tables: 0,
             imported_memories: 0,
@@ -478,7 +451,6 @@ impl<P: PtrSize> From<VMOffsetsFields<P>> for VMOffsets<P> {
             owned_memories: 0,
             defined_globals: 0,
             defined_func_refs: 0,
-            call_indirect_caches: 0,
             size: 0,
         };
 
@@ -533,10 +505,6 @@ impl<P: PtrSize> From<VMOffsetsFields<P>> for VMOffsets<P> {
                 ret.num_escaped_funcs,
                 ret.ptr.size_of_vm_func_ref(),
             ),
-            size(call_indirect_caches) = cmul(
-                ret.num_call_indirect_caches,
-                ret.ptr.size_of_vmcall_indirect_cache(),
-            ),
         }
 
         ret.size = next_field_offset;
@@ -727,12 +695,6 @@ impl<P: PtrSize> VMOffsets<P> {
         self.defined_func_refs
     }
 
-    /// The offset of the `call_indirect_caches` array.
-    #[inline]
-    pub fn vmctx_call_indirec_caches_begin(&self) -> u32 {
-        self.call_indirect_caches
-    }
-
     /// Return the size of the `VMContext` allocation.
     #[inline]
     pub fn size_of_vmctx(&self) -> u32 {
@@ -877,31 +839,6 @@ impl<P: PtrSize> VMOffsets<P> {
     pub fn vmctx_vmglobal_import_from(&self, index: GlobalIndex) -> u32 {
         self.vmctx_vmglobal_import(index) + u32::from(self.vmglobal_import_from())
     }
-
-    /// Return the offset to the `VMCallIndirectCache` for the given
-    /// call-indirect site.
-    #[inline]
-    pub fn vmctx_call_indirect_cache(&self, call_site: CallIndirectSiteIndex) -> u32 {
-        assert!(call_site.as_u32() < self.num_call_indirect_caches);
-        self.vmctx_call_indirec_caches_begin()
-            + call_site.as_u32() * u32::from(self.ptr.size_of_vmcall_indirect_cache())
-    }
-
-    /// Return the offset to the `wasm_call` field in `*const
-    /// VMCallIndirectCache` with call-site ID `call_site`.
-    #[inline]
-    pub fn vmctx_call_indirect_cache_wasm_call(&self, call_site: CallIndirectSiteIndex) -> u32 {
-        self.vmctx_call_indirect_cache(call_site)
-            + u32::from(self.ptr.vmcall_indirect_cache_wasm_call())
-    }
-
-    /// Return the offset to the `index` field in `*const
-    /// VMCallIndirectCache` with call-site ID `call_site`.
-    #[inline]
-    pub fn vmctx_call_indirect_cache_index(&self, call_site: CallIndirectSiteIndex) -> u32 {
-        self.vmctx_call_indirect_cache(call_site)
-            + u32::from(self.ptr.vmcall_indirect_cache_index())
-    }
 }
 
 /// Offsets for `VMDrcHeader`.
diff --git a/crates/fuzzing/src/generators/config.rs b/crates/fuzzing/src/generators/config.rs
index 2946137004df..d6fd64824af2 100644
--- a/crates/fuzzing/src/generators/config.rs
+++ b/crates/fuzzing/src/generators/config.rs
@@ -180,9 +180,7 @@ impl Config {
                 self.wasmtime.memory_guaranteed_dense_image_size,
             ))
             .allocation_strategy(self.wasmtime.strategy.to_wasmtime())
-            .generate_address_map(self.wasmtime.generate_address_map)
-            .cache_call_indirects(self.wasmtime.cache_call_indirects)
-            .max_call_indirect_cache_slots(self.wasmtime.max_call_indirect_cache_slots);
+            .generate_address_map(self.wasmtime.generate_address_map);
 
         if !self.module_config.config.simd_enabled {
             cfg.wasm_relaxed_simd(false);
@@ -501,10 +499,6 @@ pub struct WasmtimeConfig {
     native_unwind_info: bool,
     /// Configuration for the compiler to use.
     pub compiler_strategy: CompilerStrategy,
-    /// Whether we enable indirect-call caching.
-    cache_call_indirects: bool,
-    /// The maximum number of call-indirect cache slots.
-    max_call_indirect_cache_slots: usize,
     table_lazy_init: bool,
 
     /// Whether or not fuzzing should enable PCC.
diff --git a/crates/types/src/lib.rs b/crates/types/src/lib.rs
index 2fe7cb056c1a..dd9946fb2ac4 100644
--- a/crates/types/src/lib.rs
+++ b/crates/types/src/lib.rs
@@ -1146,12 +1146,6 @@ entity_impl!(TagIndex);
 pub struct StaticModuleIndex(u32);
 entity_impl!(StaticModuleIndex);
 
-/// Index of a `call_indirect` instruction in a module, used for
-/// caching that callsite's target in the VMContext.
-#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug, Serialize, Deserialize)]
-pub struct CallIndirectSiteIndex(u32);
-entity_impl!(CallIndirectSiteIndex);
-
 /// An index of an entity.
 #[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug, Serialize, Deserialize)]
 pub enum EntityIndex {
diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs
index 2e678f65a0a8..3c4e49b00716 100644
--- a/crates/wasmtime/src/config.rs
+++ b/crates/wasmtime/src/config.rs
@@ -151,8 +151,6 @@ struct ConfigTunables {
     debug_adapter_modules: Option<bool>,
     relaxed_simd_deterministic: Option<bool>,
     tail_callable: Option<bool>,
-    cache_call_indirects: Option<bool>,
-    max_call_indirect_cache_slots: Option<usize>,
 }
 
 /// User-provided configuration for the compiler.
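[Editor's note: the next hunk deletes the user-facing API for this feature. For orientation, a minimal sketch of how an embedder would have enabled it, reconstructed from the deleted doc comments and the deleted `tests/all/module.rs` tests below; the two `Config` methods are exactly the ones removed here, and `anyhow` error handling is an assumption of this sketch.]

```rust
// Sketch only: `cache_call_indirects` and `max_call_indirect_cache_slots`
// are removed by this diff, so this compiles only against a wasmtime
// that still carries the feature.
use anyhow::Result;
use wasmtime::{Config, Engine};

fn engine_with_indirect_call_cache() -> Result<Engine> {
    let mut config = Config::new();
    // One-entry cache per `call_indirect` site (default was `false`).
    config.cache_call_indirects(true);
    // Per-instance cap on cache slots; per the removed doc comment,
    // values above the internal hard cap of 50_000 were clamped.
    config.max_call_indirect_cache_slots(10_000);
    Engine::new(&config)
}
```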
@@ -986,57 +984,6 @@ impl Config {
         self
     }
 
-    /// Configures whether we enable the "indirect call cache" optimization.
-    ///
-    /// This feature adds, for each `call_indirect` instruction in a
-    /// Wasm module (i.e., a function-pointer call in guest code), a
-    /// one-entry cache that speeds up the translation from a table
-    /// index to the actual machine code. By default, the VM's
-    /// implementation of this translation requires several
-    /// indirections and checks (table bounds-check, function
-    /// signature-check, table lazy-initialization logic). The intent
-    /// of this feature is to speed up indirect calls substantially
-    /// when they are repeated frequently in hot code.
-    ///
-    /// While it accelerates repeated calls, this feature has the
-    /// potential to slow down instantiation slightly, because it adds
-    /// additional state (the cache storage -- usually 16 bytes per
-    /// `call_indirect` instruction for each instance) that has to be
-    /// initialized. In practice, we have not seen
-    /// measurable/statistically-significant impact from this, though.
-    ///
-    /// Until we have further experience with this feature, it will
-    /// remain off: it is `false` by default.
-    pub fn cache_call_indirects(&mut self, enable: bool) -> &mut Self {
-        self.tunables.cache_call_indirects = Some(enable);
-        self
-    }
-
-    /// Configures the "indirect call cache" maximum capacity.
-    ///
-    /// If the [`Config::cache_call_indirects`] configuration option
-    /// is enabled, the engine allocates "cache slots" directly in its
-    /// per-instance state struct for each `call_indirect` in the
-    /// module's code. We place a limit on this count in order to
-    /// avoid inflating the state too much with very large modules. If
-    /// a module exceeds the limit, the first `max` indirect
-    /// call-sites will still have a one-entry cache, but any indirect
-    /// call-sites beyond the limit (in linear order in the module's
-    /// code section) do not participate in the caching, as if the
-    /// option were turned off.
-    ///
-    /// There is also an internal hard cap to this limit:
-    /// configurations with `max` beyond `50_000` will effectively cap
-    /// the limit at `50_000`. This is so that instance state does not
-    /// become unreasonably large.
-    ///
-    /// This is `50_000` by default.
-    pub fn max_call_indirect_cache_slots(&mut self, max: usize) -> &mut Self {
-        const HARD_CAP: usize = 50_000; // See doc-comment above.
-        self.tunables.max_call_indirect_cache_slots = Some(core::cmp::min(max, HARD_CAP));
-        self
-    }
-
     /// Configures which compilation strategy will be used for wasm modules.
     ///
     /// This method can be used to configure which compiler is used for wasm
@@ -1849,8 +1796,6 @@ impl Config {
             debug_adapter_modules
             relaxed_simd_deterministic
             tail_callable
-            cache_call_indirects
-            max_call_indirect_cache_slots
         }
 
         // If we're going to compile with winch, we must use the winch calling convention.
diff --git a/crates/wasmtime/src/engine/serialization.rs b/crates/wasmtime/src/engine/serialization.rs
index 5da038859808..f350f9286821 100644
--- a/crates/wasmtime/src/engine/serialization.rs
+++ b/crates/wasmtime/src/engine/serialization.rs
@@ -360,8 +360,6 @@ impl Metadata<'_> {
             relaxed_simd_deterministic,
             tail_callable,
             winch_callable,
-            cache_call_indirects,
-            max_call_indirect_cache_slots,
 
             // This doesn't affect compilation, it's just a runtime setting.
             dynamic_memory_growth_reserve: _,
@@ -429,16 +427,6 @@ impl Metadata<'_> {
             other.winch_callable,
             "Winch calling convention",
         )?;
-        Self::check_bool(
-            cache_call_indirects,
-            other.cache_call_indirects,
-            "caching of call-indirect targets",
-        )?;
-        Self::check_int(
-            max_call_indirect_cache_slots,
-            other.max_call_indirect_cache_slots,
-            "maximum slot count for caching of call-indirect targets",
-        )?;
 
         Ok(())
     }
diff --git a/crates/wasmtime/src/runtime/vm/gc/enabled/drc.rs b/crates/wasmtime/src/runtime/vm/gc/enabled/drc.rs
index ce49c34743d5..a0bbcb08fe9c 100644
--- a/crates/wasmtime/src/runtime/vm/gc/enabled/drc.rs
+++ b/crates/wasmtime/src/runtime/vm/gc/enabled/drc.rs
@@ -900,7 +900,6 @@ mod tests {
             num_owned_memories: 0,
             num_defined_globals: 0,
             num_escaped_funcs: 0,
-            num_call_indirect_caches: 0,
         });
 
         assert_eq!(
@@ -929,7 +928,6 @@ mod tests {
             num_owned_memories: 0,
             num_defined_globals: 0,
             num_escaped_funcs: 0,
-            num_call_indirect_caches: 0,
         });
         assert_eq!(
             offsets.vm_gc_ref_activation_table_next() as usize,
@@ -957,7 +955,6 @@ mod tests {
             num_owned_memories: 0,
             num_defined_globals: 0,
             num_escaped_funcs: 0,
-            num_call_indirect_caches: 0,
         });
         assert_eq!(
             offsets.vm_gc_ref_activation_table_end() as usize,
diff --git a/crates/wasmtime/src/runtime/vm/vmcontext.rs b/crates/wasmtime/src/runtime/vm/vmcontext.rs
index d9da984eb987..073f6d80adac 100644
--- a/crates/wasmtime/src/runtime/vm/vmcontext.rs
+++ b/crates/wasmtime/src/runtime/vm/vmcontext.rs
@@ -871,53 +871,6 @@ mod test_vmruntime_limits {
     }
 }
 
-/// One call-indirect cache entry.
-///
-/// It consists of the last observed function-pointer index, and the
-/// direct code pointer (with the same vmctx, i.e., in the same
-/// instance) to call if this index matches.
-#[derive(Debug, Clone)]
-#[allow(dead_code)] // not actually used in Rust runtime code; only in generated code.
-#[repr(C)]
-pub struct VMCallIndirectCache {
-    /// Function pointer for this funcref if being called via the Wasm
-    /// calling convention.
-    pub wasm_call: NonNull<VMWasmCallFunction>,
-
-    /// Table index corresponding to the above function pointer.
-    pub index: usize,
-    // If more elements are added here, remember to add offset_of tests below!
-}
-
-unsafe impl Send for VMCallIndirectCache {}
-unsafe impl Sync for VMCallIndirectCache {}
-
-#[cfg(test)]
-mod test_vm_call_indirect_cache {
-    use super::VMCallIndirectCache;
-    use core::mem::offset_of;
-    use std::mem::size_of;
-    use wasmtime_environ::{Module, PtrSize, VMOffsets};
-
-    #[test]
-    fn check_vm_call_indirect_cache_offsets() {
-        let module = Module::new();
-        let offsets = VMOffsets::new(size_of::<*mut u8>() as u8, &module);
-        assert_eq!(
-            size_of::<VMCallIndirectCache>(),
-            usize::from(offsets.ptr.size_of_vmcall_indirect_cache())
-        );
-        assert_eq!(
-            offset_of!(VMCallIndirectCache, wasm_call),
-            usize::from(offsets.ptr.vmcall_indirect_cache_wasm_call())
-        );
-        assert_eq!(
-            offset_of!(VMCallIndirectCache, index),
-            usize::from(offsets.ptr.vmcall_indirect_cache_index())
-        );
-    }
-}
-
 /// The VM "context", which is pointed to by the `vmctx` arg in Cranelift.
 /// This has information about globals, memories, tables, and other runtime
 /// state associated with the current instance.
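[Editor's note: the `VMCallIndirectCache` entry deleted just above held a (table index, raw code pointer) pair, zero-initialized along with the rest of the vmctx. The hit/miss/update protocol itself was emitted inline as Cranelift IR per call site; as a plain-Rust illustration of that protocol (illustrative types only — the real slots used table index 0 as the "empty" sentinel rather than an `Option`, hence the exclusion of tables with a non-null element at index 0):]

```rust
/// Illustrative stand-in for one cache slot: the last table index seen
/// at this `call_indirect` site (key) and the code pointer resolved for
/// it (value). A plain function pointer stands in for the raw
/// `wasm_call` pointer of the real struct.
#[derive(Clone, Copy)]
struct CacheSlot {
    index: usize,
    target: fn() -> i32,
}

/// One indirect call through a slot, mirroring the emitted CFG:
/// hit -> call the cached target directly; miss -> full checked lookup
/// (bounds check, lazy table init, signature check), then update the
/// slot only when the callee lives in the same instance (same vmctx).
fn cached_call_indirect(
    slot: &mut Option<CacheSlot>,
    index: usize,
    resolve: impl Fn(usize) -> fn() -> i32,
    same_instance: bool,
) -> i32 {
    if let Some(s) = *slot {
        if s.index == index {
            return (s.target)(); // hit: skip all checks
        }
    }
    let target = resolve(index); // miss: slow path with checks
    if same_instance {
        *slot = Some(CacheSlot { index, target }); // "update block"
    }
    target()
}
```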
diff --git a/tests/all/module.rs b/tests/all/module.rs
index eb658f69fec1..94a56acf56cb 100644
--- a/tests/all/module.rs
+++ b/tests/all/module.rs
@@ -251,53 +251,6 @@ fn compile_a_component() -> Result<()> {
     Ok(())
 }
 
-#[test]
-fn call_indirect_caching_and_memory64() -> Result<()> {
-    let mut config = Config::new();
-    config.wasm_memory64(true);
-    config.cache_call_indirects(true);
-    let engine = Engine::new(&config)?;
-    Module::new(
-        &engine,
-        "(module
-            (memory i64 1)
-            (func (param i64) (result i32)
-                local.get 0
-                i32.load offset=0x100000000
-            )
-         )",
-    )?;
-    Ok(())
-}
-
-#[test]
-fn call_indirect_caching_out_of_bounds_table_index() -> Result<()> {
-    let mut config = Config::new();
-    config.cache_call_indirects(true);
-    let engine = Engine::new(&config)?;
-    // Test an out-of-bounds table index: this is exposed to the prescan
-    // that call-indirect caching must perform during compilation, so we
-    // need to make sure the error is properly handled by the validation
-    // that comes later.
-    let err = Module::new(
-        &engine,
-        "(module
-            (func (param i32)
-                ref.null func
-                local.get 0
-                table.set 32 ;; out-of-bounds table index
-            )
-         )",
-    )
-    .unwrap_err();
-    let err = format!("{err:?}");
-    assert!(
-        err.contains("table index out of bounds"),
-        "bad error: {err}"
-    );
-    Ok(())
-}
-
 #[test]
 fn tail_call_defaults() -> Result<()> {
     let wasm_with_tail_calls = "(module (func $a return_call $a))";
diff --git a/tests/disas/indirect-call-caching-exclude-0-index.wat b/tests/disas/indirect-call-caching-exclude-0-index.wat
deleted file mode 100644
index c1446fb231b9..000000000000
--- a/tests/disas/indirect-call-caching-exclude-0-index.wat
+++ /dev/null
@@ -1,107 +0,0 @@
-;;! target = "x86_64"
-;;! flags = [ "-Ocache-call-indirects=y" ]
-
-;; This test checks that we do *not* get the indirect-call caching optimization
-;; when it must not be used: in this case, because there is a non-null element
-;; at index 0 in the table (`0` is the default value for vmctx struct initialization
-;; and so is our "not yet cached" sentinel).
-
-(module
-  (table 10 10 funcref)
-
-  (func $f1 (result i32) i32.const 1)
-  (func $f2 (result i32) i32.const 2)
-  (func $f3 (result i32) i32.const 3)
-
-  (func (export "call_it") (param i32) (result i32)
-    local.get 0
-    call_indirect (result i32))
-
-  (elem (i32.const 0) func $f1 $f2 $f3))
-;; function u0:0(i64 vmctx, i64) -> i32 tail {
-;;     gv0 = vmctx
-;;     gv1 = load.i64 notrap aligned readonly gv0+8
-;;     gv2 = load.i64 notrap aligned gv1
-;;     stack_limit = gv2
-;;
-;; block0(v0: i64, v1: i64):
-;; @003f    v3 = iconst.i32 1
-;; @0041    jump block1(v3)  ; v3 = 1
-;;
-;; block1(v2: i32):
-;; @0041    return v2
-;; }
-;;
-;; function u0:1(i64 vmctx, i64) -> i32 tail {
-;;     gv0 = vmctx
-;;     gv1 = load.i64 notrap aligned readonly gv0+8
-;;     gv2 = load.i64 notrap aligned gv1
-;;     stack_limit = gv2
-;;
-;; block0(v0: i64, v1: i64):
-;; @0044    v3 = iconst.i32 2
-;; @0046    jump block1(v3)  ; v3 = 2
-;;
-;; block1(v2: i32):
-;; @0046    return v2
-;; }
-;;
-;; function u0:2(i64 vmctx, i64) -> i32 tail {
-;;     gv0 = vmctx
-;;     gv1 = load.i64 notrap aligned readonly gv0+8
-;;     gv2 = load.i64 notrap aligned gv1
-;;     stack_limit = gv2
-;;
-;; block0(v0: i64, v1: i64):
-;; @0049    v3 = iconst.i32 3
-;; @004b    jump block1(v3)  ; v3 = 3
-;;
-;; block1(v2: i32):
-;; @004b    return v2
-;; }
-;;
-;; function u0:3(i64 vmctx, i64, i32) -> i32 tail {
-;;     gv0 = vmctx
-;;     gv1 = load.i64 notrap aligned readonly gv0+8
-;;     gv2 = load.i64 notrap aligned gv1
-;;     gv3 = vmctx
-;;     gv4 = load.i64 notrap aligned readonly gv3+88
-;;     sig0 = (i64 vmctx, i64) -> i32 tail
-;;     sig1 = (i64 vmctx, i32 uext, i32 uext) -> i64 system_v
-;;     fn0 = colocated u1:9 sig1
-;;     stack_limit = gv2
-;;
-;; block0(v0: i64, v1: i64, v2: i32):
-;; @0050    v4 = iconst.i32 10
-;; @0050    v5 = icmp uge v2, v4  ; v4 = 10
-;; @0050    v6 = uextend.i64 v2
-;; @0050    v7 = global_value.i64 gv4
-;; @0050    v8 = ishl_imm v6, 3
-;; @0050    v9 = iadd v7, v8
-;; @0050    v10 = iconst.i64 0
-;; @0050    v11 = select_spectre_guard v5, v10, v9  ; v10 = 0
-;; @0050    v12 = load.i64 table_oob aligned table v11
-;; @0050    v13 = band_imm v12, -2
-;; @0050    brif v12, block3(v13), block2
-;;
-;; block2 cold:
-;; @0050    v15 = iconst.i32 0
-;; @0050    v16 = global_value.i64 gv3
-;; @0050    v17 = call fn0(v16, v15, v2)  ; v15 = 0
-;; @0050    jump block3(v17)
-;;
-;; block3(v14: i64):
-;; @0050    v18 = global_value.i64 gv3
-;; @0050    v19 = load.i64 notrap aligned readonly v18+80
-;; @0050    v20 = load.i32 notrap aligned readonly v19
-;; @0050    v21 = load.i32 icall_null aligned readonly v14+16
-;; @0050    v22 = icmp eq v21, v20
-;; @0050    trapz v22, bad_sig
-;; @0050    v23 = load.i64 notrap aligned readonly v14+8
-;; @0050    v24 = load.i64 notrap aligned readonly v14+24
-;; @0050    v25 = call_indirect sig0, v23(v24, v0)
-;; @0053    jump block1(v25)
-;;
-;; block1(v3: i32):
-;; @0053    return v3
-;; }
diff --git a/tests/disas/indirect-call-caching-exclude-table-export.wat b/tests/disas/indirect-call-caching-exclude-table-export.wat
deleted file mode 100644
index 1e7ee89f53c6..000000000000
--- a/tests/disas/indirect-call-caching-exclude-table-export.wat
+++ /dev/null
@@ -1,106 +0,0 @@
-;;! target = "x86_64"
-;;! flags = [ "-Ocache-call-indirects=y" ]
-
-;; This test checks that we do *not* get the indirect-call caching optimization
-;; when it must not be used: in this case, because the table is exported so
-;; could be mutated (invalidating the cache, which we would not detect).
-
-(module
-  (table (export "t") 10 10 funcref)
-
-  (func $f1 (result i32) i32.const 1)
-  (func $f2 (result i32) i32.const 2)
-  (func $f3 (result i32) i32.const 3)
-
-  (func (export "call_it") (param i32) (result i32)
-    local.get 0
-    call_indirect (result i32))
-
-  (elem (i32.const 1) func $f1 $f2 $f3))
-;; function u0:0(i64 vmctx, i64) -> i32 tail {
-;;     gv0 = vmctx
-;;     gv1 = load.i64 notrap aligned readonly gv0+8
-;;     gv2 = load.i64 notrap aligned gv1
-;;     stack_limit = gv2
-;;
-;; block0(v0: i64, v1: i64):
-;; @0043    v3 = iconst.i32 1
-;; @0045    jump block1(v3)  ; v3 = 1
-;;
-;; block1(v2: i32):
-;; @0045    return v2
-;; }
-;;
-;; function u0:1(i64 vmctx, i64) -> i32 tail {
-;;     gv0 = vmctx
-;;     gv1 = load.i64 notrap aligned readonly gv0+8
-;;     gv2 = load.i64 notrap aligned gv1
-;;     stack_limit = gv2
-;;
-;; block0(v0: i64, v1: i64):
-;; @0048    v3 = iconst.i32 2
-;; @004a    jump block1(v3)  ; v3 = 2
-;;
-;; block1(v2: i32):
-;; @004a    return v2
-;; }
-;;
-;; function u0:2(i64 vmctx, i64) -> i32 tail {
-;;     gv0 = vmctx
-;;     gv1 = load.i64 notrap aligned readonly gv0+8
-;;     gv2 = load.i64 notrap aligned gv1
-;;     stack_limit = gv2
-;;
-;; block0(v0: i64, v1: i64):
-;; @004d    v3 = iconst.i32 3
-;; @004f    jump block1(v3)  ; v3 = 3
-;;
-;; block1(v2: i32):
-;; @004f    return v2
-;; }
-;;
-;; function u0:3(i64 vmctx, i64, i32) -> i32 tail {
-;;     gv0 = vmctx
-;;     gv1 = load.i64 notrap aligned readonly gv0+8
-;;     gv2 = load.i64 notrap aligned gv1
-;;     gv3 = vmctx
-;;     gv4 = load.i64 notrap aligned readonly gv3+88
-;;     sig0 = (i64 vmctx, i64) -> i32 tail
-;;     sig1 = (i64 vmctx, i32 uext, i32 uext) -> i64 system_v
-;;     fn0 = colocated u1:9 sig1
-;;     stack_limit = gv2
-;;
-;; block0(v0: i64, v1: i64, v2: i32):
-;; @0054    v4 = iconst.i32 10
-;; @0054    v5 = icmp uge v2, v4  ; v4 = 10
-;; @0054    v6 = uextend.i64 v2
-;; @0054    v7 = global_value.i64 gv4
-;; @0054    v8 = ishl_imm v6, 3
-;; @0054    v9 = iadd v7, v8
-;; @0054    v10 = iconst.i64 0
-;; @0054    v11 = select_spectre_guard v5, v10, v9  ; v10 = 0
-;; @0054    v12 = load.i64 table_oob aligned table v11
-;; @0054    v13 = band_imm v12, -2
-;; @0054    brif v12, block3(v13), block2
-;;
-;; block2 cold:
-;; @0054    v15 = iconst.i32 0
-;; @0054    v16 = global_value.i64 gv3
-;; @0054    v17 = call fn0(v16, v15, v2)  ; v15 = 0
-;; @0054    jump block3(v17)
-;;
-;; block3(v14: i64):
-;; @0054    v18 = global_value.i64 gv3
-;; @0054    v19 = load.i64 notrap aligned readonly v18+80
-;; @0054    v20 = load.i32 notrap aligned readonly v19
-;; @0054    v21 = load.i32 icall_null aligned readonly v14+16
-;; @0054    v22 = icmp eq v21, v20
-;; @0054    trapz v22, bad_sig
-;; @0054    v23 = load.i64 notrap aligned readonly v14+8
-;; @0054    v24 = load.i64 notrap aligned readonly v14+24
-;; @0054    v25 = call_indirect sig0, v23(v24, v0)
-;; @0057    jump block1(v25)
-;;
-;; block1(v3: i32):
-;; @0057    return v3
-;; }
diff --git a/tests/disas/indirect-call-caching-exclude-table-writes.wat b/tests/disas/indirect-call-caching-exclude-table-writes.wat
deleted file mode 100644
index 387da41b742c..000000000000
--- a/tests/disas/indirect-call-caching-exclude-table-writes.wat
+++ /dev/null
@@ -1,138 +0,0 @@
-;;! target = "x86_64"
-;;! flags = [ "-Ocache-call-indirects=y" ]
-
-;; This test checks that we do *not* get the indirect-call caching optimization
-;; when it must not be used: in this case, because the table is updated with a
-;; `table.set` instruction (invalidating the cache, which we would not detect).
-
-(module
-  (table 10 10 funcref)
-
-  (func $f1 (result i32) i32.const 1)
-  (func $f2 (result i32) i32.const 2)
-  (func $f3 (result i32) i32.const 3)
-
-  (func (export "call_it") (param i32) (result i32)
-    local.get 0
-    call_indirect (result i32))
-
-  (func (export "update_table")
-    i32.const 1
-    ref.null func
-    table.set)
-
-  (elem (i32.const 1) func $f1 $f2 $f3))
-;; function u0:0(i64 vmctx, i64) -> i32 tail {
-;;     gv0 = vmctx
-;;     gv1 = load.i64 notrap aligned readonly gv0+8
-;;     gv2 = load.i64 notrap aligned gv1
-;;     stack_limit = gv2
-;;
-;; block0(v0: i64, v1: i64):
-;; @0052    v3 = iconst.i32 1
-;; @0054    jump block1(v3)  ; v3 = 1
-;;
-;; block1(v2: i32):
-;; @0054    return v2
-;; }
-;;
-;; function u0:1(i64 vmctx, i64) -> i32 tail {
-;;     gv0 = vmctx
-;;     gv1 = load.i64 notrap aligned readonly gv0+8
-;;     gv2 = load.i64 notrap aligned gv1
-;;     stack_limit = gv2
-;;
-;; block0(v0: i64, v1: i64):
-;; @0057    v3 = iconst.i32 2
-;; @0059    jump block1(v3)  ; v3 = 2
-;;
-;; block1(v2: i32):
-;; @0059    return v2
-;; }
-;;
-;; function u0:2(i64 vmctx, i64) -> i32 tail {
-;;     gv0 = vmctx
-;;     gv1 = load.i64 notrap aligned readonly gv0+8
-;;     gv2 = load.i64 notrap aligned gv1
-;;     stack_limit = gv2
-;;
-;; block0(v0: i64, v1: i64):
-;; @005c    v3 = iconst.i32 3
-;; @005e    jump block1(v3)  ; v3 = 3
-;;
-;; block1(v2: i32):
-;; @005e    return v2
-;; }
-;;
-;; function u0:3(i64 vmctx, i64, i32) -> i32 tail {
-;;     gv0 = vmctx
-;;     gv1 = load.i64 notrap aligned readonly gv0+8
-;;     gv2 = load.i64 notrap aligned gv1
-;;     gv3 = vmctx
-;;     gv4 = load.i64 notrap aligned readonly gv3+88
-;;     sig0 = (i64 vmctx, i64) -> i32 tail
-;;     sig1 = (i64 vmctx, i32 uext, i32 uext) -> i64 system_v
-;;     fn0 = colocated u1:9 sig1
-;;     stack_limit = gv2
-;;
-;; block0(v0: i64, v1: i64, v2: i32):
-;; @0063    v4 = iconst.i32 10
-;; @0063    v5 = icmp uge v2, v4  ; v4 = 10
-;; @0063    v6 = uextend.i64 v2
-;; @0063    v7 = global_value.i64 gv4
-;; @0063    v8 = ishl_imm v6, 3
-;; @0063    v9 = iadd v7, v8
-;; @0063    v10 = iconst.i64 0
-;; @0063    v11 = select_spectre_guard v5, v10, v9  ; v10 = 0
-;; @0063    v12 = load.i64 table_oob aligned table v11
-;; @0063    v13 = band_imm v12, -2
-;; @0063    brif v12, block3(v13), block2
-;;
-;; block2 cold:
-;; @0063    v15 = iconst.i32 0
-;; @0063    v16 = global_value.i64 gv3
-;; @0063    v17 = call fn0(v16, v15, v2)  ; v15 = 0
-;; @0063    jump block3(v17)
-;;
-;; block3(v14: i64):
-;; @0063    v18 = global_value.i64 gv3
-;; @0063    v19 = load.i64 notrap aligned readonly v18+80
-;; @0063    v20 = load.i32 notrap aligned readonly v19
-;; @0063    v21 = load.i32 icall_null aligned readonly v14+16
-;; @0063    v22 = icmp eq v21, v20
-;; @0063    trapz v22, bad_sig
-;; @0063    v23 = load.i64 notrap aligned readonly v14+8
-;; @0063    v24 = load.i64 notrap aligned readonly v14+24
-;; @0063    v25 = call_indirect sig0, v23(v24, v0)
-;; @0066    jump block1(v25)
-;;
-;; block1(v3: i32):
-;; @0066    return v3
-;; }
-;;
-;; function u0:4(i64 vmctx, i64) tail {
-;;     gv0 = vmctx
-;;     gv1 = load.i64 notrap aligned readonly gv0+8
-;;     gv2 = load.i64 notrap aligned gv1
-;;     gv3 = vmctx
-;;     gv4 = load.i64 notrap aligned readonly gv3+88
-;;     stack_limit = gv2
-;;
-;; block0(v0: i64, v1: i64):
-;; @0069    v2 = iconst.i32 1
-;; @006b    v3 = iconst.i64 0
-;; @006d    v4 = iconst.i32 10
-;; @006d    v5 = icmp uge v2, v4  ; v2 = 1, v4 = 10
-;; @006d    v6 = uextend.i64 v2  ; v2 = 1
-;; @006d    v7 = global_value.i64 gv4
-;; @006d    v8 = ishl_imm v6, 3
-;; @006d    v9 = iadd v7, v8
-;; @006d    v10 = iconst.i64 0
-;; @006d    v11 = select_spectre_guard v5, v10, v9  ; v10 = 0
-;; @006d    v12 = bor_imm v3, 1  ; v3 = 0
-;; @006d    store table_oob aligned table v12, v11
-;; @006f    jump block1
-;;
-;; block1:
-;; @006f    return
-;; }
diff --git a/tests/disas/indirect-call-caching-slot-limit-1.wat b/tests/disas/indirect-call-caching-slot-limit-1.wat
deleted file mode 100644
index cedf4aaf4e6a..000000000000
--- a/tests/disas/indirect-call-caching-slot-limit-1.wat
+++ /dev/null
@@ -1,157 +0,0 @@
-;;! target = "x86_64"
-;;! flags = [ "-Ocache-call-indirects=y", "-Omax-call-indirect-cache-slots=2" ]
-
-;; This test checks that we properly bound the number of call-indirect
-;; cache slots. The first case (here) is when the limit falls in the
-;; middle of a function. We set the limit to 2 above; we have 3
-;; `call_indirect`s below; the last should not have caching code.
-;;
-;; In particular, below we see the cache probe sequence in block0
-;; (first) and block3 (second); but the third call, starting in
-;; block8, has no cache slot access and just performs the checks
-;; unconditionally.
-
-(module
-  (table 10 10 funcref)
-
-  (func (export "call_it") (param i32) (result i32)
-    local.get 0
-    call_indirect (result i32)
-    call_indirect (result i32)
-    call_indirect (result i32)))
-
-;; function u0:0(i64 vmctx, i64, i32) -> i32 tail {
-;;     gv0 = vmctx
-;;     gv1 = load.i64 notrap aligned readonly gv0+8
-;;     gv2 = load.i64 notrap aligned gv1
-;;     gv3 = vmctx
-;;     gv4 = load.i64 notrap aligned readonly gv3+88
-;;     sig0 = (i64 vmctx, i64) -> i32 tail
-;;     sig1 = (i64 vmctx, i32 uext, i32 uext) -> i64 system_v
-;;     fn0 = colocated u1:9 sig1
-;;     stack_limit = gv2
-;;
-;; block0(v0: i64, v1: i64, v2: i32):
-;; @0033    v4 = global_value.i64 gv3
-;; @0033    v5 = iadd_imm v4, 144
-;; @0033    v6 = load.i32 notrap aligned v5+8
-;; @0033    v7 = load.i64 notrap aligned v5
-;; @0033    v8 = icmp eq v6, v2
-;; @0033    brif v8, block3(v7, v4), block2
-;;
-;; block2 cold:
-;; @0033    v9 = iconst.i32 10
-;; @0033    v10 = icmp.i32 uge v2, v9  ; v9 = 10
-;; @0033    v11 = uextend.i64 v2
-;; @0033    v12 = global_value.i64 gv4
-;; @0033    v13 = ishl_imm v11, 3
-;; @0033    v14 = iadd v12, v13
-;; @0033    v15 = iconst.i64 0
-;; @0033    v16 = select_spectre_guard v10, v15, v14  ; v15 = 0
-;; @0033    v17 = load.i64 table_oob aligned table v16
-;; @0033    v18 = band_imm v17, -2
-;; @0033    brif v17, block6(v18), block5
-;;
-;; block4 cold:
-;; @0033    store.i32 notrap aligned v2, v5+8
-;; @0033    store.i64 notrap aligned v28, v5
-;; @0033    jump block3(v28, v29)
-;;
-;; block3(v31: i64, v32: i64):
-;; @0033    v33 = call_indirect sig0, v31(v32, v0)
-;; @0036    v34 = global_value.i64 gv3
-;; @0036    v35 = iadd_imm v34, 160
-;; @0036    v36 = load.i32 notrap aligned v35+8
-;; @0036    v37 = load.i64 notrap aligned v35
-;; @0036    v38 = icmp eq v36, v33
-;; @0036    brif v38, block8(v37, v34), block7
-;;
-;; block7 cold:
-;; @0036    v39 = iconst.i32 10
-;; @0036    v40 = icmp.i32 uge v33, v39  ; v39 = 10
-;; @0036    v41 = uextend.i64 v33
-;; @0036    v42 = global_value.i64 gv4
-;; @0036    v43 = ishl_imm v41, 3
-;; @0036    v44 = iadd v42, v43
-;; @0036    v45 = iconst.i64 0
-;; @0036    v46 = select_spectre_guard v40, v45, v44  ; v45 = 0
-;; @0036    v47 = load.i64 table_oob aligned table v46
-;; @0036    v48 = band_imm v47, -2
-;; @0036    brif v47, block11(v48), block10
-;;
-;; block9 cold:
-;; @0036    store.i32 notrap aligned v33, v35+8
-;; @0036    store.i64 notrap aligned v58, v35
-;; @0036    jump block8(v58, v59)
-;;
-;; block8(v61: i64, v62: i64):
-;; @0036    v63 = call_indirect sig0, v61(v62, v0)
-;; @0039    v64 = iconst.i32 10
-;; @0039    v65 = icmp uge v63, v64  ; v64 = 10
-;; @0039    v66 = uextend.i64 v63
-;; @0039    v67 = global_value.i64 gv4
-;; @0039    v68 = ishl_imm v66, 3
-;; @0039    v69 = iadd v67, v68
-;; @0039    v70 = iconst.i64 0
-;; @0039    v71 = select_spectre_guard v65, v70, v69  ; v70 = 0
-;; @0039    v72 = load.i64 table_oob aligned table v71
-;; @0039    v73 = band_imm v72, -2
-;; @0039    brif v72, block13(v73), block12
-;;
-;; block5 cold:
-;; @0033    v20 = iconst.i32 0
-;; @0033    v21 = global_value.i64 gv3
-;; @0033    v22 = call fn0(v21, v20, v2)  ; v20 = 0
-;; @0033    jump block6(v22)
-;;
-;; block6(v19: i64) cold:
-;; @0033    v23 = global_value.i64 gv3
-;; @0033    v24 = load.i64 notrap aligned readonly v23+80
-;; @0033    v25 = load.i32 notrap aligned readonly v24+4
-;; @0033    v26 = load.i32 icall_null aligned readonly v19+16
-;; @0033    v27 = icmp eq v26, v25
-;; @0033    trapz v27, bad_sig
-;; @0033    v28 = load.i64 notrap aligned readonly v19+8
-;; @0033    v29 = load.i64 notrap aligned readonly v19+24
-;; @0033    v30 = icmp eq v29, v4
-;; @0033    brif v30, block4, block3(v28, v29)
-;;
-;; block10 cold:
-;; @0036    v50 = iconst.i32 0
-;; @0036    v51 = global_value.i64 gv3
-;; @0036    v52 = call fn0(v51, v50, v33)  ; v50 = 0
-;; @0036    jump block11(v52)
-;;
-;; block11(v49: i64) cold:
-;; @0036    v53 = global_value.i64 gv3
-;; @0036    v54 = load.i64 notrap aligned readonly v53+80
-;; @0036    v55 = load.i32 notrap aligned readonly v54+4
-;; @0036    v56 = load.i32 icall_null aligned readonly v49+16
-;; @0036    v57 = icmp eq v56, v55
-;; @0036    trapz v57, bad_sig
-;; @0036    v58 = load.i64 notrap aligned readonly v49+8
-;; @0036    v59 = load.i64 notrap aligned readonly v49+24
-;; @0036    v60 = icmp eq v59, v34
-;; @0036    brif v60, block9, block8(v58, v59)
-;;
-;; block12 cold:
-;; @0039    v75 = iconst.i32 0
-;; @0039    v76 = global_value.i64 gv3
-;; @0039    v77 = call fn0(v76, v75, v63)  ; v75 = 0
-;; @0039    jump block13(v77)
-;;
-;; block13(v74: i64):
-;; @0039    v78 = global_value.i64 gv3
-;; @0039    v79 = load.i64 notrap aligned readonly v78+80
-;; @0039    v80 = load.i32 notrap aligned readonly v79+4
-;; @0039    v81 = load.i32 icall_null aligned readonly v74+16
-;; @0039    v82 = icmp eq v81, v80
-;; @0039    trapz v82, bad_sig
-;; @0039    v83 = load.i64 notrap aligned readonly v74+8
-;; @0039    v84 = load.i64 notrap aligned readonly v74+24
-;; @0039    v85 = call_indirect sig0, v83(v84, v0)
-;; @003c    jump block1(v85)
-;;
-;; block1(v3: i32):
-;; @003c    return v3
-;; }
diff --git a/tests/disas/indirect-call-caching-slot-limit-2.wat b/tests/disas/indirect-call-caching-slot-limit-2.wat
deleted file mode 100644
index 719d5e68b7c3..000000000000
--- a/tests/disas/indirect-call-caching-slot-limit-2.wat
+++ /dev/null
@@ -1,180 +0,0 @@
-;;! target = "x86_64"
-;;! flags = [ "-Ocache-call-indirects=y", "-Omax-call-indirect-cache-slots=2" ]
-
-;; This test checks that we properly bound the number of call-indirect
-;; cache slots. The second case (here) is when the limit falls
-;; entirely before a function. We set the limit to 2 above; we have 2
-;; callsites in the first function; the second function should have no
-;; caching.
-;;
-;; In particular, below we see the cache probe sequence in block0
-;; (first) and block3 (second) in `u0:0` (`call_it`); but the call in
-;; the second function in `u0:1` (`call_it_2`), starting in block0 in
-;; that function, has no cache slot access and just performs the
-;; checks unconditionally.
-
-(module
-  (table 10 10 funcref)
-
-  (func (export "call_it") (param i32) (result i32)
-    local.get 0
-    call_indirect (result i32)
-    call_indirect (result i32))
-
-  (func (export "call_it_2") (param i32) (result i32)
-    local.get 0
-    call_indirect (result i32)))
-
-;; function u0:0(i64 vmctx, i64, i32) -> i32 tail {
-;;     gv0 = vmctx
-;;     gv1 = load.i64 notrap aligned readonly gv0+8
-;;     gv2 = load.i64 notrap aligned gv1
-;;     gv3 = vmctx
-;;     gv4 = load.i64 notrap aligned readonly gv3+88
-;;     sig0 = (i64 vmctx, i64) -> i32 tail
-;;     sig1 = (i64 vmctx, i32 uext, i32 uext) -> i64 system_v
-;;     fn0 = colocated u1:9 sig1
-;;     stack_limit = gv2
-;;
-;; block0(v0: i64, v1: i64, v2: i32):
-;; @0040    v4 = global_value.i64 gv3
-;; @0040    v5 = iadd_imm v4, 176
-;; @0040    v6 = load.i32 notrap aligned v5+8
-;; @0040    v7 = load.i64 notrap aligned v5
-;; @0040    v8 = icmp eq v6, v2
-;; @0040    brif v8, block3(v7, v4), block2
-;;
-;; block2 cold:
-;; @0040    v9 = iconst.i32 10
-;; @0040    v10 = icmp.i32 uge v2, v9  ; v9 = 10
-;; @0040    v11 = uextend.i64 v2
-;; @0040    v12 = global_value.i64 gv4
-;; @0040    v13 = ishl_imm v11, 3
-;; @0040    v14 = iadd v12, v13
-;; @0040    v15 = iconst.i64 0
-;; @0040    v16 = select_spectre_guard v10, v15, v14  ; v15 = 0
-;; @0040    v17 = load.i64 table_oob aligned table v16
-;; @0040    v18 = band_imm v17, -2
-;; @0040    brif v17, block6(v18), block5
-;;
-;; block4 cold:
-;; @0040    store.i32 notrap aligned v2, v5+8
-;; @0040    store.i64 notrap aligned v28, v5
-;; @0040    jump block3(v28, v29)
-;;
-;; block3(v31: i64, v32: i64):
-;; @0040    v33 = call_indirect sig0, v31(v32, v0)
-;; @0043    v34 = global_value.i64 gv3
-;; @0043    v35 = iadd_imm v34, 192
-;; @0043    v36 = load.i32 notrap aligned v35+8
-;; @0043    v37 = load.i64 notrap aligned v35
-;; @0043    v38 = icmp eq v36, v33
-;; @0043    brif v38, block8(v37, v34), block7
-;;
-;; block7 cold:
-;; @0043    v39 = iconst.i32 10
-;; @0043    v40 = icmp.i32 uge v33, v39  ; v39 = 10
-;; @0043    v41 = uextend.i64 v33
-;; @0043    v42 = global_value.i64 gv4
-;; @0043    v43 = ishl_imm v41, 3
-;; @0043    v44 = iadd v42, v43
-;; @0043    v45 = iconst.i64 0
-;; @0043    v46 = select_spectre_guard v40, v45, v44  ; v45 = 0
-;; @0043    v47 = load.i64 table_oob aligned table v46
-;; @0043    v48 = band_imm v47, -2
-;; @0043    brif v47, block11(v48), block10
-;;
-;; block9 cold:
-;; @0043    store.i32 notrap aligned v33, v35+8
-;; @0043    store.i64 notrap aligned v58, v35
-;; @0043    jump block8(v58, v59)
-;;
-;; block8(v61: i64, v62: i64):
-;; @0043    v63 = call_indirect sig0, v61(v62, v0)
-;; @0046    jump block1(v63)
-;;
-;; block5 cold:
-;; @0040    v20 = iconst.i32 0
-;; @0040    v21 = global_value.i64 gv3
-;; @0040    v22 = call fn0(v21, v20, v2)  ; v20 = 0
-;; @0040    jump block6(v22)
-;;
-;; block6(v19: i64) cold:
-;; @0040    v23 = global_value.i64 gv3
-;; @0040    v24 = load.i64 notrap aligned readonly v23+80
-;; @0040    v25 = load.i32 notrap aligned readonly v24+4
-;; @0040    v26 = load.i32 icall_null aligned readonly v19+16
-;; @0040    v27 = icmp eq v26, v25
-;; @0040    trapz v27, bad_sig
-;; @0040    v28 = load.i64 notrap aligned readonly v19+8
-;; @0040    v29 = load.i64 notrap aligned readonly v19+24
-;; @0040    v30 = icmp eq v29, v4
-;; @0040    brif v30, block4, block3(v28, v29)
-;;
-;; block10 cold:
-;; @0043    v50 = iconst.i32 0
-;; @0043    v51 = global_value.i64 gv3
-;; @0043    v52 = call fn0(v51, v50, v33)  ; v50 = 0
-;; @0043    jump block11(v52)
-;;
-;; block11(v49: i64) cold:
-;; @0043    v53 = global_value.i64 gv3
-;; @0043    v54 = load.i64 notrap aligned readonly v53+80
-;; @0043    v55 = load.i32 notrap aligned readonly v54+4
-;; @0043    v56 = load.i32 icall_null aligned readonly v49+16
-;; @0043    v57 = icmp eq v56, v55
-;; @0043    trapz v57, bad_sig
-;; @0043    v58 = load.i64 notrap aligned readonly v49+8
-;; @0043    v59 = load.i64 notrap aligned readonly v49+24
-;; @0043    v60 = icmp eq v59, v34
-;; @0043    brif v60, block9, block8(v58, v59)
-;;
-;; block1(v3: i32):
-;; @0046    return v3
-;; }
-;;
-;; function u0:1(i64 vmctx, i64, i32) -> i32 tail {
-;;     gv0 = vmctx
-;;     gv1 = load.i64 notrap aligned readonly gv0+8
-;;     gv2 = load.i64 notrap aligned gv1
-;;     gv3 = vmctx
-;;     gv4 = load.i64 notrap aligned readonly gv3+88
-;;     sig0 = (i64 vmctx, i64) -> i32 tail
-;;     sig1 = (i64 vmctx, i32 uext, i32 uext) -> i64 system_v
-;;     fn0 = colocated u1:9 sig1
-;;     stack_limit = gv2
-;;
-;; block0(v0: i64, v1: i64, v2: i32):
-;; @004b    v4 = iconst.i32 10
-;; @004b    v5 = icmp uge v2, v4  ; v4 = 10
-;; @004b    v6 = uextend.i64 v2
-;; @004b    v7 = global_value.i64 gv4
-;; @004b    v8 = ishl_imm v6, 3
-;; @004b    v9 = iadd v7, v8
-;; @004b    v10 = iconst.i64 0
-;; @004b    v11 = select_spectre_guard v5, v10, v9  ; v10 = 0
-;; @004b    v12 = load.i64 table_oob aligned table v11
-;; @004b    v13 = band_imm v12, -2
-;; @004b    brif v12, block3(v13), block2
-;;
-;; block2 cold:
-;; @004b    v15 = iconst.i32 0
-;; @004b    v16 = global_value.i64 gv3
-;; @004b    v17 = call fn0(v16, v15, v2)  ; v15 = 0
-;; @004b    jump block3(v17)
-;;
-;; block3(v14: i64):
-;; @004b    v18 = global_value.i64 gv3
-;; @004b    v19 = load.i64 notrap aligned readonly v18+80
-;; @004b    v20 = load.i32 notrap aligned readonly v19+4
-;; @004b    v21 = load.i32 icall_null aligned readonly v14+16
-;; @004b    v22 = icmp eq v21, v20
-;; @004b    trapz v22, bad_sig
-;; @004b    v23 = load.i64 notrap aligned readonly v14+8
-;; @004b    v24 = load.i64 notrap aligned readonly v14+24
-;; @004b    v25 = call_indirect sig0, v23(v24, v0)
-;; @004e    jump block1(v25)
-;;
-;; block1(v3: i32):
-;; @004e    return v3
-;; }
diff --git a/tests/disas/indirect-call-caching.wat b/tests/disas/indirect-call-caching.wat
deleted file mode 100644
index a32015292b62..000000000000
--- a/tests/disas/indirect-call-caching.wat
+++ /dev/null
@@ -1,125 +0,0 @@
-;;! target = "x86_64"
-;;! flags = [ "-Ocache-call-indirects=y" ]
-
-;; This test checks that we get the indirect-call caching optimization
-;; where it should be applicable (immutable table, null 0-index).
-;;
-;; The key bit in the expectation below is the cached-index load (v6),
-;; compare (v7), branch, fastpath in block2/block4.
-
-(module
-  (table 10 10 funcref)
-
-  (func $f1 (result i32) i32.const 1)
-  (func $f2 (result i32) i32.const 2)
-  (func $f3 (result i32) i32.const 3)
-
-  (func (export "call_it") (param i32) (result i32)
-    local.get 0
-    call_indirect (result i32))
-
-  (elem (i32.const 1) func $f1 $f2 $f3))
-;; function u0:0(i64 vmctx, i64) -> i32 tail {
-;;     gv0 = vmctx
-;;     gv1 = load.i64 notrap aligned readonly gv0+8
-;;     gv2 = load.i64 notrap aligned gv1
-;;     stack_limit = gv2
-;;
-;; block0(v0: i64, v1: i64):
-;; @003f    v3 = iconst.i32 1
-;; @0041    jump block1(v3)  ; v3 = 1
-;;
-;; block1(v2: i32):
-;; @0041    return v2
-;; }
-;;
-;; function u0:1(i64 vmctx, i64) -> i32 tail {
-;;     gv0 = vmctx
-;;     gv1 = load.i64 notrap aligned readonly gv0+8
-;;     gv2 = load.i64 notrap aligned gv1
-;;     stack_limit = gv2
-;;
-;; block0(v0: i64, v1: i64):
-;; @0044    v3 = iconst.i32 2
-;; @0046    jump block1(v3)  ; v3 = 2
-;;
-;; block1(v2: i32):
-;; @0046    return v2
-;; }
-;;
-;; function u0:2(i64 vmctx, i64) -> i32 tail {
-;;     gv0 = vmctx
-;;     gv1 = load.i64 notrap aligned readonly gv0+8
-;;     gv2 = load.i64 notrap aligned gv1
-;;     stack_limit = gv2
-;;
-;; block0(v0: i64, v1: i64):
-;; @0049    v3 = iconst.i32 3
-;; @004b    jump block1(v3)  ; v3 = 3
-;;
-;; block1(v2: i32):
-;; @004b    return v2
-;; }
-;;
-;; function u0:3(i64 vmctx, i64, i32) -> i32 tail {
-;;     gv0 = vmctx
-;;     gv1 = load.i64 notrap aligned readonly gv0+8
-;;     gv2 = load.i64 notrap aligned gv1
-;;     gv3 = vmctx
-;;     gv4 = load.i64 notrap aligned readonly gv3+88
-;;     sig0 = (i64 vmctx, i64) -> i32 tail
-;;     sig1 = (i64 vmctx, i32 uext, i32 uext) -> i64 system_v
-;;     fn0 = colocated u1:9 sig1
-;;     stack_limit = gv2
-;;
-;; block0(v0: i64, v1: i64, v2: i32):
-;; @0050    v4 = global_value.i64 gv3
-;; @0050    v5 = iadd_imm v4, 240
-;; @0050    v6 = load.i32 notrap aligned v5+8
-;; @0050    v7 = load.i64 notrap aligned v5
-;; @0050    v8 = icmp eq v6, v2
-;; @0050    brif v8, block3(v7, v4), block2
-;;
-;; block2 cold:
-;; @0050    v9 = iconst.i32 10
-;; @0050    v10 = icmp.i32 uge v2, v9  ; v9 = 10
-;; @0050    v11 = uextend.i64 v2
-;; @0050    v12 = global_value.i64 gv4
-;; @0050    v13 = ishl_imm v11, 3
-;; @0050    v14 = iadd v12, v13
-;; @0050    v15 = iconst.i64 0
-;; @0050    v16 = select_spectre_guard v10, v15, v14  ; v15 = 0
-;; @0050    v17 = load.i64 table_oob aligned table v16
-;; @0050    v18 = band_imm v17, -2
-;; @0050    brif v17, block6(v18), block5
-;;
-;; block4 cold:
-;; @0050    store.i32 notrap aligned v2, v5+8
-;; @0050    store.i64 notrap aligned v28, v5
-;; @0050    jump block3(v28, v29)
-;;
-;; block3(v31: i64, v32: i64):
-;; @0050    v33 = call_indirect sig0, v31(v32, v0)
-;; @0053    jump block1(v33)
-;;
-;; block5 cold:
-;; @0050    v20 = iconst.i32 0
-;; @0050    v21 = global_value.i64 gv3
-;; @0050    v22 = call fn0(v21, v20, v2)  ; v20 = 0
-;; @0050    jump block6(v22)
-;;
-;; block6(v19: i64) cold:
-;; @0050    v23 = global_value.i64 gv3
-;; @0050    v24 = load.i64 notrap aligned readonly v23+80
-;; @0050    v25 = load.i32 notrap aligned readonly v24
-;; @0050    v26 = load.i32 icall_null aligned readonly v19+16
-;; @0050    v27 = icmp eq v26, v25
-;; @0050    trapz v27, bad_sig
-;; @0050    v28 = load.i64 notrap aligned readonly v19+8
-;; @0050    v29 = load.i64 notrap aligned readonly v19+24
-;; @0050    v30 = icmp eq v29, v4
-;; @0050    brif v30, block4, block3(v28, v29)
-;;
-;; block1(v3: i32):
-;; @0053    return v3
-;; }
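[Editor's note: the two slot-limit expectations above hinge on the saturating slot assignment implemented by the deleted `alloc_call_indirect_index` and the module prescan. A compact model of that behavior, illustrative only (names here are hypothetical stand-ins, not the removed API):]

```rust
/// Model of per-callsite slot assignment: the module-wide prescan
/// saturated its running count at `max`, so a function whose start
/// index already equals `max` gets no slots at all (slot-limit-2),
/// while a function that crosses `max` mid-body stops allocating
/// exactly at the limit (slot-limit-1).
struct SlotAlloc {
    next: usize,
    max: usize, // e.g. the removed `max_call_indirect_cache_slots` tunable
}

impl SlotAlloc {
    fn alloc(&mut self) -> Option<usize> {
        if self.next >= self.max {
            return None; // this callsite stays uncached: plain checked call
        }
        let idx = self.next;
        self.next += 1;
        Some(idx)
    }
}

#[test]
fn saturates_at_limit() {
    let mut a = SlotAlloc { next: 0, max: 2 };
    assert_eq!(a.alloc(), Some(0));
    assert_eq!(a.alloc(), Some(1));
    assert_eq!(a.alloc(), None); // third `call_indirect` gets no cache slot
}
```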