From a787801f0dacb81383518e19642ddd49a3c504f1 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Fri, 22 Sep 2023 09:08:46 -0700 Subject: [PATCH] Make `wasmtime::WasmCoreDump` serializable This commit makes it so that the library type for core dumps is serializable into the standard binary format for core dumps. Additionally, this commit makes it so that we use the library type for generating core dumps in the CLI. We previously were using a one-off implementation of core dump generation that only had backtrace information and no instances, modules, globals, or memories included. The library type has all that information, so the core dumps produced by our CLI will both be more featureful and be generated by shared code paths going forward. Along the way, implementing all this required some new helper methods sprinkled throughout `wasmtime` and `wasmtime-runtime`: * `wasmtime::Instance::module`: get the module that a `wasmtime::Instance` is an instance of. This is public, since it seems generally useful. This involved adding a new return value from `ModuleRegistry::register_module` that is an identifier that can be used to recover a reference to the registered module. * `wasmtime::Instance::all_{globals,memories}`: get the full global/memory index space. I made these `pub(crate)` out of caution. I don't think we want to commit to exposing non-exported things in the public API, even if we internally need them for debugging-related features like core dumps. These also needed corresponding methods inside `wasmtime-runtime`. * `wasmtime::{Global,Memory}::hash_key`: this was needed to work around the fact that each time you call `{Global,Memory}::from_wasmtime`, it creates a new entry in the `StoreData` and so you can get duplicates. But we need to key some hash maps on globals and memories when constructing core dumps, so we can't treat the underlying `Stored` as a hash key because it isn't stable across duplicate `StoreData` entries. So we have these new methods. They are only `pub(crate)`, are definitely implementation details, and aren't exposed in the public API. * `wasmtime::FrameInfo::module`: Each frame in a backtrace now keeps a handle to its associated module instead of just the name. This is publicly exposed because it seems generally useful. This means I also deprecated `wasmtime::FrameInfo::module_name` since you can now instead do `frame.module().name()` to get that exact same info. I updated callers inside the repo. --- crates/cli-flags/src/lib.rs | 3 + crates/runtime/src/instance.rs | 63 +++++++- crates/wasmtime/src/coredump.rs | 195 ++++++++++++++++++++++++- crates/wasmtime/src/externals.rs | 9 ++ crates/wasmtime/src/instance.rs | 56 ++++++- crates/wasmtime/src/memory.rs | 9 ++ crates/wasmtime/src/module.rs | 16 +- crates/wasmtime/src/module/registry.rs | 56 +++++-- crates/wasmtime/src/store.rs | 54 +++++-- crates/wasmtime/src/trap.rs | 46 +++--- src/commands/run.rs | 76 ++++------ tests/all/component_model/import.rs | 8 +- tests/all/coredump.rs | 5 + tests/all/traps.rs | 10 +- 14 files changed, 500 insertions(+), 106 deletions(-) diff --git a/crates/cli-flags/src/lib.rs b/crates/cli-flags/src/lib.rs index 9df8c1147ab8..6e53576d3ee4 100644 --- a/crates/cli-flags/src/lib.rs +++ b/crates/cli-flags/src/lib.rs @@ -350,6 +350,9 @@ impl CommonOptions { if let Some(enable) = self.debug.debug_info { config.debug_info(enable); } + if self.debug.coredump.is_some() { + config.coredump_on_trap(true); + } if let Some(level) = self.opts.opt_level { config.cranelift_opt_level(level); } diff --git a/crates/runtime/src/instance.rs b/crates/runtime/src/instance.rs index 29c91450501e..7ec1470af249 100644 --- a/crates/runtime/src/instance.rs +++ b/crates/runtime/src/instance.rs @@ -384,6 +384,28 @@ impl Instance { } } + /// Get all globals within this instance. + /// + /// Returns both import and defined globals. + /// + /// Returns both exported and non-exported globals. + /// + /// Gives access to the full globals space. + pub fn all_globals<'a>( + &'a mut self, + ) -> impl ExactSizeIterator + 'a { + let module = self.module().clone(); + module.globals.keys().map(move |idx| { + ( + idx, + ExportGlobal { + definition: self.defined_or_imported_global_ptr(idx), + global: self.module().globals[idx], + }, + ) + }) + } + /// Get the globals defined in this instance (not imported). pub fn defined_globals<'a>( &'a mut self, @@ -1350,14 +1372,43 @@ impl InstanceHandle { self.instance_mut().get_table_with_lazy_init(index, range) } - /// Return the memories defined in this instance (not imported). - pub fn defined_memories<'a>(&'a mut self) -> impl ExactSizeIterator + 'a { - let idxs = (0..self.module().memory_plans.len()) - .skip(self.module().num_imported_memories) + /// Get all memories within this instance. + /// + /// Returns both import and defined memories. + /// + /// Returns both exported and non-exported memories. + /// + /// Gives access to the full memories space. + pub fn all_memories<'a>( + &'a mut self, + ) -> impl ExactSizeIterator + 'a { + let indices = (0..self.module().memory_plans.len()) .map(|i| MemoryIndex::new(i)) .collect::>(); - idxs.into_iter() - .map(|memidx| self.get_exported_memory(memidx)) + indices + .into_iter() + .map(|i| (i, self.get_exported_memory(i))) + } + + /// Return the memories defined in this instance (not imported). + pub fn defined_memories<'a>(&'a mut self) -> impl ExactSizeIterator + 'a { + let num_imported = self.module().num_imported_memories; + self.all_memories() + .skip(num_imported) + .map(|(_i, memory)| memory) + } + + /// Get all globals within this instance. + /// + /// Returns both import and defined globals. + /// + /// Returns both exported and non-exported globals. + /// + /// Gives access to the full globals space. + pub fn all_globals<'a>( + &'a mut self, + ) -> impl ExactSizeIterator + 'a { + self.instance_mut().all_globals() } /// Get the globals defined in this instance (not imported). diff --git a/crates/wasmtime/src/coredump.rs b/crates/wasmtime/src/coredump.rs index ad4d22363eba..fcea02aece83 100644 --- a/crates/wasmtime/src/coredump.rs +++ b/crates/wasmtime/src/coredump.rs @@ -1,6 +1,9 @@ -use std::fmt; +use std::{collections::HashMap, fmt}; -use crate::{store::StoreOpaque, FrameInfo, Global, Instance, Memory, Module, WasmBacktrace}; +use crate::{ + store::StoreOpaque, AsContextMut, FrameInfo, Global, Instance, Memory, Module, StoreContextMut, + WasmBacktrace, +}; /// Representation of a core dump of a WebAssembly module /// @@ -80,6 +83,194 @@ impl WasmCoreDump { pub fn memories(&self) -> &[Memory] { self.memories.as_ref() } + + /// Serialize this core dump into [the standard core dump binary + /// format][spec]. + /// + /// Once serialized, you can write this core dump do disk, send it over the + /// network, or pass it to other debugging tools that consume Wasm core + /// dumps. + /// + /// [spec]: https://github.com/WebAssembly/tool-conventions/blob/main/Coredump.md + pub fn serialize(&self, mut store: impl AsContextMut, name: &str) -> Vec { + let store = store.as_context_mut(); + self._serialize(store, name) + } + + fn _serialize(&self, mut store: StoreContextMut<'_, T>, name: &str) -> Vec { + let mut core_dump = wasm_encoder::Module::new(); + + core_dump.section(&wasm_encoder::CoreDumpSection::new(name)); + + // A map from each memory to its index in the core dump's memories + // section. + let mut memory_to_idx = HashMap::new(); + + let mut data = wasm_encoder::DataSection::new(); + + { + let mut memories = wasm_encoder::MemorySection::new(); + for mem in self.memories() { + let memory_idx = memories.len(); + memory_to_idx.insert(mem.hash_key(&store.0), memory_idx); + let ty = mem.ty(&store); + memories.memory(wasm_encoder::MemoryType { + minimum: mem.size(&store), + maximum: ty.maximum(), + memory64: ty.is_64(), + shared: ty.is_shared(), + }); + + // Attach the memory data, balancing number of data segments and + // binary size. We don't want to attach the whole memory in one + // big segment, since it likely contains a bunch of large runs + // of zeroes. But we can't encode the data without any potential + // runs of zeroes (i.e. including only non-zero data in our + // segments) because we can run up against the implementation + // limits for number of segments in a Wasm module this way. So + // to balance these conflicting desires, we break the memory up + // into reasonably-sized chunks and then trim runs of zeroes + // from the start and end of each chunk. + const CHUNK_SIZE: u32 = 4096; + for (i, chunk) in mem + .data(&store) + .chunks_exact(CHUNK_SIZE as usize) + .enumerate() + { + if let Some(start) = chunk.iter().position(|byte| *byte != 0) { + let end = chunk.iter().rposition(|byte| *byte != 0).unwrap() + 1; + let offset = (i as u32) * CHUNK_SIZE + (start as u32); + let offset = wasm_encoder::ConstExpr::i32_const(offset as i32); + data.active(memory_idx, &offset, chunk[start..end].iter().copied()); + } + } + } + core_dump.section(&memories); + } + + // A map from each global to its index in the core dump's globals + // section. + let mut global_to_idx = HashMap::new(); + + { + let mut globals = wasm_encoder::GlobalSection::new(); + for g in self.globals() { + global_to_idx.insert(g.hash_key(&store.0), globals.len()); + let ty = g.ty(&store); + let mutable = matches!(ty.mutability(), crate::Mutability::Var); + let val_type = match ty.content() { + crate::ValType::I32 => wasm_encoder::ValType::I32, + crate::ValType::I64 => wasm_encoder::ValType::I64, + crate::ValType::F32 => wasm_encoder::ValType::F32, + crate::ValType::F64 => wasm_encoder::ValType::F64, + crate::ValType::V128 => wasm_encoder::ValType::V128, + crate::ValType::FuncRef => wasm_encoder::ValType::FUNCREF, + crate::ValType::ExternRef => wasm_encoder::ValType::EXTERNREF, + }; + let init = match g.get(&mut store) { + crate::Val::I32(x) => wasm_encoder::ConstExpr::i32_const(x), + crate::Val::I64(x) => wasm_encoder::ConstExpr::i64_const(x), + crate::Val::F32(x) => { + wasm_encoder::ConstExpr::f32_const(unsafe { std::mem::transmute(x) }) + } + crate::Val::F64(x) => { + wasm_encoder::ConstExpr::f64_const(unsafe { std::mem::transmute(x) }) + } + crate::Val::V128(x) => wasm_encoder::ConstExpr::v128_const(x.as_u128() as i128), + crate::Val::FuncRef(_) => { + wasm_encoder::ConstExpr::ref_null(wasm_encoder::HeapType::Func) + } + crate::Val::ExternRef(_) => { + wasm_encoder::ConstExpr::ref_null(wasm_encoder::HeapType::Extern) + } + }; + globals.global(wasm_encoder::GlobalType { val_type, mutable }, &init); + } + core_dump.section(&globals); + } + + core_dump.section(&data); + drop(data); + + // A map from module id to its index within the core dump's modules + // section. + let mut module_to_index = HashMap::new(); + + { + let mut modules = wasm_encoder::CoreDumpModulesSection::new(); + for module in self.modules() { + module_to_index.insert(module.id(), modules.len()); + match module.name() { + Some(name) => modules.module(name), + None => modules.module(&format!("", modules.len())), + }; + } + core_dump.section(&modules); + } + + // TODO: We can't currently recover instances from stack frames. We can + // recover module via the frame's PC, but if there are multiple + // instances of the same module, we don't know which instance the frame + // is associated with. Therefore, we do a best effort job: remember the + // last instance of each module and always choose that one. We record + // that information here. + let mut module_to_instance = HashMap::new(); + + { + let mut instances = wasm_encoder::CoreDumpInstancesSection::new(); + for instance in self.instances() { + let module = instance.module(&store); + module_to_instance.insert(module.id(), instances.len()); + + let module_index = module_to_index[&module.id()]; + + let memories = instance + .all_memories(&mut store.0) + .collect::>() + .into_iter() + .map(|(_i, memory)| memory_to_idx[&memory.hash_key(&store.0)]) + .collect::>(); + + let globals = instance + .all_globals(&mut store.0) + .collect::>() + .into_iter() + .map(|(_i, global)| global_to_idx[&global.hash_key(&store.0)]) + .collect::>(); + + instances.instance(module_index, memories, globals); + } + core_dump.section(&instances); + } + + { + let thread_name = "main"; + let mut stack = wasm_encoder::CoreDumpStackSection::new(thread_name); + for frame in self.frames() { + // This isn't necessarily the right instance if there are + // multiple instances of the same module. See comment above + // `module_to_instance` for details. + let instance = module_to_instance[&frame.module().id()]; + + let func = frame.func_index(); + + let offset = frame + .func_offset() + .and_then(|o| u32::try_from(o).ok()) + .unwrap_or(0); + + // We can't currently recover locals and the operand stack. We + // should eventually be able to do that with Winch though. + let locals = []; + let operand_stack = []; + + stack.frame(instance, func, offset, locals, operand_stack); + } + core_dump.section(&stack); + } + + core_dump.finish() + } } impl fmt::Display for WasmCoreDump { diff --git a/crates/wasmtime/src/externals.rs b/crates/wasmtime/src/externals.rs index a43b90338a91..238a7ccf4516 100644 --- a/crates/wasmtime/src/externals.rs +++ b/crates/wasmtime/src/externals.rs @@ -349,6 +349,15 @@ impl Global { from: store[self.0].definition, } } + + /// Get a stable hash key for this global. + /// + /// Even if the same underlying global definition is added to the + /// `StoreData` multiple times and becomes multiple `wasmtime::Global`s, + /// this hash key will be consistent across all of these globals. + pub(crate) fn hash_key(&self, store: &StoreOpaque) -> impl std::hash::Hash + Eq { + store[self.0].definition as usize + } } /// A WebAssembly `table`, or an array of values. diff --git a/crates/wasmtime/src/instance.rs b/crates/wasmtime/src/instance.rs index 95a0d4d3c0cd..65c452204041 100644 --- a/crates/wasmtime/src/instance.rs +++ b/crates/wasmtime/src/instance.rs @@ -2,7 +2,7 @@ use crate::linker::{Definition, DefinitionType}; use crate::store::{InstanceId, StoreOpaque, Stored}; use crate::types::matching; use crate::{ - AsContextMut, Engine, Export, Extern, Func, Global, Memory, Module, SharedMemory, + AsContextMut, Engine, Export, Extern, Func, Global, Memory, Module, SharedMemory, StoreContext, StoreContextMut, Table, TypedFunc, }; use anyhow::{anyhow, bail, Context, Result}; @@ -260,7 +260,7 @@ impl Instance { // Register the module just before instantiation to ensure we keep the module // properly referenced while in use by the store. - store.modules_mut().register_module(module); + let module_id = store.modules_mut().register_module(module); store.fill_func_refs(); // The first thing we do is issue an instance allocation request @@ -298,7 +298,7 @@ impl Instance { // conflicts with the borrow on `store.engine`) but this doesn't // matter in practice since initialization isn't even running any // code here anyway. - let id = store.add_instance(instance_handle.clone()); + let id = store.add_instance(instance_handle.clone(), module_id); // Additionally, before we start doing fallible instantiation, we // do one more step which is to insert an `InstanceData` @@ -363,6 +363,16 @@ impl Instance { Ok(()) } + /// Get this instance's module. + pub fn module<'a, T: 'a>(&'a self, store: impl Into>) -> &'a Module { + self._module(store.into().0) + } + + fn _module<'a>(&'a self, store: &'a StoreOpaque) -> &'a Module { + let InstanceData { id, .. } = store[self.0]; + store.module_for_instance(id).unwrap() + } + /// Returns the list of exported items from this [`Instance`]. /// /// # Panics @@ -536,6 +546,46 @@ impl Instance { pub(crate) fn id(&self, store: &StoreOpaque) -> InstanceId { store[self.0].id } + + /// Get all globals within this instance. + /// + /// Returns both import and defined globals. + /// + /// Returns both exported and non-exported globals. + /// + /// Gives access to the full globals space. + pub(crate) fn all_globals<'a>( + &'a self, + store: &'a mut StoreOpaque, + ) -> impl ExactSizeIterator + 'a { + let data = &store[self.0]; + let instance = store.instance_mut(data.id); + instance + .all_globals() + .collect::>() + .into_iter() + .map(|(i, g)| (i, unsafe { Global::from_wasmtime_global(g, store) })) + } + + /// Get all memories within this instance. + /// + /// Returns both import and defined memories. + /// + /// Returns both exported and non-exported memories. + /// + /// Gives access to the full memories space. + pub(crate) fn all_memories<'a>( + &'a self, + store: &'a mut StoreOpaque, + ) -> impl ExactSizeIterator + 'a { + let data = &store[self.0]; + let instance = store.instance_mut(data.id); + instance + .all_memories() + .collect::>() + .into_iter() + .map(|(i, m)| (i, unsafe { Memory::from_wasmtime_memory(m, store) })) + } } pub(crate) struct OwnedImports { diff --git a/crates/wasmtime/src/memory.rs b/crates/wasmtime/src/memory.rs index c473fa143dd1..d2d27b547856 100644 --- a/crates/wasmtime/src/memory.rs +++ b/crates/wasmtime/src/memory.rs @@ -577,6 +577,15 @@ impl Memory { pub(crate) fn comes_from_same_store(&self, store: &StoreOpaque) -> bool { store.store_data().contains(self.0) } + + /// Get a stable hash key for this memory. + /// + /// Even if the same underlying memory definition is added to the + /// `StoreData` multiple times and becomes multiple `wasmtime::Memory`s, + /// this hash key will be consistent across all of these memories. + pub(crate) fn hash_key(&self, store: &StoreOpaque) -> impl std::hash::Hash + Eq { + store[self.0].definition as usize + } } /// A linear memory. This trait provides an interface for raw memory buffers diff --git a/crates/wasmtime/src/module.rs b/crates/wasmtime/src/module.rs index 3e812cfc8543..38a51fcd7ea0 100644 --- a/crates/wasmtime/src/module.rs +++ b/crates/wasmtime/src/module.rs @@ -26,7 +26,9 @@ use wasmtime_runtime::{ mod registry; -pub use registry::{is_wasm_trap_pc, register_code, unregister_code, ModuleRegistry}; +pub use registry::{ + is_wasm_trap_pc, register_code, unregister_code, ModuleRegistry, RegisteredModuleId, +}; /// A compiled WebAssembly module, ready to be instantiated. /// @@ -130,6 +132,14 @@ struct ModuleInner { offsets: VMOffsets, } +impl std::fmt::Debug for Module { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Module") + .field("name", &self.name()) + .finish_non_exhaustive() + } +} + impl Module { /// Creates a new WebAssembly `Module` from the given in-memory `bytes`. /// @@ -1094,6 +1104,10 @@ impl Module { (loc.start as usize, loc.length as usize) }) } + + pub(crate) fn id(&self) -> CompiledModuleId { + self.inner.module.unique_id() + } } impl ModuleInner { diff --git a/crates/wasmtime/src/module/registry.rs b/crates/wasmtime/src/module/registry.rs index 46fb18262be9..65de4bf22536 100644 --- a/crates/wasmtime/src/module/registry.rs +++ b/crates/wasmtime/src/module/registry.rs @@ -44,10 +44,33 @@ struct LoadedCode { modules: BTreeMap, } +/// An identifier of a module that has previously been inserted into a +/// `ModuleRegistry`. +#[derive(Clone, Copy)] +pub enum RegisteredModuleId { + /// Index into `ModuleRegistry::modules_without_code`. + WithoutCode(usize), + /// Start address of the module's code so that we can get it again via + /// `ModuleRegistry::lookup_module`. + LoadedCode(usize), +} + impl ModuleRegistry { + /// Get a previously-registered module by id. + pub fn lookup_module_by_id(&self, id: RegisteredModuleId) -> Option<&Module> { + match id { + RegisteredModuleId::WithoutCode(idx) => self.modules_without_code.get(idx), + RegisteredModuleId::LoadedCode(pc) => { + let (module, _) = self.module_and_offset(pc)?; + Some(module) + } + } + } + /// Fetches information about a registered module given a program counter value. - pub fn lookup_module(&self, pc: usize) -> Option<&dyn ModuleInfo> { - self.module(pc).map(|(m, _)| m.module_info()) + pub fn lookup_module_info(&self, pc: usize) -> Option<&dyn ModuleInfo> { + let (module, _) = self.module_and_offset(pc)?; + Some(module.module_info()) } fn code(&self, pc: usize) -> Option<(&LoadedCode, usize)> { @@ -58,7 +81,7 @@ impl ModuleRegistry { Some((code, pc - *start)) } - fn module(&self, pc: usize) -> Option<(&Module, usize)> { + fn module_and_offset(&self, pc: usize) -> Option<(&Module, usize)> { let (code, offset) = self.code(pc)?; Some((code.module(pc)?, offset)) } @@ -72,17 +95,21 @@ impl ModuleRegistry { } /// Registers a new module with the registry. - pub fn register_module(&mut self, module: &Module) { - self.register(module.code_object(), Some(module)) + pub fn register_module(&mut self, module: &Module) -> RegisteredModuleId { + self.register(module.code_object(), Some(module)).unwrap() } #[cfg(feature = "component-model")] pub fn register_component(&mut self, component: &Component) { - self.register(component.code_object(), None) + self.register(component.code_object(), None); } /// Registers a new module with the registry. - fn register(&mut self, code: &Arc, module: Option<&Module>) { + fn register( + &mut self, + code: &Arc, + module: Option<&Module>, + ) -> Option { let text = code.code_memory().text(); // If there's not actually any functions in this module then we may @@ -92,14 +119,18 @@ impl ModuleRegistry { // module in the future. For that reason we continue to register empty // modules and retain them. if text.is_empty() { - self.modules_without_code.extend(module.cloned()); - return; + return module.map(|module| { + let id = RegisteredModuleId::WithoutCode(self.modules_without_code.len()); + self.modules_without_code.push(module.clone()); + id + }); } // The module code range is exclusive for end, so make it inclusive as // it may be a valid PC value let start_addr = text.as_ptr() as usize; let end_addr = start_addr + text.len() - 1; + let id = module.map(|_| RegisteredModuleId::LoadedCode(start_addr)); // If this module is already present in the registry then that means // it's either an overlapping image, for example for two modules @@ -110,7 +141,7 @@ impl ModuleRegistry { if let Some(module) = module { prev.push_module(module); } - return; + return id; } // Assert that this module's code doesn't collide with any other @@ -131,6 +162,7 @@ impl ModuleRegistry { } let prev = self.loaded_code.insert(end_addr, (start_addr, item)); assert!(prev.is_none()); + id } /// Fetches trap information about a program counter in a backtrace. @@ -148,8 +180,8 @@ impl ModuleRegistry { /// boolean indicates whether the engine used to compile this module is /// using environment variables to control debuginfo parsing. pub(crate) fn lookup_frame_info(&self, pc: usize) -> Option<(FrameInfo, &Module)> { - let (module, offset) = self.module(pc)?; - let info = FrameInfo::new(module, offset)?; + let (module, offset) = self.module_and_offset(pc)?; + let info = FrameInfo::new(module.clone(), offset)?; Some((info, module)) } diff --git a/crates/wasmtime/src/store.rs b/crates/wasmtime/src/store.rs index 88d7af66152f..d6f2bc574475 100644 --- a/crates/wasmtime/src/store.rs +++ b/crates/wasmtime/src/store.rs @@ -78,7 +78,7 @@ use crate::instance::InstanceData; use crate::linker::Definition; -use crate::module::BareModuleInfo; +use crate::module::{BareModuleInfo, RegisteredModuleId}; use crate::trampoline::VMHostGlobalContext; use crate::{module::ModuleRegistry, Engine, Module, Trap, Val, ValRaw}; use crate::{Global, Instance, Memory}; @@ -433,14 +433,24 @@ where /// instance allocator. struct StoreInstance { handle: InstanceHandle, + kind: StoreInstanceKind, +} + +enum StoreInstanceKind { + /// An actual, non-dummy instance. + Real { + /// The id of this instance's module inside our owning store's + /// `ModuleRegistry`. + module_id: RegisteredModuleId, + }, - /// Whether or not this is a dummy instance that is just an implementation - /// detail for something else. For example, host-created memories internally - /// create a dummy instance. + /// This is a dummy instance that is just an implementation detail for + /// something else. For example, host-created memories internally create a + /// dummy instance. /// - /// Regardless of the configured instance allocator for this engine, dummy + /// Regardless of the configured instance allocator for the engine, dummy /// instances always use the on-demand allocator to deallocate the instance. - dummy: bool, + Dummy, } #[derive(Copy, Clone)] @@ -1253,10 +1263,27 @@ impl StoreOpaque { &mut self.host_globals } - pub unsafe fn add_instance(&mut self, handle: InstanceHandle) -> InstanceId { + pub fn module_for_instance(&self, instance: InstanceId) -> Option<&'_ Module> { + match self.instances[instance.0].kind { + StoreInstanceKind::Dummy => None, + StoreInstanceKind::Real { module_id } => { + let module = self + .modules() + .lookup_module_by_id(module_id) + .expect("should always have a registered module for real instances"); + Some(module) + } + } + } + + pub unsafe fn add_instance( + &mut self, + handle: InstanceHandle, + module_id: RegisteredModuleId, + ) -> InstanceId { self.instances.push(StoreInstance { handle: handle.clone(), - dummy: false, + kind: StoreInstanceKind::Real { module_id }, }); InstanceId(self.instances.len() - 1) } @@ -1269,7 +1296,7 @@ impl StoreOpaque { pub unsafe fn add_dummy_instance(&mut self, handle: InstanceHandle) -> InstanceId { self.instances.push(StoreInstance { handle: handle.clone(), - dummy: true, + kind: StoreInstanceKind::Dummy, }); InstanceId(self.instances.len() - 1) } @@ -1290,7 +1317,7 @@ impl StoreOpaque { .enumerate() .filter_map(|(idx, inst)| { let id = InstanceId::from_index(idx); - if inst.dummy { + if let StoreInstanceKind::Dummy = inst.kind { None } else { Some(InstanceData::from_id(id)) @@ -1304,6 +1331,9 @@ impl StoreOpaque { /// Get all memories (host- or Wasm-defined) within this store. pub fn all_memories<'a>(&'a mut self) -> impl Iterator + 'a { + // NB: Host-created memories have dummy instances. Therefore, we can get + // all memories in the store by iterating over all instances (including + // dummy instances) and getting each of their defined memories. let mems = self .instances .iter_mut() @@ -2266,7 +2296,7 @@ impl Drop for StoreOpaque { let allocator = self.engine.allocator(); let ondemand = OnDemandInstanceAllocator::default(); for instance in self.instances.iter_mut() { - if instance.dummy { + if let StoreInstanceKind::Dummy = instance.kind { ondemand.deallocate_module(&mut instance.handle); } else { allocator.deallocate_module(&mut instance.handle); @@ -2291,7 +2321,7 @@ impl Drop for StoreOpaque { impl wasmtime_runtime::ModuleInfoLookup for ModuleRegistry { fn lookup(&self, pc: usize) -> Option<&dyn ModuleInfo> { - self.lookup_module(pc) + self.lookup_module_info(pc) } } diff --git a/crates/wasmtime/src/trap.rs b/crates/wasmtime/src/trap.rs index 1d0437aa1b28..b4460a25b953 100644 --- a/crates/wasmtime/src/trap.rs +++ b/crates/wasmtime/src/trap.rs @@ -373,7 +373,7 @@ impl fmt::Display for WasmBacktrace { } else { needs_newline = true; } - let name = frame.module_name().unwrap_or(""); + let name = frame.module().name().unwrap_or(""); write!(f, " {:>3}: ", i)?; if let Some(offset) = frame.module_offset() { @@ -425,7 +425,7 @@ impl fmt::Display for WasmBacktrace { /// to acquire this `FrameInfo`. For more information see [`WasmBacktrace`]. #[derive(Debug)] pub struct FrameInfo { - module_name: Option, + module: Module, func_index: u32, func_name: Option, func_start: FilePos, @@ -438,12 +438,18 @@ impl FrameInfo { /// /// Returns an object if this `pc` is known to this module, or returns `None` /// if no information can be found. - pub(crate) fn new(module: &Module, text_offset: usize) -> Option { - let module = module.compiled_module(); - let (index, _func_offset) = module.func_by_text_offset(text_offset)?; - let info = module.wasm_func_info(index); - let instr = - wasmtime_environ::lookup_file_pos(module.code_memory().address_map_data(), text_offset); + pub(crate) fn new(module: Module, text_offset: usize) -> Option { + let compiled_module = module.compiled_module(); + let (index, _func_offset) = compiled_module.func_by_text_offset(text_offset)?; + let info = compiled_module.wasm_func_info(index); + let func_start = info.start_srcloc; + let instr = wasmtime_environ::lookup_file_pos( + compiled_module.code_memory().address_map_data(), + text_offset, + ); + let index = compiled_module.module().func_index(index); + let func_index = index.index() as u32; + let func_name = compiled_module.func_name(index).map(|s| s.to_string()); // In debug mode for now assert that we found a mapping for `pc` within // the function, because otherwise something is buggy along the way and @@ -453,7 +459,7 @@ impl FrameInfo { // Note that if the module doesn't even have an address map due to // compilation settings then it's expected that `instr` is `None`. debug_assert!( - instr.is_some() || !module.has_address_map(), + instr.is_some() || !compiled_module.has_address_map(), "failed to find instruction for {:#x}", text_offset ); @@ -468,7 +474,7 @@ impl FrameInfo { // custom section contents. let mut symbols = Vec::new(); - if let Some(s) = &module.symbolize_context().ok().and_then(|c| c) { + if let Some(s) = &compiled_module.symbolize_context().ok().and_then(|c| c) { if let Some(offset) = instr.and_then(|i| i.file_offset()) { let to_lookup = u64::from(offset) - s.code_section_offset(); if let Ok(mut frames) = s.addr2line().find_frames(to_lookup).skip_all_loads() { @@ -492,14 +498,12 @@ impl FrameInfo { } } - let index = module.module().func_index(index); - Some(FrameInfo { - module_name: module.module().name.clone(), - func_index: index.index() as u32, - func_name: module.func_name(index).map(|s| s.to_string()), + module, + func_index, + func_name, instr, - func_start: info.start_srcloc, + func_start, symbols, }) } @@ -512,6 +516,13 @@ impl FrameInfo { self.func_index } + /// Returns the module for this frame. + /// + /// This is the module who's code was being run in this frame. + pub fn module(&self) -> &Module { + &self.module + } + /// Returns the identifer of the module that this frame is for. /// /// Module identifiers are present in the `name` section of a WebAssembly @@ -521,8 +532,9 @@ impl FrameInfo { /// debugging and therefore may be tweaked over time. /// /// This function returns `None` when no name can be found or inferred. + #[deprecated = "Use `frame.module().name()` instead of `frame.module_name()`"] pub fn module_name(&self) -> Option<&str> { - self.module_name.as_deref() + self.module.name() } /// Returns a descriptive name of the function for this frame, if one is diff --git a/src/commands/run.rs b/src/commands/run.rs index 01b04809b10b..93c67c1463b1 100644 --- a/src/commands/run.rs +++ b/src/commands/run.rs @@ -225,8 +225,8 @@ impl RunCommand { let main = self.load_module(&engine, &self.module_and_args[0])?; // Validate coredump-on-trap argument - if let Some(coredump_path) = &self.common.debug.coredump { - if coredump_path.contains("%") { + if let Some(path) = &self.common.debug.coredump { + if path.contains("%") { bail!("the coredump-on-trap path does not support patterns yet.") } } @@ -529,7 +529,7 @@ impl RunCommand { .wasi_cli_run() .call_run(&mut *store) .context("failed to invoke `run` function") - .map_err(|e| self.handle_coredump(e)); + .map_err(|e| self.handle_core_dump(&mut *store, e)); // Translate the `Result<(),()>` produced by wasm into a feigned // explicit exit here with status 1 if `Err(())` is returned. @@ -583,16 +583,18 @@ impl RunCommand { // Invoke the function and then afterwards print all the results that came // out, if there are any. let mut results = vec![Val::null(); ty.results().len()]; - let invoke_res = func.call(store, &values, &mut results).with_context(|| { - if let Some(name) = &self.invoke { - format!("failed to invoke `{}`", name) - } else { - format!("failed to invoke command default") - } - }); + let invoke_res = func + .call(&mut *store, &values, &mut results) + .with_context(|| { + if let Some(name) = &self.invoke { + format!("failed to invoke `{}`", name) + } else { + format!("failed to invoke command default") + } + }); if let Err(err) = invoke_res { - return Err(self.handle_coredump(err)); + return Err(self.handle_core_dump(&mut *store, err)); } if !results.is_empty() { @@ -617,7 +619,7 @@ impl RunCommand { Ok(()) } - fn handle_coredump(&self, err: Error) -> Error { + fn handle_core_dump(&self, store: &mut Store, err: Error) -> Error { let coredump_path = match &self.common.debug.coredump { Some(path) => path, None => return err, @@ -629,7 +631,7 @@ impl RunCommand { .to_str() .unwrap_or_else(|| "unknown"); - if let Err(coredump_err) = generate_coredump(&err, &source_name, coredump_path) { + if let Err(coredump_err) = write_core_dump(store, &err, &source_name, coredump_path) { eprintln!("warning: coredump failed to generate: {}", coredump_err); err } else { @@ -1037,36 +1039,22 @@ fn ctx_set_listenfd(mut num_fd: usize, builder: &mut WasiCtxBuilder) -> Result Result<()> { - let bt = err - .downcast_ref::() - .ok_or_else(|| anyhow!("no wasm backtrace found to generate coredump with"))?; - - let coredump = wasm_encoder::CoreDumpSection::new(source_name); - let mut stacksection = wasm_encoder::CoreDumpStackSection::new("main"); - for f in bt.frames() { - // We don't have the information at this point to map frames to - // individual instances of a module, so we won't be able to create the - // "frame ∈ instance ∈ module" hierarchy described in the core dump spec - // until we move core dump generation into the runtime. So for now - // instanceidx will be 0 for all frames - let instanceidx = 0; - stacksection.frame( - instanceidx, - f.func_index(), - u32::try_from(f.func_offset().unwrap_or(0)).unwrap(), - // We don't currently have access to locals/stack values - [], - [], - ); - } - let mut module = wasm_encoder::Module::new(); - module.section(&coredump); - module.section(&stacksection); - - let mut f = File::create(coredump_path) - .context(format!("failed to create file at `{}`", coredump_path))?; - f.write_all(module.as_slice()) - .with_context(|| format!("failed to write coredump file at `{}`", coredump_path))?; +fn write_core_dump( + store: &mut Store, + err: &anyhow::Error, + name: &str, + path: &str, +) -> Result<()> { + let core_dump = err + .downcast_ref::() + .expect("should have been configured to capture core dumps"); + + let core_dump = core_dump.serialize(store, name); + + let mut core_dump_file = + File::create(path).context(format!("failed to create file at `{}`", path))?; + core_dump_file + .write_all(&core_dump) + .with_context(|| format!("failed to write core dump file at `{}`", path))?; Ok(()) } diff --git a/tests/all/component_model/import.rs b/tests/all/component_model/import.rs index e77b9a73433b..9f8ae09f0a7c 100644 --- a/tests/all/component_model/import.rs +++ b/tests/all/component_model/import.rs @@ -373,23 +373,23 @@ fn attempt_to_leave_during_malloc() -> Result<()> { assert_eq!(trace.len(), 4); // This was our entry point... - assert_eq!(trace[3].module_name(), Some("m")); + assert_eq!(trace[3].module().name(), Some("m")); assert_eq!(trace[3].func_name(), Some("run")); // ... which called an imported function which ends up being originally // defined by the shim instance. The shim instance then does an indirect // call through a table which goes to the `canon.lower`'d host function - assert_eq!(trace[2].module_name(), Some("host_shim")); + assert_eq!(trace[2].module().name(), Some("host_shim")); assert_eq!(trace[2].func_name(), Some("shim_ret_string")); // ... and the lowered host function will call realloc to allocate space for // the result - assert_eq!(trace[1].module_name(), Some("m")); + assert_eq!(trace[1].module().name(), Some("m")); assert_eq!(trace[1].func_name(), Some("realloc")); // ... but realloc calls the shim instance and tries to exit the // component, triggering a dynamic trap - assert_eq!(trace[0].module_name(), Some("host_shim")); + assert_eq!(trace[0].module().name(), Some("host_shim")); assert_eq!(trace[0].func_name(), Some("shim_thunk")); // In addition to the above trap also ensure that when we enter a wasm diff --git a/tests/all/coredump.rs b/tests/all/coredump.rs index a7ee535f0235..492bce76db64 100644 --- a/tests/all/coredump.rs +++ b/tests/all/coredump.rs @@ -66,6 +66,7 @@ fn coredump_has_stack() -> Result<()> { assert_eq!(cd.frames()[0].func_name().unwrap(), "c"); assert_eq!(cd.frames()[1].func_name().unwrap(), "b"); assert_eq!(cd.frames()[2].func_name().unwrap(), "a"); + let _ = cd.serialize(&mut store, "stack"); Ok(()) } @@ -105,6 +106,7 @@ fn coredump_has_modules_and_instances() -> Result<()> { let cd = e.downcast_ref::().unwrap(); assert_eq!(cd.modules().len(), 2); assert_eq!(cd.instances().len(), 2); + let _ = cd.serialize(&mut store, "modules-and-instances"); Ok(()) } @@ -158,6 +160,7 @@ fn coredump_has_host_globals_and_memory() -> Result<()> { assert_eq!(core_dump.globals().len(), 1); assert_eq!(core_dump.memories().len(), 1); assert_eq!(core_dump.instances().len(), 1); + let _ = core_dump.serialize(&mut store, "host-globals-and-memory"); Ok(()) } @@ -191,6 +194,7 @@ fn coredump_has_defined_globals_and_memory() -> Result<()> { assert_eq!(core_dump.globals().len(), 1); assert_eq!(core_dump.memories().len(), 1); assert_eq!(core_dump.instances().len(), 1); + let _ = core_dump.serialize(&mut store, "defined-globals-and-memory"); Ok(()) } @@ -250,6 +254,7 @@ fn multiple_globals_memories_and_instances() -> Result<()> { assert_eq!(core_dump.globals().len(), 2); assert_eq!(core_dump.memories().len(), 2); assert_eq!(core_dump.instances().len(), 2); + let _ = core_dump.serialize(&mut store, "multi"); Ok(()) } diff --git a/tests/all/traps.rs b/tests/all/traps.rs index 5b7377fa1537..a77443ac209c 100644 --- a/tests/all/traps.rs +++ b/tests/all/traps.rs @@ -130,12 +130,12 @@ fn test_trap_trace() -> Result<()> { let trace = e.downcast_ref::().unwrap().frames(); assert_eq!(trace.len(), 2); - assert_eq!(trace[0].module_name().unwrap(), "hello_mod"); + assert_eq!(trace[0].module().name().unwrap(), "hello_mod"); assert_eq!(trace[0].func_index(), 1); assert_eq!(trace[0].func_name(), Some("hello")); assert_eq!(trace[0].func_offset(), Some(1)); assert_eq!(trace[0].module_offset(), Some(0x26)); - assert_eq!(trace[1].module_name().unwrap(), "hello_mod"); + assert_eq!(trace[1].module().name().unwrap(), "hello_mod"); assert_eq!(trace[1].func_index(), 0); assert_eq!(trace[1].func_name(), None); assert_eq!(trace[1].func_offset(), Some(1)); @@ -254,9 +254,9 @@ fn test_trap_trace_cb() -> Result<()> { let trace = e.downcast_ref::().unwrap().frames(); assert_eq!(trace.len(), 2); - assert_eq!(trace[0].module_name().unwrap(), "hello_mod"); + assert_eq!(trace[0].module().name().unwrap(), "hello_mod"); assert_eq!(trace[0].func_index(), 2); - assert_eq!(trace[1].module_name().unwrap(), "hello_mod"); + assert_eq!(trace[1].module().name().unwrap(), "hello_mod"); assert_eq!(trace[1].func_index(), 1); assert!(format!("{e:?}").contains("cb throw")); @@ -281,7 +281,7 @@ fn test_trap_stack_overflow() -> Result<()> { let trace = e.downcast_ref::().unwrap().frames(); assert!(trace.len() >= 32); for i in 0..trace.len() { - assert_eq!(trace[i].module_name().unwrap(), "rec_mod"); + assert_eq!(trace[i].module().name().unwrap(), "rec_mod"); assert_eq!(trace[i].func_index(), 0); assert_eq!(trace[i].func_name(), Some("run")); }