From ea6a6616ed3e805d0d5ebaa6b79fc91e28116b2b Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Fri, 22 Sep 2023 09:08:46 -0700 Subject: [PATCH] Make `wasmtime::WasmCoreDump` serializable This commit makes it so that the library type for core dumps is serializable into the standard binary format for core dumps. Additionally, this commit makes it so that we use the library type for generating core dumps in the CLI. We previously were using a one-off implementation of core dump generation that only had backtrace information and no instances, modules, globals, or memories included. The library type has all that information, so the core dumps produced by our CLI will both be more featureful and be generated by shared code paths going forward. Along the way, implementing all this required some new helper methods sprinkled throughout `wasmtime` and `wasmtime-runtime`: * `wasmtime::Instance::module`: get the module that a `wasmtime::Instance` is an instance of. This is public, since it seems generally useful. This involved adding a new return value from `ModuleRegistry::register_module` that is an identifier that can be used to recover a reference to the registered module. * `wasmtime::Instance::all_{globals,memories}`: get the full global/memory index space. I made these `pub(crate)` out of caution. I don't think we want to commit to exposing non-exported things in the public API, even if we internally need them for debugging-related features like core dumps. These also needed corresponding methods inside `wasmtime-runtime`. * `wasmtime::{Global,Memory}::hash_key`: this was needed to work around the fact that each time you call `{Global,Memory}::from_wasmtime`, it creates a new entry in the `StoreData` and so you can get duplicates. But we need to key some hash maps on globals and memories when constructing core dumps, so we can't treat the underlying `Stored` as a hash key because it isn't stable across duplicate `StoreData` entries. So we have these new methods. They are only `pub(crate)`, are definitely implementation details, and aren't exposed in the public API. * `wasmtime::FrameInfo::module`: Each frame in a backtrace now keeps a handle to its associated module instead of just the name. This is publicly exposed because it seems generally useful. This means I also deprecated `wasmtime::FrameInfo::module_name` since you can now instead do `frame.module().name()` to get that exact same info. I updated callers inside the repo. --- RELEASES.md | 7 + crates/c-api/src/trap.rs | 3 +- crates/cli-flags/src/lib.rs | 3 + crates/runtime/src/instance.rs | 63 +++++++- crates/wasmtime/src/coredump.rs | 198 ++++++++++++++++++++++++- crates/wasmtime/src/externals.rs | 52 +++++++ crates/wasmtime/src/instance.rs | 56 ++++++- crates/wasmtime/src/memory.rs | 46 ++++++ crates/wasmtime/src/module.rs | 16 +- crates/wasmtime/src/module/registry.rs | 56 +++++-- crates/wasmtime/src/store.rs | 54 +++++-- crates/wasmtime/src/trap.rs | 50 +++---- src/commands/run.rs | 76 ++++------ tests/all/component_model/import.rs | 8 +- tests/all/coredump.rs | 5 + tests/all/traps.rs | 10 +- 16 files changed, 587 insertions(+), 116 deletions(-) diff --git a/RELEASES.md b/RELEASES.md index 6d1ba437dda8..cc89c3f2231c 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -6,8 +6,15 @@ Unreleased. ### Added +* Added the `wasmtime::FrameInfo::module` method, which returns the + `wasmtime::Module` associated with the stack frame. + ### Changed +* The `wasmtime::FrameInfo::module_name` has been removed, however you can now + get identical results by chaining `wasmtime::FrameInfo::module` and + `wasmtime::Module::name`: `my_frame.module().name()`. + -------------------------------------------------------------------------------- ## 13.0.0 diff --git a/crates/c-api/src/trap.rs b/crates/c-api/src/trap.rs index 44b9dfb6043b..f2a4a64b6082 100644 --- a/crates/c-api/src/trap.rs +++ b/crates/c-api/src/trap.rs @@ -167,7 +167,8 @@ pub extern "C" fn wasmtime_frame_module_name<'a>( .module_name .get_or_init(|| { frame.trace.frames()[frame.idx] - .module_name() + .module() + .name() .map(|s| wasm_name_t::from(s.to_string().into_bytes())) }) .as_ref() diff --git a/crates/cli-flags/src/lib.rs b/crates/cli-flags/src/lib.rs index 9df8c1147ab8..6e53576d3ee4 100644 --- a/crates/cli-flags/src/lib.rs +++ b/crates/cli-flags/src/lib.rs @@ -350,6 +350,9 @@ impl CommonOptions { if let Some(enable) = self.debug.debug_info { config.debug_info(enable); } + if self.debug.coredump.is_some() { + config.coredump_on_trap(true); + } if let Some(level) = self.opts.opt_level { config.cranelift_opt_level(level); } diff --git a/crates/runtime/src/instance.rs b/crates/runtime/src/instance.rs index 29c91450501e..7ec1470af249 100644 --- a/crates/runtime/src/instance.rs +++ b/crates/runtime/src/instance.rs @@ -384,6 +384,28 @@ impl Instance { } } + /// Get all globals within this instance. + /// + /// Returns both import and defined globals. + /// + /// Returns both exported and non-exported globals. + /// + /// Gives access to the full globals space. + pub fn all_globals<'a>( + &'a mut self, + ) -> impl ExactSizeIterator + 'a { + let module = self.module().clone(); + module.globals.keys().map(move |idx| { + ( + idx, + ExportGlobal { + definition: self.defined_or_imported_global_ptr(idx), + global: self.module().globals[idx], + }, + ) + }) + } + /// Get the globals defined in this instance (not imported). pub fn defined_globals<'a>( &'a mut self, @@ -1350,14 +1372,43 @@ impl InstanceHandle { self.instance_mut().get_table_with_lazy_init(index, range) } - /// Return the memories defined in this instance (not imported). - pub fn defined_memories<'a>(&'a mut self) -> impl ExactSizeIterator + 'a { - let idxs = (0..self.module().memory_plans.len()) - .skip(self.module().num_imported_memories) + /// Get all memories within this instance. + /// + /// Returns both import and defined memories. + /// + /// Returns both exported and non-exported memories. + /// + /// Gives access to the full memories space. + pub fn all_memories<'a>( + &'a mut self, + ) -> impl ExactSizeIterator + 'a { + let indices = (0..self.module().memory_plans.len()) .map(|i| MemoryIndex::new(i)) .collect::>(); - idxs.into_iter() - .map(|memidx| self.get_exported_memory(memidx)) + indices + .into_iter() + .map(|i| (i, self.get_exported_memory(i))) + } + + /// Return the memories defined in this instance (not imported). + pub fn defined_memories<'a>(&'a mut self) -> impl ExactSizeIterator + 'a { + let num_imported = self.module().num_imported_memories; + self.all_memories() + .skip(num_imported) + .map(|(_i, memory)| memory) + } + + /// Get all globals within this instance. + /// + /// Returns both import and defined globals. + /// + /// Returns both exported and non-exported globals. + /// + /// Gives access to the full globals space. + pub fn all_globals<'a>( + &'a mut self, + ) -> impl ExactSizeIterator + 'a { + self.instance_mut().all_globals() } /// Get the globals defined in this instance (not imported). diff --git a/crates/wasmtime/src/coredump.rs b/crates/wasmtime/src/coredump.rs index ad4d22363eba..3193364eae89 100644 --- a/crates/wasmtime/src/coredump.rs +++ b/crates/wasmtime/src/coredump.rs @@ -1,6 +1,9 @@ -use std::fmt; +use std::{collections::HashMap, fmt}; -use crate::{store::StoreOpaque, FrameInfo, Global, Instance, Memory, Module, WasmBacktrace}; +use crate::{ + store::StoreOpaque, AsContextMut, FrameInfo, Global, Instance, Memory, Module, StoreContextMut, + Val, ValType, WasmBacktrace, +}; /// Representation of a core dump of a WebAssembly module /// @@ -80,6 +83,197 @@ impl WasmCoreDump { pub fn memories(&self) -> &[Memory] { self.memories.as_ref() } + + /// Serialize this core dump into [the standard core dump binary + /// format][spec]. + /// + /// The `name` parameter may be a file path, URL, or arbitrary name for the + /// "main" Wasm service or executable that was running in this store. + /// + /// Once serialized, you can write this core dump to disk, send it over the + /// network, or pass it to other debugging tools that consume Wasm core + /// dumps. + /// + /// [spec]: https://github.com/WebAssembly/tool-conventions/blob/main/Coredump.md + pub fn serialize(&self, mut store: impl AsContextMut, name: &str) -> Vec { + let store = store.as_context_mut(); + self._serialize(store, name) + } + + fn _serialize(&self, mut store: StoreContextMut<'_, T>, name: &str) -> Vec { + let mut core_dump = wasm_encoder::Module::new(); + + core_dump.section(&wasm_encoder::CoreDumpSection::new(name)); + + // A map from each memory to its index in the core dump's memories + // section. + let mut memory_to_idx = HashMap::new(); + + let mut data = wasm_encoder::DataSection::new(); + + { + let mut memories = wasm_encoder::MemorySection::new(); + for mem in self.memories() { + let memory_idx = memories.len(); + memory_to_idx.insert(mem.hash_key(&store.0), memory_idx); + let ty = mem.ty(&store); + memories.memory(wasm_encoder::MemoryType { + minimum: mem.size(&store), + maximum: ty.maximum(), + memory64: ty.is_64(), + shared: ty.is_shared(), + }); + + // Attach the memory data, balancing number of data segments and + // binary size. We don't want to attach the whole memory in one + // big segment, since it likely contains a bunch of large runs + // of zeroes. But we can't encode the data without any potential + // runs of zeroes (i.e. including only non-zero data in our + // segments) because we can run up against the implementation + // limits for number of segments in a Wasm module this way. So + // to balance these conflicting desires, we break the memory up + // into reasonably-sized chunks and then trim runs of zeroes + // from the start and end of each chunk. + const CHUNK_SIZE: u32 = 4096; + for (i, chunk) in mem + .data(&store) + .chunks_exact(CHUNK_SIZE as usize) + .enumerate() + { + if let Some(start) = chunk.iter().position(|byte| *byte != 0) { + let end = chunk.iter().rposition(|byte| *byte != 0).unwrap() + 1; + let offset = (i as u32) * CHUNK_SIZE + (start as u32); + let offset = wasm_encoder::ConstExpr::i32_const(offset as i32); + data.active(memory_idx, &offset, chunk[start..end].iter().copied()); + } + } + } + core_dump.section(&memories); + } + + // A map from each global to its index in the core dump's globals + // section. + let mut global_to_idx = HashMap::new(); + + { + let mut globals = wasm_encoder::GlobalSection::new(); + for g in self.globals() { + global_to_idx.insert(g.hash_key(&store.0), globals.len()); + let ty = g.ty(&store); + let mutable = matches!(ty.mutability(), crate::Mutability::Var); + let val_type = match ty.content() { + ValType::I32 => wasm_encoder::ValType::I32, + ValType::I64 => wasm_encoder::ValType::I64, + ValType::F32 => wasm_encoder::ValType::F32, + ValType::F64 => wasm_encoder::ValType::F64, + ValType::V128 => wasm_encoder::ValType::V128, + ValType::FuncRef => wasm_encoder::ValType::FUNCREF, + ValType::ExternRef => wasm_encoder::ValType::EXTERNREF, + }; + let init = match g.get(&mut store) { + Val::I32(x) => wasm_encoder::ConstExpr::i32_const(x), + Val::I64(x) => wasm_encoder::ConstExpr::i64_const(x), + Val::F32(x) => { + wasm_encoder::ConstExpr::f32_const(unsafe { std::mem::transmute(x) }) + } + Val::F64(x) => { + wasm_encoder::ConstExpr::f64_const(unsafe { std::mem::transmute(x) }) + } + Val::V128(x) => wasm_encoder::ConstExpr::v128_const(x.as_u128() as i128), + Val::FuncRef(_) => { + wasm_encoder::ConstExpr::ref_null(wasm_encoder::HeapType::Func) + } + Val::ExternRef(_) => { + wasm_encoder::ConstExpr::ref_null(wasm_encoder::HeapType::Extern) + } + }; + globals.global(wasm_encoder::GlobalType { val_type, mutable }, &init); + } + core_dump.section(&globals); + } + + core_dump.section(&data); + drop(data); + + // A map from module id to its index within the core dump's modules + // section. + let mut module_to_index = HashMap::new(); + + { + let mut modules = wasm_encoder::CoreDumpModulesSection::new(); + for module in self.modules() { + module_to_index.insert(module.id(), modules.len()); + match module.name() { + Some(name) => modules.module(name), + None => modules.module(&format!("", modules.len())), + }; + } + core_dump.section(&modules); + } + + // TODO: We can't currently recover instances from stack frames. We can + // recover module via the frame's PC, but if there are multiple + // instances of the same module, we don't know which instance the frame + // is associated with. Therefore, we do a best effort job: remember the + // last instance of each module and always choose that one. We record + // that information here. + let mut module_to_instance = HashMap::new(); + + { + let mut instances = wasm_encoder::CoreDumpInstancesSection::new(); + for instance in self.instances() { + let module = instance.module(&store); + module_to_instance.insert(module.id(), instances.len()); + + let module_index = module_to_index[&module.id()]; + + let memories = instance + .all_memories(&mut store.0) + .collect::>() + .into_iter() + .map(|(_i, memory)| memory_to_idx[&memory.hash_key(&store.0)]) + .collect::>(); + + let globals = instance + .all_globals(&mut store.0) + .collect::>() + .into_iter() + .map(|(_i, global)| global_to_idx[&global.hash_key(&store.0)]) + .collect::>(); + + instances.instance(module_index, memories, globals); + } + core_dump.section(&instances); + } + + { + let thread_name = "main"; + let mut stack = wasm_encoder::CoreDumpStackSection::new(thread_name); + for frame in self.frames() { + // This isn't necessarily the right instance if there are + // multiple instances of the same module. See comment above + // `module_to_instance` for details. + let instance = module_to_instance[&frame.module().id()]; + + let func = frame.func_index(); + + let offset = frame + .func_offset() + .and_then(|o| u32::try_from(o).ok()) + .unwrap_or(0); + + // We can't currently recover locals and the operand stack. We + // should eventually be able to do that with Winch though. + let locals = []; + let operand_stack = []; + + stack.frame(instance, func, offset, locals, operand_stack); + } + core_dump.section(&stack); + } + + core_dump.finish() + } } impl fmt::Display for WasmCoreDump { diff --git a/crates/wasmtime/src/externals.rs b/crates/wasmtime/src/externals.rs index a43b90338a91..2f90384ed77b 100644 --- a/crates/wasmtime/src/externals.rs +++ b/crates/wasmtime/src/externals.rs @@ -349,6 +349,58 @@ impl Global { from: store[self.0].definition, } } + + /// Get a stable hash key for this global. + /// + /// Even if the same underlying global definition is added to the + /// `StoreData` multiple times and becomes multiple `wasmtime::Global`s, + /// this hash key will be consistent across all of these globals. + pub(crate) fn hash_key(&self, store: &StoreOpaque) -> impl std::hash::Hash + Eq { + store[self.0].definition as usize + } +} + +#[cfg(test)] +mod global_tests { + use super::*; + use crate::{Instance, Module, Store}; + + #[test] + fn hash_key_is_stable_across_duplicate_store_data_entries() -> Result<()> { + let mut store = Store::<()>::default(); + let module = Module::new( + store.engine(), + r#" + (module + (global (export "g") (mut i32) (i32.const 0)) + ) + "#, + )?; + let instance = Instance::new(&mut store, &module, &[])?; + + // Each time we `get_global`, we call `Global::from_wasmtime` which adds + // a new entry to `StoreData`, so `g1` and `g2` will have different + // indices into `StoreData`. + let g1 = instance.get_global(&mut store, "g").unwrap(); + let g2 = instance.get_global(&mut store, "g").unwrap(); + + // That said, they really point to the same global. + assert_eq!(g1.get(&mut store).unwrap_i32(), 0); + assert_eq!(g2.get(&mut store).unwrap_i32(), 0); + g1.set(&mut store, Val::I32(42))?; + assert_eq!(g1.get(&mut store).unwrap_i32(), 42); + assert_eq!(g2.get(&mut store).unwrap_i32(), 42); + + // And therefore their hash keys are the same. + assert!(g1.hash_key(&store.as_context().0) == g2.hash_key(&store.as_context().0)); + + // But the hash keys are different from different globals. + let instance2 = Instance::new(&mut store, &module, &[])?; + let g3 = instance2.get_global(&mut store, "g").unwrap(); + assert!(g1.hash_key(&store.as_context().0) != g3.hash_key(&store.as_context().0)); + + Ok(()) + } } /// A WebAssembly `table`, or an array of values. diff --git a/crates/wasmtime/src/instance.rs b/crates/wasmtime/src/instance.rs index 95a0d4d3c0cd..0adb96c8d2ed 100644 --- a/crates/wasmtime/src/instance.rs +++ b/crates/wasmtime/src/instance.rs @@ -2,7 +2,7 @@ use crate::linker::{Definition, DefinitionType}; use crate::store::{InstanceId, StoreOpaque, Stored}; use crate::types::matching; use crate::{ - AsContextMut, Engine, Export, Extern, Func, Global, Memory, Module, SharedMemory, + AsContextMut, Engine, Export, Extern, Func, Global, Memory, Module, SharedMemory, StoreContext, StoreContextMut, Table, TypedFunc, }; use anyhow::{anyhow, bail, Context, Result}; @@ -260,7 +260,7 @@ impl Instance { // Register the module just before instantiation to ensure we keep the module // properly referenced while in use by the store. - store.modules_mut().register_module(module); + let module_id = store.modules_mut().register_module(module); store.fill_func_refs(); // The first thing we do is issue an instance allocation request @@ -298,7 +298,7 @@ impl Instance { // conflicts with the borrow on `store.engine`) but this doesn't // matter in practice since initialization isn't even running any // code here anyway. - let id = store.add_instance(instance_handle.clone()); + let id = store.add_instance(instance_handle.clone(), module_id); // Additionally, before we start doing fallible instantiation, we // do one more step which is to insert an `InstanceData` @@ -363,6 +363,16 @@ impl Instance { Ok(()) } + /// Get this instance's module. + pub fn module<'a, T: 'a>(&self, store: impl Into>) -> &'a Module { + self._module(store.into().0) + } + + fn _module<'a>(&self, store: &'a StoreOpaque) -> &'a Module { + let InstanceData { id, .. } = store[self.0]; + store.module_for_instance(id).unwrap() + } + /// Returns the list of exported items from this [`Instance`]. /// /// # Panics @@ -536,6 +546,46 @@ impl Instance { pub(crate) fn id(&self, store: &StoreOpaque) -> InstanceId { store[self.0].id } + + /// Get all globals within this instance. + /// + /// Returns both import and defined globals. + /// + /// Returns both exported and non-exported globals. + /// + /// Gives access to the full globals space. + pub(crate) fn all_globals<'a>( + &'a self, + store: &'a mut StoreOpaque, + ) -> impl ExactSizeIterator + 'a { + let data = &store[self.0]; + let instance = store.instance_mut(data.id); + instance + .all_globals() + .collect::>() + .into_iter() + .map(|(i, g)| (i, unsafe { Global::from_wasmtime_global(g, store) })) + } + + /// Get all memories within this instance. + /// + /// Returns both import and defined memories. + /// + /// Returns both exported and non-exported memories. + /// + /// Gives access to the full memories space. + pub(crate) fn all_memories<'a>( + &'a self, + store: &'a mut StoreOpaque, + ) -> impl ExactSizeIterator + 'a { + let data = &store[self.0]; + let instance = store.instance_mut(data.id); + instance + .all_memories() + .collect::>() + .into_iter() + .map(|(i, m)| (i, unsafe { Memory::from_wasmtime_memory(m, store) })) + } } pub(crate) struct OwnedImports { diff --git a/crates/wasmtime/src/memory.rs b/crates/wasmtime/src/memory.rs index c473fa143dd1..94cb354d4246 100644 --- a/crates/wasmtime/src/memory.rs +++ b/crates/wasmtime/src/memory.rs @@ -577,6 +577,15 @@ impl Memory { pub(crate) fn comes_from_same_store(&self, store: &StoreOpaque) -> bool { store.store_data().contains(self.0) } + + /// Get a stable hash key for this memory. + /// + /// Even if the same underlying memory definition is added to the + /// `StoreData` multiple times and becomes multiple `wasmtime::Memory`s, + /// this hash key will be consistent across all of these memories. + pub(crate) fn hash_key(&self, store: &StoreOpaque) -> impl std::hash::Hash + Eq { + store[self.0].definition as usize + } } /// A linear memory. This trait provides an interface for raw memory buffers @@ -947,4 +956,41 @@ mod tests { other => panic!("unexpected style {:?}", other), } } + + #[test] + fn hash_key_is_stable_across_duplicate_store_data_entries() -> Result<()> { + let mut store = Store::<()>::default(); + let module = Module::new( + store.engine(), + r#" + (module + (memory (export "m") 1 1) + ) + "#, + )?; + let instance = Instance::new(&mut store, &module, &[])?; + + // Each time we `get_memory`, we call `Memory::from_wasmtime` which adds + // a new entry to `StoreData`, so `g1` and `g2` will have different + // indices into `StoreData`. + let m1 = instance.get_memory(&mut store, "m").unwrap(); + let m2 = instance.get_memory(&mut store, "m").unwrap(); + + // That said, they really point to the same memory. + assert_eq!(m1.data(&store)[0], 0); + assert_eq!(m2.data(&store)[0], 0); + m1.data_mut(&mut store)[0] = 42; + assert_eq!(m1.data(&mut store)[0], 42); + assert_eq!(m2.data(&mut store)[0], 42); + + // And therefore their hash keys are the same. + assert!(m1.hash_key(&store.as_context().0) == m2.hash_key(&store.as_context().0)); + + // But the hash keys are different from different memories. + let instance2 = Instance::new(&mut store, &module, &[])?; + let m3 = instance2.get_memory(&mut store, "m").unwrap(); + assert!(m1.hash_key(&store.as_context().0) != m3.hash_key(&store.as_context().0)); + + Ok(()) + } } diff --git a/crates/wasmtime/src/module.rs b/crates/wasmtime/src/module.rs index 3e812cfc8543..38a51fcd7ea0 100644 --- a/crates/wasmtime/src/module.rs +++ b/crates/wasmtime/src/module.rs @@ -26,7 +26,9 @@ use wasmtime_runtime::{ mod registry; -pub use registry::{is_wasm_trap_pc, register_code, unregister_code, ModuleRegistry}; +pub use registry::{ + is_wasm_trap_pc, register_code, unregister_code, ModuleRegistry, RegisteredModuleId, +}; /// A compiled WebAssembly module, ready to be instantiated. /// @@ -130,6 +132,14 @@ struct ModuleInner { offsets: VMOffsets, } +impl std::fmt::Debug for Module { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Module") + .field("name", &self.name()) + .finish_non_exhaustive() + } +} + impl Module { /// Creates a new WebAssembly `Module` from the given in-memory `bytes`. /// @@ -1094,6 +1104,10 @@ impl Module { (loc.start as usize, loc.length as usize) }) } + + pub(crate) fn id(&self) -> CompiledModuleId { + self.inner.module.unique_id() + } } impl ModuleInner { diff --git a/crates/wasmtime/src/module/registry.rs b/crates/wasmtime/src/module/registry.rs index 46fb18262be9..65de4bf22536 100644 --- a/crates/wasmtime/src/module/registry.rs +++ b/crates/wasmtime/src/module/registry.rs @@ -44,10 +44,33 @@ struct LoadedCode { modules: BTreeMap, } +/// An identifier of a module that has previously been inserted into a +/// `ModuleRegistry`. +#[derive(Clone, Copy)] +pub enum RegisteredModuleId { + /// Index into `ModuleRegistry::modules_without_code`. + WithoutCode(usize), + /// Start address of the module's code so that we can get it again via + /// `ModuleRegistry::lookup_module`. + LoadedCode(usize), +} + impl ModuleRegistry { + /// Get a previously-registered module by id. + pub fn lookup_module_by_id(&self, id: RegisteredModuleId) -> Option<&Module> { + match id { + RegisteredModuleId::WithoutCode(idx) => self.modules_without_code.get(idx), + RegisteredModuleId::LoadedCode(pc) => { + let (module, _) = self.module_and_offset(pc)?; + Some(module) + } + } + } + /// Fetches information about a registered module given a program counter value. - pub fn lookup_module(&self, pc: usize) -> Option<&dyn ModuleInfo> { - self.module(pc).map(|(m, _)| m.module_info()) + pub fn lookup_module_info(&self, pc: usize) -> Option<&dyn ModuleInfo> { + let (module, _) = self.module_and_offset(pc)?; + Some(module.module_info()) } fn code(&self, pc: usize) -> Option<(&LoadedCode, usize)> { @@ -58,7 +81,7 @@ impl ModuleRegistry { Some((code, pc - *start)) } - fn module(&self, pc: usize) -> Option<(&Module, usize)> { + fn module_and_offset(&self, pc: usize) -> Option<(&Module, usize)> { let (code, offset) = self.code(pc)?; Some((code.module(pc)?, offset)) } @@ -72,17 +95,21 @@ impl ModuleRegistry { } /// Registers a new module with the registry. - pub fn register_module(&mut self, module: &Module) { - self.register(module.code_object(), Some(module)) + pub fn register_module(&mut self, module: &Module) -> RegisteredModuleId { + self.register(module.code_object(), Some(module)).unwrap() } #[cfg(feature = "component-model")] pub fn register_component(&mut self, component: &Component) { - self.register(component.code_object(), None) + self.register(component.code_object(), None); } /// Registers a new module with the registry. - fn register(&mut self, code: &Arc, module: Option<&Module>) { + fn register( + &mut self, + code: &Arc, + module: Option<&Module>, + ) -> Option { let text = code.code_memory().text(); // If there's not actually any functions in this module then we may @@ -92,14 +119,18 @@ impl ModuleRegistry { // module in the future. For that reason we continue to register empty // modules and retain them. if text.is_empty() { - self.modules_without_code.extend(module.cloned()); - return; + return module.map(|module| { + let id = RegisteredModuleId::WithoutCode(self.modules_without_code.len()); + self.modules_without_code.push(module.clone()); + id + }); } // The module code range is exclusive for end, so make it inclusive as // it may be a valid PC value let start_addr = text.as_ptr() as usize; let end_addr = start_addr + text.len() - 1; + let id = module.map(|_| RegisteredModuleId::LoadedCode(start_addr)); // If this module is already present in the registry then that means // it's either an overlapping image, for example for two modules @@ -110,7 +141,7 @@ impl ModuleRegistry { if let Some(module) = module { prev.push_module(module); } - return; + return id; } // Assert that this module's code doesn't collide with any other @@ -131,6 +162,7 @@ impl ModuleRegistry { } let prev = self.loaded_code.insert(end_addr, (start_addr, item)); assert!(prev.is_none()); + id } /// Fetches trap information about a program counter in a backtrace. @@ -148,8 +180,8 @@ impl ModuleRegistry { /// boolean indicates whether the engine used to compile this module is /// using environment variables to control debuginfo parsing. pub(crate) fn lookup_frame_info(&self, pc: usize) -> Option<(FrameInfo, &Module)> { - let (module, offset) = self.module(pc)?; - let info = FrameInfo::new(module, offset)?; + let (module, offset) = self.module_and_offset(pc)?; + let info = FrameInfo::new(module.clone(), offset)?; Some((info, module)) } diff --git a/crates/wasmtime/src/store.rs b/crates/wasmtime/src/store.rs index 88d7af66152f..d6f2bc574475 100644 --- a/crates/wasmtime/src/store.rs +++ b/crates/wasmtime/src/store.rs @@ -78,7 +78,7 @@ use crate::instance::InstanceData; use crate::linker::Definition; -use crate::module::BareModuleInfo; +use crate::module::{BareModuleInfo, RegisteredModuleId}; use crate::trampoline::VMHostGlobalContext; use crate::{module::ModuleRegistry, Engine, Module, Trap, Val, ValRaw}; use crate::{Global, Instance, Memory}; @@ -433,14 +433,24 @@ where /// instance allocator. struct StoreInstance { handle: InstanceHandle, + kind: StoreInstanceKind, +} + +enum StoreInstanceKind { + /// An actual, non-dummy instance. + Real { + /// The id of this instance's module inside our owning store's + /// `ModuleRegistry`. + module_id: RegisteredModuleId, + }, - /// Whether or not this is a dummy instance that is just an implementation - /// detail for something else. For example, host-created memories internally - /// create a dummy instance. + /// This is a dummy instance that is just an implementation detail for + /// something else. For example, host-created memories internally create a + /// dummy instance. /// - /// Regardless of the configured instance allocator for this engine, dummy + /// Regardless of the configured instance allocator for the engine, dummy /// instances always use the on-demand allocator to deallocate the instance. - dummy: bool, + Dummy, } #[derive(Copy, Clone)] @@ -1253,10 +1263,27 @@ impl StoreOpaque { &mut self.host_globals } - pub unsafe fn add_instance(&mut self, handle: InstanceHandle) -> InstanceId { + pub fn module_for_instance(&self, instance: InstanceId) -> Option<&'_ Module> { + match self.instances[instance.0].kind { + StoreInstanceKind::Dummy => None, + StoreInstanceKind::Real { module_id } => { + let module = self + .modules() + .lookup_module_by_id(module_id) + .expect("should always have a registered module for real instances"); + Some(module) + } + } + } + + pub unsafe fn add_instance( + &mut self, + handle: InstanceHandle, + module_id: RegisteredModuleId, + ) -> InstanceId { self.instances.push(StoreInstance { handle: handle.clone(), - dummy: false, + kind: StoreInstanceKind::Real { module_id }, }); InstanceId(self.instances.len() - 1) } @@ -1269,7 +1296,7 @@ impl StoreOpaque { pub unsafe fn add_dummy_instance(&mut self, handle: InstanceHandle) -> InstanceId { self.instances.push(StoreInstance { handle: handle.clone(), - dummy: true, + kind: StoreInstanceKind::Dummy, }); InstanceId(self.instances.len() - 1) } @@ -1290,7 +1317,7 @@ impl StoreOpaque { .enumerate() .filter_map(|(idx, inst)| { let id = InstanceId::from_index(idx); - if inst.dummy { + if let StoreInstanceKind::Dummy = inst.kind { None } else { Some(InstanceData::from_id(id)) @@ -1304,6 +1331,9 @@ impl StoreOpaque { /// Get all memories (host- or Wasm-defined) within this store. pub fn all_memories<'a>(&'a mut self) -> impl Iterator + 'a { + // NB: Host-created memories have dummy instances. Therefore, we can get + // all memories in the store by iterating over all instances (including + // dummy instances) and getting each of their defined memories. let mems = self .instances .iter_mut() @@ -2266,7 +2296,7 @@ impl Drop for StoreOpaque { let allocator = self.engine.allocator(); let ondemand = OnDemandInstanceAllocator::default(); for instance in self.instances.iter_mut() { - if instance.dummy { + if let StoreInstanceKind::Dummy = instance.kind { ondemand.deallocate_module(&mut instance.handle); } else { allocator.deallocate_module(&mut instance.handle); @@ -2291,7 +2321,7 @@ impl Drop for StoreOpaque { impl wasmtime_runtime::ModuleInfoLookup for ModuleRegistry { fn lookup(&self, pc: usize) -> Option<&dyn ModuleInfo> { - self.lookup_module(pc) + self.lookup_module_info(pc) } } diff --git a/crates/wasmtime/src/trap.rs b/crates/wasmtime/src/trap.rs index 1d0437aa1b28..3f8b2b0f7c49 100644 --- a/crates/wasmtime/src/trap.rs +++ b/crates/wasmtime/src/trap.rs @@ -373,7 +373,7 @@ impl fmt::Display for WasmBacktrace { } else { needs_newline = true; } - let name = frame.module_name().unwrap_or(""); + let name = frame.module().name().unwrap_or(""); write!(f, " {:>3}: ", i)?; if let Some(offset) = frame.module_offset() { @@ -425,7 +425,7 @@ impl fmt::Display for WasmBacktrace { /// to acquire this `FrameInfo`. For more information see [`WasmBacktrace`]. #[derive(Debug)] pub struct FrameInfo { - module_name: Option, + module: Module, func_index: u32, func_name: Option, func_start: FilePos, @@ -438,12 +438,18 @@ impl FrameInfo { /// /// Returns an object if this `pc` is known to this module, or returns `None` /// if no information can be found. - pub(crate) fn new(module: &Module, text_offset: usize) -> Option { - let module = module.compiled_module(); - let (index, _func_offset) = module.func_by_text_offset(text_offset)?; - let info = module.wasm_func_info(index); - let instr = - wasmtime_environ::lookup_file_pos(module.code_memory().address_map_data(), text_offset); + pub(crate) fn new(module: Module, text_offset: usize) -> Option { + let compiled_module = module.compiled_module(); + let (index, _func_offset) = compiled_module.func_by_text_offset(text_offset)?; + let info = compiled_module.wasm_func_info(index); + let func_start = info.start_srcloc; + let instr = wasmtime_environ::lookup_file_pos( + compiled_module.code_memory().address_map_data(), + text_offset, + ); + let index = compiled_module.module().func_index(index); + let func_index = index.index() as u32; + let func_name = compiled_module.func_name(index).map(|s| s.to_string()); // In debug mode for now assert that we found a mapping for `pc` within // the function, because otherwise something is buggy along the way and @@ -453,7 +459,7 @@ impl FrameInfo { // Note that if the module doesn't even have an address map due to // compilation settings then it's expected that `instr` is `None`. debug_assert!( - instr.is_some() || !module.has_address_map(), + instr.is_some() || !compiled_module.has_address_map(), "failed to find instruction for {:#x}", text_offset ); @@ -468,7 +474,7 @@ impl FrameInfo { // custom section contents. let mut symbols = Vec::new(); - if let Some(s) = &module.symbolize_context().ok().and_then(|c| c) { + if let Some(s) = &compiled_module.symbolize_context().ok().and_then(|c| c) { if let Some(offset) = instr.and_then(|i| i.file_offset()) { let to_lookup = u64::from(offset) - s.code_section_offset(); if let Ok(mut frames) = s.addr2line().find_frames(to_lookup).skip_all_loads() { @@ -492,14 +498,12 @@ impl FrameInfo { } } - let index = module.module().func_index(index); - Some(FrameInfo { - module_name: module.module().name.clone(), - func_index: index.index() as u32, - func_name: module.func_name(index).map(|s| s.to_string()), + module, + func_index, + func_name, instr, - func_start: info.start_srcloc, + func_start, symbols, }) } @@ -512,17 +516,11 @@ impl FrameInfo { self.func_index } - /// Returns the identifer of the module that this frame is for. - /// - /// Module identifiers are present in the `name` section of a WebAssembly - /// binary, but this may not return the exact item in the `name` section. - /// Module names can be overwritten at construction time or perhaps inferred - /// from file names. The primary purpose of this function is to assist in - /// debugging and therefore may be tweaked over time. + /// Returns the module for this frame. /// - /// This function returns `None` when no name can be found or inferred. - pub fn module_name(&self) -> Option<&str> { - self.module_name.as_deref() + /// This is the module who's code was being run in this frame. + pub fn module(&self) -> &Module { + &self.module } /// Returns a descriptive name of the function for this frame, if one is diff --git a/src/commands/run.rs b/src/commands/run.rs index 01b04809b10b..93c67c1463b1 100644 --- a/src/commands/run.rs +++ b/src/commands/run.rs @@ -225,8 +225,8 @@ impl RunCommand { let main = self.load_module(&engine, &self.module_and_args[0])?; // Validate coredump-on-trap argument - if let Some(coredump_path) = &self.common.debug.coredump { - if coredump_path.contains("%") { + if let Some(path) = &self.common.debug.coredump { + if path.contains("%") { bail!("the coredump-on-trap path does not support patterns yet.") } } @@ -529,7 +529,7 @@ impl RunCommand { .wasi_cli_run() .call_run(&mut *store) .context("failed to invoke `run` function") - .map_err(|e| self.handle_coredump(e)); + .map_err(|e| self.handle_core_dump(&mut *store, e)); // Translate the `Result<(),()>` produced by wasm into a feigned // explicit exit here with status 1 if `Err(())` is returned. @@ -583,16 +583,18 @@ impl RunCommand { // Invoke the function and then afterwards print all the results that came // out, if there are any. let mut results = vec![Val::null(); ty.results().len()]; - let invoke_res = func.call(store, &values, &mut results).with_context(|| { - if let Some(name) = &self.invoke { - format!("failed to invoke `{}`", name) - } else { - format!("failed to invoke command default") - } - }); + let invoke_res = func + .call(&mut *store, &values, &mut results) + .with_context(|| { + if let Some(name) = &self.invoke { + format!("failed to invoke `{}`", name) + } else { + format!("failed to invoke command default") + } + }); if let Err(err) = invoke_res { - return Err(self.handle_coredump(err)); + return Err(self.handle_core_dump(&mut *store, err)); } if !results.is_empty() { @@ -617,7 +619,7 @@ impl RunCommand { Ok(()) } - fn handle_coredump(&self, err: Error) -> Error { + fn handle_core_dump(&self, store: &mut Store, err: Error) -> Error { let coredump_path = match &self.common.debug.coredump { Some(path) => path, None => return err, @@ -629,7 +631,7 @@ impl RunCommand { .to_str() .unwrap_or_else(|| "unknown"); - if let Err(coredump_err) = generate_coredump(&err, &source_name, coredump_path) { + if let Err(coredump_err) = write_core_dump(store, &err, &source_name, coredump_path) { eprintln!("warning: coredump failed to generate: {}", coredump_err); err } else { @@ -1037,36 +1039,22 @@ fn ctx_set_listenfd(mut num_fd: usize, builder: &mut WasiCtxBuilder) -> Result Result<()> { - let bt = err - .downcast_ref::() - .ok_or_else(|| anyhow!("no wasm backtrace found to generate coredump with"))?; - - let coredump = wasm_encoder::CoreDumpSection::new(source_name); - let mut stacksection = wasm_encoder::CoreDumpStackSection::new("main"); - for f in bt.frames() { - // We don't have the information at this point to map frames to - // individual instances of a module, so we won't be able to create the - // "frame ∈ instance ∈ module" hierarchy described in the core dump spec - // until we move core dump generation into the runtime. So for now - // instanceidx will be 0 for all frames - let instanceidx = 0; - stacksection.frame( - instanceidx, - f.func_index(), - u32::try_from(f.func_offset().unwrap_or(0)).unwrap(), - // We don't currently have access to locals/stack values - [], - [], - ); - } - let mut module = wasm_encoder::Module::new(); - module.section(&coredump); - module.section(&stacksection); - - let mut f = File::create(coredump_path) - .context(format!("failed to create file at `{}`", coredump_path))?; - f.write_all(module.as_slice()) - .with_context(|| format!("failed to write coredump file at `{}`", coredump_path))?; +fn write_core_dump( + store: &mut Store, + err: &anyhow::Error, + name: &str, + path: &str, +) -> Result<()> { + let core_dump = err + .downcast_ref::() + .expect("should have been configured to capture core dumps"); + + let core_dump = core_dump.serialize(store, name); + + let mut core_dump_file = + File::create(path).context(format!("failed to create file at `{}`", path))?; + core_dump_file + .write_all(&core_dump) + .with_context(|| format!("failed to write core dump file at `{}`", path))?; Ok(()) } diff --git a/tests/all/component_model/import.rs b/tests/all/component_model/import.rs index e77b9a73433b..9f8ae09f0a7c 100644 --- a/tests/all/component_model/import.rs +++ b/tests/all/component_model/import.rs @@ -373,23 +373,23 @@ fn attempt_to_leave_during_malloc() -> Result<()> { assert_eq!(trace.len(), 4); // This was our entry point... - assert_eq!(trace[3].module_name(), Some("m")); + assert_eq!(trace[3].module().name(), Some("m")); assert_eq!(trace[3].func_name(), Some("run")); // ... which called an imported function which ends up being originally // defined by the shim instance. The shim instance then does an indirect // call through a table which goes to the `canon.lower`'d host function - assert_eq!(trace[2].module_name(), Some("host_shim")); + assert_eq!(trace[2].module().name(), Some("host_shim")); assert_eq!(trace[2].func_name(), Some("shim_ret_string")); // ... and the lowered host function will call realloc to allocate space for // the result - assert_eq!(trace[1].module_name(), Some("m")); + assert_eq!(trace[1].module().name(), Some("m")); assert_eq!(trace[1].func_name(), Some("realloc")); // ... but realloc calls the shim instance and tries to exit the // component, triggering a dynamic trap - assert_eq!(trace[0].module_name(), Some("host_shim")); + assert_eq!(trace[0].module().name(), Some("host_shim")); assert_eq!(trace[0].func_name(), Some("shim_thunk")); // In addition to the above trap also ensure that when we enter a wasm diff --git a/tests/all/coredump.rs b/tests/all/coredump.rs index a7ee535f0235..492bce76db64 100644 --- a/tests/all/coredump.rs +++ b/tests/all/coredump.rs @@ -66,6 +66,7 @@ fn coredump_has_stack() -> Result<()> { assert_eq!(cd.frames()[0].func_name().unwrap(), "c"); assert_eq!(cd.frames()[1].func_name().unwrap(), "b"); assert_eq!(cd.frames()[2].func_name().unwrap(), "a"); + let _ = cd.serialize(&mut store, "stack"); Ok(()) } @@ -105,6 +106,7 @@ fn coredump_has_modules_and_instances() -> Result<()> { let cd = e.downcast_ref::().unwrap(); assert_eq!(cd.modules().len(), 2); assert_eq!(cd.instances().len(), 2); + let _ = cd.serialize(&mut store, "modules-and-instances"); Ok(()) } @@ -158,6 +160,7 @@ fn coredump_has_host_globals_and_memory() -> Result<()> { assert_eq!(core_dump.globals().len(), 1); assert_eq!(core_dump.memories().len(), 1); assert_eq!(core_dump.instances().len(), 1); + let _ = core_dump.serialize(&mut store, "host-globals-and-memory"); Ok(()) } @@ -191,6 +194,7 @@ fn coredump_has_defined_globals_and_memory() -> Result<()> { assert_eq!(core_dump.globals().len(), 1); assert_eq!(core_dump.memories().len(), 1); assert_eq!(core_dump.instances().len(), 1); + let _ = core_dump.serialize(&mut store, "defined-globals-and-memory"); Ok(()) } @@ -250,6 +254,7 @@ fn multiple_globals_memories_and_instances() -> Result<()> { assert_eq!(core_dump.globals().len(), 2); assert_eq!(core_dump.memories().len(), 2); assert_eq!(core_dump.instances().len(), 2); + let _ = core_dump.serialize(&mut store, "multi"); Ok(()) } diff --git a/tests/all/traps.rs b/tests/all/traps.rs index 5b7377fa1537..a77443ac209c 100644 --- a/tests/all/traps.rs +++ b/tests/all/traps.rs @@ -130,12 +130,12 @@ fn test_trap_trace() -> Result<()> { let trace = e.downcast_ref::().unwrap().frames(); assert_eq!(trace.len(), 2); - assert_eq!(trace[0].module_name().unwrap(), "hello_mod"); + assert_eq!(trace[0].module().name().unwrap(), "hello_mod"); assert_eq!(trace[0].func_index(), 1); assert_eq!(trace[0].func_name(), Some("hello")); assert_eq!(trace[0].func_offset(), Some(1)); assert_eq!(trace[0].module_offset(), Some(0x26)); - assert_eq!(trace[1].module_name().unwrap(), "hello_mod"); + assert_eq!(trace[1].module().name().unwrap(), "hello_mod"); assert_eq!(trace[1].func_index(), 0); assert_eq!(trace[1].func_name(), None); assert_eq!(trace[1].func_offset(), Some(1)); @@ -254,9 +254,9 @@ fn test_trap_trace_cb() -> Result<()> { let trace = e.downcast_ref::().unwrap().frames(); assert_eq!(trace.len(), 2); - assert_eq!(trace[0].module_name().unwrap(), "hello_mod"); + assert_eq!(trace[0].module().name().unwrap(), "hello_mod"); assert_eq!(trace[0].func_index(), 2); - assert_eq!(trace[1].module_name().unwrap(), "hello_mod"); + assert_eq!(trace[1].module().name().unwrap(), "hello_mod"); assert_eq!(trace[1].func_index(), 1); assert!(format!("{e:?}").contains("cb throw")); @@ -281,7 +281,7 @@ fn test_trap_stack_overflow() -> Result<()> { let trace = e.downcast_ref::().unwrap().frames(); assert!(trace.len() >= 32); for i in 0..trace.len() { - assert_eq!(trace[i].module_name().unwrap(), "rec_mod"); + assert_eq!(trace[i].module().name().unwrap(), "rec_mod"); assert_eq!(trace[i].func_index(), 0); assert_eq!(trace[i].func_name(), Some("run")); }