From 8fad60a46ca322dc36dd5fc9e30cb24e0336050e Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 22 Feb 2022 10:40:43 -0800 Subject: [PATCH] memfd: make "dense image" heuristic limit configurable. (#3831) In #3820 we see an issue with the new heuristics that control use of memfd: it's entirely possible for a reasonable Wasm module produced by a snapshotting system to have a relatively sparse heap (less than 50% filled). A system that avoids memfd because of this would have an undesirable performance reduction on such modules. Ultimately we should try to implement a hybrid scheme where we support outlier/leftover initializers, but for now this PR makes the "always allow dense" limit configurable. This way, embedders that want to ensure that memfd is used can do so, if they have other knowledge about the maximum heap size allowed in their system. (Partially addresses #3820 but let's leave it open to track the hybrid idea) --- crates/environ/src/module.rs | 35 ++++++++++++++++++------- crates/fuzzing/src/generators.rs | 7 +++++ crates/wasmtime/src/config.rs | 44 ++++++++++++++++++++++++++++++++ crates/wasmtime/src/module.rs | 3 ++- 4 files changed, 79 insertions(+), 10 deletions(-) diff --git a/crates/environ/src/module.rs b/crates/environ/src/module.rs index 70be73f568f1..52a7a4563b93 100644 --- a/crates/environ/src/module.rs +++ b/crates/environ/src/module.rs @@ -322,7 +322,31 @@ impl ModuleTranslation<'_> { /// /// Note that the constraints for `Paged` are the same as those for /// `Static`. - pub fn try_static_init(&mut self, page_size: u64) { + /// + /// Takes a `page_size` argument in order to ensure that all + /// initialization is page-aligned for mmap-ability, and + /// `max_image_size_always_allowed` to control how we decide + /// whether to use static init. + /// + /// We will try to avoid generating very sparse images, which are + /// possible if e.g. a module has an initializer at offset 0 and a + /// very high offset (say, 1 GiB). 
To avoid this, we use a dual + /// condition: we always allow images less than + /// `max_image_size_always_allowed`, and the embedder of Wasmtime + /// can set this if desired to ensure that static init should + /// always be done if the size of the module or its heaps is + /// otherwise bounded by the system. We also allow images with + /// static init data bigger than that, but only if it is "dense", + /// defined as having at least half (50%) of its pages with some + /// data. + /// + /// We could do something slightly better by building a dense part + /// and keeping a sparse list of outlier/leftover segments (see + /// issue #3820). This would also allow mostly-static init of + /// modules that have some dynamically-placed data segments. But, + /// for now, this is sufficient to allow a system that "knows what + /// it's doing" to always get static init. + pub fn try_static_init(&mut self, page_size: u64, max_image_size_always_allowed: u64) { // First try to switch this memory initialization to the `Paged` // variant, if it isn't already. This will perform static bounds checks // and everything and massage it all into a format which is a bit easier @@ -333,13 +357,6 @@ impl ModuleTranslation<'_> { _ => return, }; - // Maximum size, in bytes, of an initialization image which is - // unconditionally allowed regardless of the input module's size and - // properties. This is chosen to be modestly small to allow useful cases - // to use an initialization image but not too large that if every module - // ran up to the limit here it shouldn't cause a problem. - const MAX_IMAGE_SIZE_ALWAYS_ALLOWED: u64 = 1 << 20; // 1 MB - let memory_init_size = |pages: &[StaticMemoryInitializer]| { if pages.len() == 0 { return 0; @@ -382,7 +399,7 @@ impl ModuleTranslation<'_> { // If the memory initialization image is larger than the size of all // data, then we still allow memory initialization if the image will // be of a relatively modest size, such as 1MB here. 
- if memory_init_size < MAX_IMAGE_SIZE_ALWAYS_ALLOWED { + if memory_init_size < max_image_size_always_allowed { continue; } diff --git a/crates/fuzzing/src/generators.rs b/crates/fuzzing/src/generators.rs index 1a5ca37ffe57..01c59bb42b3f 100644 --- a/crates/fuzzing/src/generators.rs +++ b/crates/fuzzing/src/generators.rs @@ -257,6 +257,7 @@ pub struct WasmtimeConfig { pub memory_config: MemoryConfig, force_jump_veneers: bool, memfd: bool, + memfd_guaranteed_dense_image_size: u64, use_precompiled_cwasm: bool, /// Configuration for the instance allocation strategy to use. pub strategy: InstanceAllocationStrategy, @@ -440,6 +441,12 @@ impl Config { .interruptable(self.wasmtime.interruptable) .consume_fuel(self.wasmtime.consume_fuel) .memfd(self.wasmtime.memfd) + .memfd_guaranteed_dense_image_size(std::cmp::min( + // Clamp this at 16MiB so we don't get huge in-memory + // images during fuzzing. + 16 << 20, + self.wasmtime.memfd_guaranteed_dense_image_size, + )) .allocation_strategy(self.wasmtime.strategy.to_wasmtime()); self.wasmtime.codegen.configure(&mut cfg); diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs index 41072390d949..e45daafa7387 100644 --- a/crates/wasmtime/src/config.rs +++ b/crates/wasmtime/src/config.rs @@ -105,6 +105,7 @@ pub struct Config { pub(crate) parallel_compilation: bool, pub(crate) paged_memory_initialization: bool, pub(crate) memfd: bool, + pub(crate) memfd_guaranteed_dense_image_size: u64, } impl Config { @@ -131,6 +132,7 @@ impl Config { // Default to paged memory initialization when using uffd on linux paged_memory_initialization: cfg!(all(target_os = "linux", feature = "uffd")), memfd: false, + memfd_guaranteed_dense_image_size: 16 << 20, }; #[cfg(compiler)] { @@ -1199,6 +1201,47 @@ impl Config { self } + /// Configures the "guaranteed dense image size" for memfd. + /// + /// When using the memfd feature to initialize memory efficiently, + /// compiled modules contain an image of the module's initial + /// heap. 
If the module has a fairly sparse initial heap, with + /// just a few data segments at very different offsets, this could + /// result in a large region of zero bytes in the image. In other + /// words, it's not very memory-efficient. + /// + /// We normally use a heuristic to avoid this: if less than half + /// of the initialized range (first non-zero to last non-zero + /// byte) of any memory in the module has pages with nonzero + /// bytes, then we avoid memfd for the entire module. + /// + /// However, if the embedder always needs the instantiation-time + /// efficiency of memfd, and is otherwise carefully controlling + /// parameters of the modules (for example, by limiting the + /// maximum heap size of the modules), then it may be desirable to + /// ensure memfd is used even if this could go against the + /// heuristic above. Thus, we add another condition: there is a + /// size of initialized data region up to which we *always* allow + /// memfd. The embedder can set this to a known maximum heap size + /// if they desire to always get the benefits of memfd. + /// + /// In the future we may implement a "best of both worlds" + /// solution where we have a dense image up to some limit, and + /// then support a sparse list of initializers beyond that; this + /// would get most of the benefit of memfd and pay the incremental + /// cost of eager initialization only for those bits of memory + /// that are out-of-bounds. However, for now, an embedder desiring + /// fast instantiation should ensure that this setting is as large + /// as the maximum module initial memory content size. + /// + /// By default this value is 16 MiB. 
+ #[cfg(feature = "memfd")] + #[cfg_attr(nightlydoc, doc(cfg(feature = "memfd")))] + pub fn memfd_guaranteed_dense_image_size(&mut self, size_in_bytes: u64) -> &mut Self { + self.memfd_guaranteed_dense_image_size = size_in_bytes; + self + } + pub(crate) fn build_allocator(&self) -> Result<Box<dyn InstanceAllocator>> { #[cfg(feature = "async")] let stack_size = self.async_stack_size; @@ -1269,6 +1312,7 @@ impl Clone for Config { parallel_compilation: self.parallel_compilation, paged_memory_initialization: self.paged_memory_initialization, memfd: self.memfd, + memfd_guaranteed_dense_image_size: self.memfd_guaranteed_dense_image_size, } } } diff --git a/crates/wasmtime/src/module.rs b/crates/wasmtime/src/module.rs index 03c9640a2b48..3d0bc8bead5b 100644 --- a/crates/wasmtime/src/module.rs +++ b/crates/wasmtime/src/module.rs @@ -432,7 +432,8 @@ impl Module { // such as mmap'ing from a file to get copy-on-write. if engine.config().memfd { let align = engine.compiler().page_size_align(); - translation.try_static_init(align); + let max_always_allowed = engine.config().memfd_guaranteed_dense_image_size; + translation.try_static_init(align, max_always_allowed); } // Attempt to convert table initializer segments to