From 0288016665ebcda91cfdaf34c36ea6ac650465f3 Mon Sep 17 00:00:00 2001 From: Robin Freyler Date: Mon, 13 May 2024 15:08:48 +0200 Subject: [PATCH] Add non-streaming Wasm module creation (#1035) * rename Module::new[_unchecked] to new_streaming[_unchecked] * remove unnecessary pattern matches We are guaranteed that only Payload::CodeSectionEntry follows until the code section is done. * fix doc test * clean-up module parsing * properly process invalid payloads everywhere needed * move streaming parser into its own submodule * re-rename ModuleStreamingParser back to ModuleParser It will be handling both streaming and buffered Wasm module parsing. * update docs of streaming parsing * remove convenience methods * update docs * add non-streaming Wasm module creation * fix internal doc links * use non-streaming Module creation everywhere It should be the new default when applicable. * fix bug in non-streaming parser * apply clippy suggestions * fix buffered parsing * apply more clippy suggestions * test streaming Wasm module compilation in spec testsuite --- crates/cli/src/context.rs | 2 +- crates/wasmi/src/module/mod.rs | 84 +++-- crates/wasmi/src/module/parser.rs | 353 +------------------ crates/wasmi/src/module/parser/buffered.rs | 194 ++++++++++ crates/wasmi/src/module/parser/streaming.rs | 273 ++++++++++++++ crates/wasmi/tests/e2e/v1/host_calls_wasm.rs | 2 +- crates/wasmi/tests/e2e/v1/resumable_call.rs | 4 +- crates/wasmi/tests/spec/context.rs | 21 +- crates/wasmi/tests/spec/mod.rs | 20 +- crates/wasmi/tests/spec/run.rs | 20 +- fuzz/fuzz_targets/translate.rs | 2 +- fuzz/fuzz_targets/translate_metered.rs | 2 +- 12 files changed, 602 insertions(+), 375 deletions(-) create mode 100644 crates/wasmi/src/module/parser/buffered.rs create mode 100644 crates/wasmi/src/module/parser/streaming.rs diff --git a/crates/cli/src/context.rs b/crates/cli/src/context.rs index cecc048ae7..a814e2c307 100644 --- a/crates/cli/src/context.rs +++ b/crates/cli/src/context.rs @@ -36,7 +36,7 @@ 
impl Context { config.compilation_mode(compilation_mode); let engine = wasmi::Engine::new(&config); let wasm_bytes = utils::read_wasm_or_wat(wasm_file)?; - let module = wasmi::Module::new(&engine, &mut &wasm_bytes[..]).map_err(|error| { + let module = wasmi::Module::new(&engine, &wasm_bytes[..]).map_err(|error| { anyhow!("failed to parse and validate Wasm module {wasm_file:?}: {error}") })?; let mut store = wasmi::Store::new(&engine, wasi_ctx); diff --git a/crates/wasmi/src/module/mod.rs b/crates/wasmi/src/module/mod.rs index 19e3fe936b..a6de31b62b 100644 --- a/crates/wasmi/src/module/mod.rs +++ b/crates/wasmi/src/module/mod.rs @@ -15,7 +15,7 @@ use self::{ export::ExternIdx, global::Global, import::{ExternTypeIdx, Import}, - parser::{parse, parse_unchecked}, + parser::ModuleParser, }; pub(crate) use self::{ data::{DataSegment, DataSegments, InitDataSegment, PassiveDataSegmentBytes}, @@ -186,7 +186,25 @@ impl ModuleImports { } impl Module { - /// Creates a new Wasm [`Module`] from the given byte stream. + /// Creates a new Wasm [`Module`] from the given Wasm bytecode buffer. + /// + /// # Note + /// + /// This parses, validates and translates the buffered Wasm bytecode. + /// + /// # Errors + /// + /// - If the Wasm bytecode is malformed or fails to validate. + /// - If the Wasm bytecode violates restrictions + /// set in the [`Config`] used by the `engine`. + /// - If Wasmi cannot translate the Wasm bytecode. + /// + /// [`Config`]: crate::Config + pub fn new(engine: &Engine, wasm: &[u8]) -> Result { + ModuleParser::new(engine).parse_buffered(wasm) + } + + /// Creates a new Wasm [`Module`] from the given Wasm bytecode stream. /// /// # Note /// @@ -194,41 +212,67 @@ impl Module { /// /// # Errors /// - /// - If the `stream` cannot be parsed as a valid Wasm module. - /// - If the Wasm bytecode yielded by `stream` is not valid. - /// - If the Wasm bytecode yielded by `stream` violates restrictions + /// - If the Wasm bytecode is malformed or fails to validate. 
+ /// - If the Wasm bytecode violates restrictions /// set in the [`Config`] used by the `engine`. - /// - If Wasmi cannot translate the Wasm bytecode yielded by `stream`. + /// - If Wasmi cannot translate the Wasm bytecode. + /// + /// [`Config`]: crate::Config + pub fn new_streaming(engine: &Engine, stream: impl Read) -> Result { + ModuleParser::new(engine).parse_streaming(stream) + } + + /// Creates a new Wasm [`Module`] from the given Wasm bytecode buffer. + /// + /// # Note + /// + /// This parses and translates the buffered Wasm bytecode. + /// + /// # Safety + /// + /// - This does _not_ validate the Wasm bytecode. + /// - It is the caller's responsibility that the Wasm bytecode is valid. + /// - It is the caller's responsibility that the Wasm bytecode adheres + /// to the restrictions set by the used [`Config`] of the `engine`. + /// - Violating the above rules is undefined behavior. + /// + /// # Errors + /// + /// - If the Wasm bytecode is malformed or contains invalid sections. + /// - If the Wasm bytecode fails to be compiled by Wasmi. /// /// [`Config`]: crate::Config - pub fn new(engine: &Engine, stream: impl Read) -> Result { - parse(engine, stream).map_err(Into::into) + pub unsafe fn new_unchecked(engine: &Engine, wasm: &[u8]) -> Result { + let parser = ModuleParser::new(engine); + unsafe { parser.parse_buffered_unchecked(wasm) } } /// Creates a new Wasm [`Module`] from the given byte stream. /// /// # Note /// - /// - This parses and translates the Wasm bytecode yielded by `stream`. - /// - This still validates Wasm bytecode outside of function bodies. + /// This parses and translates the Wasm bytecode yielded by `stream`. /// /// # Safety /// - /// - This does _not_ fully validate the Wasm bytecode yielded by `stream`. - /// - It is the caller's responsibility to call this function only with - /// a `stream` that yields fully valid Wasm bytecode. 
- /// - Additionally it is the caller's responsibility that the Wasm bytecode - /// yielded by `stream` must adhere to the restrictions set by the used - /// [`Config`] of the `engine`. - /// - Violating these rules may lead to undefined behavior. + /// - This does _not_ validate the Wasm bytecode. + /// - It is the caller's responsibility that the Wasm bytecode is valid. + /// - It is the caller's responsibility that the Wasm bytecode adheres + /// to the restrictions set by the used [`Config`] of the `engine`. + /// - Violating the above rules is undefined behavior. /// /// # Errors /// - /// If the `stream` cannot be parsed as a valid Wasm module. + /// - If the Wasm bytecode is malformed or contains invalid sections. + /// - If the Wasm bytecode fails to be compiled by Wasmi. /// /// [`Config`]: crate::Config - pub unsafe fn new_unchecked(engine: &Engine, stream: impl Read) -> Result { - unsafe { parse_unchecked(engine, stream).map_err(Into::into) } + pub unsafe fn new_streaming_unchecked( + engine: &Engine, + stream: impl Read, + ) -> Result { + let parser = ModuleParser::new(engine); + unsafe { parser.parse_streaming_unchecked(stream) } } /// Returns the [`Engine`] used during creation of the [`Module`]. 
diff --git a/crates/wasmi/src/module/parser.rs b/crates/wasmi/src/module/parser.rs index ab0729b99c..ef38277b8b 100644 --- a/crates/wasmi/src/module/parser.rs +++ b/crates/wasmi/src/module/parser.rs @@ -5,10 +5,8 @@ use super::{ import::{FuncTypeIdx, Import}, ElementSegment, FuncIdx, - Module, ModuleBuilder, ModuleHeader, - Read, }; use crate::{ engine::{CompiledFunc, EnforcedLimitsError}, @@ -18,10 +16,9 @@ use crate::{ MemoryType, TableType, }; -use core::ops::{Deref, DerefMut, Range}; -use std::{boxed::Box, vec::Vec}; +use core::ops::Range; +use std::boxed::Box; use wasmparser::{ - Chunk, DataSectionReader, ElementSectionReader, Encoding, @@ -38,32 +35,14 @@ use wasmparser::{ Validator, }; -/// Parse, validate and translate the Wasm bytecode stream into Wasm IR bytecode. -/// -/// - Returns the fully compiled and validated Wasm [`Module`] upon success. -/// - Uses the given [`Engine`] as the translation target of the process. -/// -/// # Errors -/// -/// If the Wasm bytecode stream fails to parse, validate or translate. -pub fn parse(engine: &Engine, stream: impl Read) -> Result { - ModuleParser::new(engine).parse(stream) -} +#[cfg(doc)] +use crate::Module; -/// Parse and translate the Wasm bytecode stream into Wasm IR bytecode. -/// -/// - Returns the fully compiled Wasm [`Module`] upon success. -/// - Uses the given [`Engine`] as the translation target of the process. -/// -/// # Errors -/// -/// If the Wasm bytecode stream fails to parse or translate. -pub unsafe fn parse_unchecked(engine: &Engine, stream: impl Read) -> Result { - unsafe { ModuleParser::new(engine).parse_unchecked(stream) } -} +mod buffered; +mod streaming; /// Context used to construct a WebAssembly module from a stream of bytes. -struct ModuleParser { +pub struct ModuleParser { /// The engine used for translation. engine: Engine, /// The Wasm validator used throughout stream parsing. @@ -76,62 +55,9 @@ struct ModuleParser { eof: bool, } -/// A buffer for holding parsed payloads in bytes. 
-#[derive(Debug, Default, Clone)] -pub struct ParseBuffer { - buffer: Vec, -} - -impl ParseBuffer { - /// Drops the first `amount` bytes from the [`ParseBuffer`] as they have been consumed. - #[inline] - pub fn consume(&mut self, amount: usize) { - self.buffer.drain(..amount); - } - - /// Pulls more bytes from the `stream` in order to produce Wasm payload. - /// - /// Returns `true` if the parser reached the end of the stream. - /// - /// # Note - /// - /// Uses `hint` to efficiently preallocate enough space for the next payload. - #[inline] - pub fn pull_bytes(&mut self, hint: u64, stream: &mut impl Read) -> Result { - // Use the hint to preallocate more space, then read - // some more data into the buffer. - // - // Note that the buffer management here is not ideal, - // but it's compact enough to fit in an example! - let len = self.len(); - let new_len = len + hint as usize; - self.resize(new_len, 0x0_u8); - let read_bytes = stream.read(&mut self[len..])?; - self.truncate(len + read_bytes); - let reached_end = read_bytes == 0; - Ok(reached_end) - } -} - -impl Deref for ParseBuffer { - type Target = Vec; - - #[inline] - fn deref(&self) -> &Self::Target { - &self.buffer - } -} - -impl DerefMut for ParseBuffer { - #[inline] - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.buffer - } -} - impl ModuleParser { /// Creates a new [`ModuleParser`] for the given [`Engine`]. - fn new(engine: &Engine) -> Self { + pub fn new(engine: &Engine) -> Self { let parser = WasmParser::new(0); Self { engine: engine.clone(), @@ -142,226 +68,6 @@ impl ModuleParser { } } - /// Starts parsing and validating the Wasm bytecode stream. - /// - /// Returns the compiled and validated Wasm [`Module`] upon success. - /// - /// # Errors - /// - /// If the Wasm bytecode stream fails to validate. 
- pub fn parse(mut self, stream: impl Read) -> Result { - let features = self.engine.config().wasm_features(); - self.validator = Some(Validator::new_with_features(features)); - // SAFETY: we just pre-populated the Wasm module parser with a validator - // thus calling this method is safe. - unsafe { self.parse_impl(stream) } - } - - /// Starts parsing and validating the Wasm bytecode stream. - /// - /// Returns the compiled and validated Wasm [`Module`] upon success. - /// - /// # Safety - /// - /// The caller is responsible to make sure that the provided - /// `stream` yields valid WebAssembly bytecode. - /// - /// # Errors - /// - /// If the Wasm bytecode stream fails to validate. - pub unsafe fn parse_unchecked(self, stream: impl Read) -> Result { - unsafe { self.parse_impl(stream) } - } - - /// Starts parsing and validating the Wasm bytecode stream. - /// - /// Returns the compiled and validated Wasm [`Module`] upon success. - /// - /// # Safety - /// - /// The caller is responsible to either - /// - /// 1) Populate the [`ModuleParser`] with a [`Validator`] prior to calling this method, OR; - /// 2) Make sure that the provided `stream` yields valid WebAssembly bytecode. - /// - /// Otherwise this method has undefined behavior. - /// - /// # Errors - /// - /// If the Wasm bytecode stream fails to validate. - unsafe fn parse_impl(mut self, mut stream: impl Read) -> Result { - let mut buffer = ParseBuffer::default(); - let header = Self::parse_header(&mut self, &mut stream, &mut buffer)?; - let builder = Self::parse_code(&mut self, &mut stream, &mut buffer, header)?; - let module = Self::parse_data(&mut self, &mut stream, &mut buffer, builder)?; - Ok(module) - } - - /// Parse the Wasm module header. - /// - /// - The Wasm module header is the set of all sections that appear before - /// the Wasm code section. 
- /// - We separate parsing of the Wasm module header since the information of - /// the Wasm module header is required for translating the Wasm code section. - /// - /// # Errors - /// - /// If the Wasm bytecode stream fails to parse or validate. - fn parse_header( - &mut self, - stream: &mut impl Read, - buffer: &mut ParseBuffer, - ) -> Result { - let mut header = ModuleHeaderBuilder::new(&self.engine); - loop { - match self.parser.parse(&buffer[..], self.eof)? { - Chunk::NeedMoreData(hint) => { - self.eof = buffer.pull_bytes(hint, stream)?; - if self.eof { - break; - } - } - Chunk::Parsed { consumed, payload } => { - match payload { - Payload::Version { - num, - encoding, - range, - } => self.process_version(num, encoding, range), - Payload::TypeSection(section) => self.process_types(section, &mut header), - Payload::ImportSection(section) => { - self.process_imports(section, &mut header) - } - Payload::InstanceSection(section) => self.process_instances(section), - Payload::FunctionSection(section) => { - self.process_functions(section, &mut header) - } - Payload::TableSection(section) => self.process_tables(section, &mut header), - Payload::MemorySection(section) => { - self.process_memories(section, &mut header) - } - Payload::TagSection(section) => self.process_tags(section), - Payload::GlobalSection(section) => { - self.process_globals(section, &mut header) - } - Payload::ExportSection(section) => { - self.process_exports(section, &mut header) - } - Payload::StartSection { func, range } => { - self.process_start(func, range, &mut header) - } - Payload::ElementSection(section) => { - self.process_element(section, &mut header) - } - Payload::DataCountSection { count, range } => { - self.process_data_count(count, range) - } - Payload::CodeSectionStart { count, range, size } => { - self.process_code_start(count, range, size)?; - buffer.consume(consumed); - break; - } - Payload::DataSection(_) => break, - Payload::End(_) => break, - Payload::CustomSection { .. 
} => Ok(()), - Payload::UnknownSection { id, range, .. } => { - self.process_unknown(id, range) - } - unexpected => { - unreachable!("encountered unexpected Wasm section: {unexpected:?}") - } - }?; - // Cut away the parts from the intermediate buffer that have already been parsed. - buffer.consume(consumed); - } - } - } - Ok(header.finish()) - } - - /// Parse the Wasm data section and finalize parsing. - /// - /// We separate parsing of the Wasm data section since it is the only Wasm - /// section that comes after the Wasm code section that we have to separate - /// out for technical reasons. - /// - /// # Errors - /// - /// If the Wasm bytecode stream fails to parse or validate. - fn parse_code( - &mut self, - stream: &mut impl Read, - buffer: &mut ParseBuffer, - header: ModuleHeader, - ) -> Result { - loop { - match self.parser.parse(&buffer[..], self.eof)? { - Chunk::NeedMoreData(hint) => { - self.eof = buffer.pull_bytes(hint, stream)?; - } - Chunk::Parsed { consumed, payload } => { - match payload { - Payload::CodeSectionEntry(func_body) => { - // Note: Unfortunately the `wasmparser` crate is missing an API - // to return the byte slice for the respective code section - // entry payload. Please remove this work around as soon as - // such an API becomes available. - let remaining = func_body.get_binary_reader().bytes_remaining(); - let start = consumed - remaining; - let bytes = &buffer[start..consumed]; - self.process_code_entry(func_body, bytes, &header)?; - } - Payload::CustomSection { .. } => {} - Payload::UnknownSection { id, range, .. } => { - self.process_unknown(id, range)? - } - _ => break, - } - // Cut away the parts from the intermediate buffer that have already been parsed. - buffer.consume(consumed); - } - } - } - Ok(ModuleBuilder::new(header)) - } - - fn parse_data( - &mut self, - stream: &mut impl Read, - buffer: &mut ParseBuffer, - mut builder: ModuleBuilder, - ) -> Result { - loop { - match self.parser.parse(&buffer[..], self.eof)? 
{ - Chunk::NeedMoreData(hint) => { - self.eof = buffer.pull_bytes(hint, stream)?; - } - Chunk::Parsed { consumed, payload } => { - match payload { - Payload::DataSection(section) => { - self.process_data(section, &mut builder)?; - } - Payload::End(offset) => { - self.process_end(offset)?; - buffer.consume(consumed); - break; - } - Payload::CustomSection { .. } => {} - Payload::UnknownSection { id, range, .. } => { - self.process_unknown(id, range)? - } - unexpected => { - unreachable!("encountered unexpected Wasm section: {unexpected:?}") - } - } - // Cut away the parts from the intermediate buffer that have already been parsed. - buffer.consume(consumed); - } - } - } - Ok(builder.finish(&self.engine)) - } - /// Processes the end of the Wasm binary. fn process_end(&mut self, offset: usize) -> Result<(), Error> { if let Some(validator) = &mut self.validator { @@ -449,22 +155,6 @@ impl ModuleParser { Ok(()) } - /// Process module instances. - /// - /// # Note - /// - /// This is part of the module linking Wasm proposal and not yet supported - /// by Wasmi. - fn process_instances( - &mut self, - section: wasmparser::InstanceSectionReader, - ) -> Result<(), Error> { - if let Some(validator) = &mut self.validator { - validator.instance_section(§ion)?; - } - Ok(()) - } - /// Process module function declarations. /// /// # Note @@ -552,19 +242,6 @@ impl ModuleParser { Ok(()) } - /// Process module tags. - /// - /// # Note - /// - /// This is part of the module linking Wasm proposal and not yet supported - /// by Wasmi. - fn process_tags(&mut self, section: wasmparser::TagSectionReader) -> Result<(), Error> { - if let Some(validator) = &mut self.validator { - validator.tag_section(§ion)?; - } - Ok(()) - } - /// Process module global variable declarations. /// /// # Note @@ -816,15 +493,13 @@ impl ModuleParser { Ok(()) } - /// Process an unknown Wasm module section. - /// - /// # Note - /// - /// This generally will be treated as an error for now. 
- fn process_unknown(&mut self, id: u8, range: Range) -> Result<(), Error> { + /// Process an unexpected, unsupported or malformed Wasm module section payload. + fn process_invalid_payload(&mut self, payload: Payload<'_>) -> Result<(), Error> { if let Some(validator) = &mut self.validator { - validator.unknown_section(id, &range)?; + if let Err(error) = validator.payload(&payload) { + return Err(Error::from(error)); + } } - Ok(()) + panic!("encountered unsupported, unexpected or malformed Wasm payload: {payload:?}") } } diff --git a/crates/wasmi/src/module/parser/buffered.rs b/crates/wasmi/src/module/parser/buffered.rs new file mode 100644 index 0000000000..c41db5c1af --- /dev/null +++ b/crates/wasmi/src/module/parser/buffered.rs @@ -0,0 +1,194 @@ +use super::{ModuleBuilder, ModuleHeader, ModuleHeaderBuilder, ModuleParser}; +use crate::{Error, Module}; +use wasmparser::{Chunk, Payload, Validator}; + +impl ModuleParser { + /// Starts parsing and validating the Wasm bytecode stream. + /// + /// Returns the compiled and validated Wasm [`Module`] upon success. + /// + /// # Errors + /// + /// If the Wasm bytecode stream fails to validate. + pub fn parse_buffered(mut self, buffer: &[u8]) -> Result { + let features = self.engine.config().wasm_features(); + self.validator = Some(Validator::new_with_features(features)); + // SAFETY: we just pre-populated the Wasm module parser with a validator + // thus calling this method is safe. + unsafe { self.parse_buffered_impl(buffer) } + } + + /// Starts parsing and validating the Wasm bytecode stream. + /// + /// Returns the compiled and validated Wasm [`Module`] upon success. + /// + /// # Safety + /// + /// The caller is responsible to make sure that the provided + /// `stream` yields valid WebAssembly bytecode. + /// + /// # Errors + /// + /// If the Wasm bytecode stream fails to validate. 
+ pub unsafe fn parse_buffered_unchecked(self, buffer: &[u8]) -> Result { + unsafe { self.parse_buffered_impl(buffer) } + } + + /// Starts parsing and validating the Wasm bytecode stream. + /// + /// Returns the compiled and validated Wasm [`Module`] upon success. + /// + /// # Safety + /// + /// The caller is responsible to either + /// + /// 1) Populate the [`ModuleParser`] with a [`Validator`] prior to calling this method, OR; + /// 2) Make sure that the provided `stream` yields valid WebAssembly bytecode. + /// + /// Otherwise this method has undefined behavior. + /// + /// # Errors + /// + /// If the Wasm bytecode stream fails to validate. + unsafe fn parse_buffered_impl(mut self, mut buffer: &[u8]) -> Result { + let header = Self::parse_buffered_header(&mut self, &mut buffer)?; + let builder = Self::parse_buffered_code(&mut self, &mut buffer, header)?; + let module = Self::parse_buffered_data(&mut self, &mut buffer, builder)?; + Ok(module) + } + + /// Fetch next Wasm module payload and adjust the `buffer`. + /// + /// # Errors + /// + /// If the parsed Wasm is malformed. + fn next_payload<'a>(&mut self, buffer: &mut &'a [u8]) -> Result<(usize, Payload<'a>), Error> { + match self.parser.parse(&buffer[..], true)? { + Chunk::Parsed { consumed, payload } => Ok((consumed, payload)), + Chunk::NeedMoreData(_hint) => { + // This is not possible since `eof` is always true. + unreachable!() + } + } + } + + /// Consumes the parts of the buffer that have been processed. + fn consume_buffer<'a>(consumed: usize, buffer: &mut &'a [u8]) -> &'a [u8] { + let (consumed, remaining) = buffer.split_at(consumed); + *buffer = remaining; + consumed + } + + /// Parse the Wasm module header. + /// + /// - The Wasm module header is the set of all sections that appear before + /// the Wasm code section. + /// - We separate parsing of the Wasm module header since the information of + /// the Wasm module header is required for translating the Wasm code section. 
+ /// + /// # Errors + /// + /// If the Wasm bytecode stream fails to parse or validate. + fn parse_buffered_header(&mut self, buffer: &mut &[u8]) -> Result { + let mut header = ModuleHeaderBuilder::new(&self.engine); + loop { + let (consumed, payload) = self.next_payload(buffer)?; + match payload { + Payload::Version { + num, + encoding, + range, + } => self.process_version(num, encoding, range), + Payload::TypeSection(section) => self.process_types(section, &mut header), + Payload::ImportSection(section) => self.process_imports(section, &mut header), + Payload::FunctionSection(section) => self.process_functions(section, &mut header), + Payload::TableSection(section) => self.process_tables(section, &mut header), + Payload::MemorySection(section) => self.process_memories(section, &mut header), + Payload::GlobalSection(section) => self.process_globals(section, &mut header), + Payload::ExportSection(section) => self.process_exports(section, &mut header), + Payload::StartSection { func, range } => { + self.process_start(func, range, &mut header) + } + Payload::ElementSection(section) => self.process_element(section, &mut header), + Payload::DataCountSection { count, range } => self.process_data_count(count, range), + Payload::CodeSectionStart { count, range, size } => { + self.process_code_start(count, range, size)?; + Self::consume_buffer(consumed, buffer); + break; + } + Payload::DataSection(_) => break, + Payload::End(_) => break, + Payload::CustomSection { .. } => Ok(()), + unexpected => self.process_invalid_payload(unexpected), + }?; + Self::consume_buffer(consumed, buffer); + } + Ok(header.finish()) + } + + /// Parse the Wasm code section entries. + /// + /// We separate parsing of the Wasm code section since most of a Wasm module + /// is made up of code section entries which we can parse and validate more efficiently + /// by serving them with a specialized routine. + /// + /// # Errors + /// + /// If the Wasm bytecode stream fails to parse or validate. 
+ fn parse_buffered_code( + &mut self, + buffer: &mut &[u8], + header: ModuleHeader, + ) -> Result { + loop { + let (consumed, payload) = self.next_payload(buffer)?; + match payload { + Payload::CodeSectionEntry(func_body) => { + // Note: Unfortunately the `wasmparser` crate is missing an API + // to return the byte slice for the respective code section + // entry payload. Please remove this work around as soon as + // such an API becomes available. + let bytes = Self::consume_buffer(consumed, buffer); + let remaining = func_body.get_binary_reader().bytes_remaining(); + let start = consumed - remaining; + let bytes = &bytes[start..]; + self.process_code_entry(func_body, bytes, &header)?; + } + _ => break, + } + } + Ok(ModuleBuilder::new(header)) + } + + /// Parse the Wasm data section and finalize parsing. + /// + /// We separate parsing of the Wasm data section since it is the only Wasm + /// section that comes after the Wasm code section that we have to separate + /// out for technical reasons. + /// + /// # Errors + /// + /// If the Wasm bytecode stream fails to parse or validate. + fn parse_buffered_data( + &mut self, + buffer: &mut &[u8], + mut builder: ModuleBuilder, + ) -> Result { + loop { + let (consumed, payload) = self.next_payload(buffer)?; + match payload { + Payload::DataSection(section) => { + self.process_data(section, &mut builder)?; + } + Payload::End(offset) => { + self.process_end(offset)?; + break; + } + Payload::CustomSection { .. 
} => {} + invalid => self.process_invalid_payload(invalid)?, + } + Self::consume_buffer(consumed, buffer); + } + Ok(builder.finish(&self.engine)) + } +} diff --git a/crates/wasmi/src/module/parser/streaming.rs b/crates/wasmi/src/module/parser/streaming.rs new file mode 100644 index 0000000000..8ae0ba7ccc --- /dev/null +++ b/crates/wasmi/src/module/parser/streaming.rs @@ -0,0 +1,273 @@ +use super::{ModuleBuilder, ModuleHeader, ModuleHeaderBuilder, ModuleParser}; +use crate::{Error, Module, Read}; +use core::ops::{Deref, DerefMut}; +use std::vec::Vec; +use wasmparser::{Chunk, Payload, Validator}; + +/// A buffer for holding parsed payloads in bytes. +#[derive(Debug, Default, Clone)] +struct ParseBuffer { + buffer: Vec, +} + +impl ParseBuffer { + /// Drops the first `amount` bytes from the [`ParseBuffer`] as they have been consumed. + #[inline] + fn consume(buffer: &mut Self, amount: usize) { + buffer.drain(..amount); + } + + /// Pulls more bytes from the `stream` in order to produce Wasm payload. + /// + /// Returns `true` if the parser reached the end of the stream. + /// + /// # Note + /// + /// Uses `hint` to efficiently preallocate enough space for the next payload. + #[inline] + fn pull_bytes(buffer: &mut Self, hint: u64, stream: &mut impl Read) -> Result { + // Use the hint to preallocate more space, then read + // some more data into the buffer. + // + // Note that the buffer management here is not ideal, + // but it's compact enough to fit in an example! 
+ let len = buffer.len(); + let new_len = len + hint as usize; + buffer.resize(new_len, 0x0_u8); + let read_bytes = stream.read(&mut buffer[len..])?; + buffer.truncate(len + read_bytes); + let reached_end = read_bytes == 0; + Ok(reached_end) + } +} + +impl Deref for ParseBuffer { + type Target = Vec; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.buffer + } +} + +impl DerefMut for ParseBuffer { + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.buffer + } +} + +impl ModuleParser { + /// Parses and validates the Wasm bytecode `stream`. + /// + /// Returns the compiled and validated Wasm [`Module`] upon success. + /// + /// # Errors + /// + /// If the Wasm bytecode stream fails to validate. + pub fn parse_streaming(mut self, stream: impl Read) -> Result { + let features = self.engine.config().wasm_features(); + self.validator = Some(Validator::new_with_features(features)); + // SAFETY: we just pre-populated the Wasm module parser with a validator + // thus calling this method is safe. + unsafe { self.parse_streaming_impl(stream) } + } + + /// Parses the Wasm bytecode `stream` without Wasm validation. + /// + /// Returns the compiled and validated Wasm [`Module`] upon success. + /// + /// # Safety + /// + /// The caller is responsible to make sure that the provided + /// `stream` yields valid WebAssembly bytecode. + /// + /// # Errors + /// + /// If the Wasm bytecode stream fails to validate. + pub unsafe fn parse_streaming_unchecked(self, stream: impl Read) -> Result { + unsafe { self.parse_streaming_impl(stream) } + } + + /// Starts parsing and validating the Wasm bytecode stream. + /// + /// Returns the compiled and validated Wasm [`Module`] upon success. + /// + /// # Safety + /// + /// The caller is responsible to either + /// + /// 1) Populate the [`ModuleParser`] with a [`Validator`] prior to calling this method, OR; + /// 2) Make sure that the provided `stream` yields valid WebAssembly bytecode. 
+ /// + /// Otherwise this method has undefined behavior. + /// + /// # Errors + /// + /// If the Wasm bytecode stream fails to validate. + unsafe fn parse_streaming_impl(mut self, mut stream: impl Read) -> Result { + let mut buffer = ParseBuffer::default(); + let header = Self::parse_streaming_header(&mut self, &mut stream, &mut buffer)?; + let builder = Self::parse_streaming_code(&mut self, &mut stream, &mut buffer, header)?; + let module = Self::parse_streaming_data(&mut self, &mut stream, &mut buffer, builder)?; + Ok(module) + } + + /// Parse the Wasm module header. + /// + /// - The Wasm module header is the set of all sections that appear before + /// the Wasm code section. + /// - We separate parsing of the Wasm module header since the information of + /// the Wasm module header is required for translating the Wasm code section. + /// + /// # Errors + /// + /// If the Wasm bytecode stream fails to parse or validate. + fn parse_streaming_header( + &mut self, + stream: &mut impl Read, + buffer: &mut ParseBuffer, + ) -> Result { + let mut header = ModuleHeaderBuilder::new(&self.engine); + loop { + match self.parser.parse(&buffer[..], self.eof)? 
{ + Chunk::NeedMoreData(hint) => { + self.eof = ParseBuffer::pull_bytes(buffer, hint, stream)?; + if self.eof { + break; + } + } + Chunk::Parsed { consumed, payload } => { + match payload { + Payload::Version { + num, + encoding, + range, + } => self.process_version(num, encoding, range), + Payload::TypeSection(section) => self.process_types(section, &mut header), + Payload::ImportSection(section) => { + self.process_imports(section, &mut header) + } + Payload::FunctionSection(section) => { + self.process_functions(section, &mut header) + } + Payload::TableSection(section) => self.process_tables(section, &mut header), + Payload::MemorySection(section) => { + self.process_memories(section, &mut header) + } + Payload::GlobalSection(section) => { + self.process_globals(section, &mut header) + } + Payload::ExportSection(section) => { + self.process_exports(section, &mut header) + } + Payload::StartSection { func, range } => { + self.process_start(func, range, &mut header) + } + Payload::ElementSection(section) => { + self.process_element(section, &mut header) + } + Payload::DataCountSection { count, range } => { + self.process_data_count(count, range) + } + Payload::CodeSectionStart { count, range, size } => { + self.process_code_start(count, range, size)?; + ParseBuffer::consume(buffer, consumed); + break; + } + Payload::DataSection(_) => break, + Payload::End(_) => break, + Payload::CustomSection { .. } => Ok(()), + unexpected => self.process_invalid_payload(unexpected), + }?; + // Cut away the parts from the intermediate buffer that have already been parsed. + ParseBuffer::consume(buffer, consumed); + } + } + } + Ok(header.finish()) + } + + /// Parse the Wasm code section entries. + /// + /// We separate parsing of the Wasm code section since most of a Wasm module + /// is made up of code section entries which we can parse and validate more efficiently + /// by serving them with a specialized routine. 
+ /// + /// # Errors + /// + /// If the Wasm bytecode stream fails to parse or validate. + fn parse_streaming_code( + &mut self, + stream: &mut impl Read, + buffer: &mut ParseBuffer, + header: ModuleHeader, + ) -> Result { + loop { + match self.parser.parse(&buffer[..], self.eof)? { + Chunk::NeedMoreData(hint) => { + self.eof = ParseBuffer::pull_bytes(buffer, hint, stream)?; + } + Chunk::Parsed { consumed, payload } => { + match payload { + Payload::CodeSectionEntry(func_body) => { + // Note: Unfortunately the `wasmparser` crate is missing an API + // to return the byte slice for the respective code section + // entry payload. Please remove this work around as soon as + // such an API becomes available. + let remaining = func_body.get_binary_reader().bytes_remaining(); + let start = consumed - remaining; + let bytes = &buffer[start..consumed]; + self.process_code_entry(func_body, bytes, &header)?; + } + _ => break, + } + // Cut away the parts from the intermediate buffer that have already been parsed. + ParseBuffer::consume(buffer, consumed); + } + } + } + Ok(ModuleBuilder::new(header)) + } + + /// Parse the Wasm data section and finalize parsing. + /// + /// We separate parsing of the Wasm data section since it is the only Wasm + /// section that comes after the Wasm code section that we have to separate + /// out for technical reasons. + /// + /// # Errors + /// + /// If the Wasm bytecode stream fails to parse or validate. + fn parse_streaming_data( + &mut self, + stream: &mut impl Read, + buffer: &mut ParseBuffer, + mut builder: ModuleBuilder, + ) -> Result { + loop { + match self.parser.parse(&buffer[..], self.eof)? 
{ + Chunk::NeedMoreData(hint) => { + self.eof = ParseBuffer::pull_bytes(buffer, hint, stream)?; + } + Chunk::Parsed { consumed, payload } => { + match payload { + Payload::DataSection(section) => { + self.process_data(section, &mut builder)?; + } + Payload::End(offset) => { + self.process_end(offset)?; + ParseBuffer::consume(buffer, consumed); + break; + } + Payload::CustomSection { .. } => {} + invalid => self.process_invalid_payload(invalid)?, + } + // Cut away the parts from the intermediate buffer that have already been parsed. + ParseBuffer::consume(buffer, consumed); + } + } + } + Ok(builder.finish(&self.engine)) + } +} diff --git a/crates/wasmi/tests/e2e/v1/host_calls_wasm.rs b/crates/wasmi/tests/e2e/v1/host_calls_wasm.rs index a30a4ff3b2..be6b968f93 100644 --- a/crates/wasmi/tests/e2e/v1/host_calls_wasm.rs +++ b/crates/wasmi/tests/e2e/v1/host_calls_wasm.rs @@ -40,7 +40,7 @@ fn host_calls_wasm() { "#, ) .unwrap(); - let module = Module::new(store.engine(), &mut &wasm[..]).unwrap(); + let module = Module::new(store.engine(), &wasm[..]).unwrap(); let instance = linker .instantiate(&mut store, &module) .unwrap() diff --git a/crates/wasmi/tests/e2e/v1/resumable_call.rs b/crates/wasmi/tests/e2e/v1/resumable_call.rs index 06624c431a..69fe2390e2 100644 --- a/crates/wasmi/tests/e2e/v1/resumable_call.rs +++ b/crates/wasmi/tests/e2e/v1/resumable_call.rs @@ -56,7 +56,7 @@ fn resumable_call_smoldot_common(wasm: &str) -> (Store, TypedFunc<(), // host function, returns 10 if the output is 0 and // returns 20 otherwise. 
let wasm = wat::parse_str(wasm).unwrap(); - let module = Module::new(store.engine(), &mut &wasm[..]).unwrap(); + let module = Module::new(store.engine(), &wasm[..]).unwrap(); let instance = linker .instantiate(&mut store, &module) .unwrap() @@ -228,7 +228,7 @@ fn resumable_call() { ) .unwrap(); - let module = Module::new(store.engine(), &mut &wasm[..]).unwrap(); + let module = Module::new(store.engine(), &wasm[..]).unwrap(); let instance = linker .instantiate(&mut store, &module) .unwrap() diff --git a/crates/wasmi/tests/spec/context.rs b/crates/wasmi/tests/spec/context.rs index 2bfabe20c1..cc9140d9d4 100644 --- a/crates/wasmi/tests/spec/context.rs +++ b/crates/wasmi/tests/spec/context.rs @@ -1,8 +1,13 @@ -use super::{TestDescriptor, TestError, TestProfile, TestSpan}; +use super::{ + run::{ParsingMode, RunnerConfig}, + TestDescriptor, + TestError, + TestProfile, + TestSpan, +}; use anyhow::Result; use std::collections::HashMap; use wasmi::{ - Config, Engine, Extern, Func, @@ -26,6 +31,8 @@ use wast::token::{Id, Span}; pub struct TestContext<'a> { /// The Wasmi engine used for executing functions used during the test. engine: Engine, + /// The configuration of the test runner. + runner_config: RunnerConfig, /// The linker for linking together Wasm test modules. linker: Linker<()>, /// The store to hold all runtime data during the test. @@ -48,8 +55,8 @@ pub struct TestContext<'a> { impl<'a> TestContext<'a> { /// Creates a new [`TestContext`] with the given [`TestDescriptor`]. 
- pub fn new(descriptor: &'a TestDescriptor, config: Config) -> Self { - let engine = Engine::new(&config); + pub fn new(descriptor: &'a TestDescriptor, runner_config: RunnerConfig) -> Self { + let engine = Engine::new(&runner_config.config); let mut linker = Linker::new(&engine); let mut store = Store::new(&engine, ()); _ = store.set_fuel(1_000_000_000); @@ -104,6 +111,7 @@ impl<'a> TestContext<'a> { .unwrap(); TestContext { engine, + runner_config, linker, store, modules: Vec::new(), @@ -164,7 +172,10 @@ impl TestContext<'_> { error ) }); - let module = Module::new(self.engine(), &wasm[..])?; + let module = match self.runner_config.mode { + ParsingMode::Buffered => Module::new(self.engine(), &wasm[..])?, + ParsingMode::Streaming => Module::new_streaming(self.engine(), &wasm[..])?, + }; let instance_pre = self.linker.instantiate(&mut self.store, &module)?; let instance = instance_pre.start(&mut self.store)?; self.modules.push(module); diff --git a/crates/wasmi/tests/spec/mod.rs b/crates/wasmi/tests/spec/mod.rs index 33551bd640..981ec117ed 100644 --- a/crates/wasmi/tests/spec/mod.rs +++ b/crates/wasmi/tests/spec/mod.rs @@ -9,6 +9,7 @@ use self::{ descriptor::{TestDescriptor, TestSpan}, error::TestError, profile::TestProfile, + run::{ParsingMode, RunnerConfig}, }; use wasmi::Config; @@ -68,7 +69,7 @@ fn mvp_config() -> Config { /// # Note /// /// The Wasm MVP has no Wasm proposals enabled. -fn test_config(consume_fuel: bool) -> Config { +fn test_config(consume_fuel: bool, mode: ParsingMode) -> RunnerConfig { let mut config = mvp_config(); // We have to enable the `mutable-global` Wasm proposal because // it seems that the entire Wasm spec test suite is already built @@ -83,7 +84,7 @@ fn test_config(consume_fuel: bool) -> Config { .wasm_tail_call(true) .wasm_extended_const(true) .consume_fuel(consume_fuel); - config + RunnerConfig { config, mode } } macro_rules! expand_tests { @@ -217,7 +218,7 @@ mod blobs { expand_tests! 
{ define_spec_tests, - let config = test_config(false); + let config = test_config(false, ParsingMode::Buffered); let runner = run::run_wasm_spec_test; } @@ -227,7 +228,18 @@ mod fueled { expand_tests! { define_spec_tests, - let config = test_config(true); + let config = test_config(true, ParsingMode::Buffered); + let runner = run::run_wasm_spec_test; + } +} + +mod streaming { + use super::*; + + expand_tests! { + define_spec_tests, + + let config = test_config(false, ParsingMode::Streaming); let runner = run::run_wasm_spec_test; } } diff --git a/crates/wasmi/tests/spec/run.rs b/crates/wasmi/tests/spec/run.rs index 27628beeb4..658fd38499 100644 --- a/crates/wasmi/tests/spec/run.rs +++ b/crates/wasmi/tests/spec/run.rs @@ -16,8 +16,26 @@ use wast::{ Wat, }; +/// The configuration for the test runner. +#[derive(Debug, Copy, Clone)] +pub struct RunnerConfig { + /// The Wasmi configuration used for all tests. + pub config: Config, + /// The parsing mode that is used. + pub mode: ParsingMode, +} + +/// The mode in which Wasm is parsed. +#[derive(Debug, Copy, Clone)] +pub enum ParsingMode { + /// The test runner shall use buffered Wasm compilation. + Buffered, + /// The test runner shall use streaming Wasm compilation. + Streaming, +} + /// Runs the Wasm test spec identified by the given name. 
-pub fn run_wasm_spec_test(name: &'static str, file: &'static str, config: Config) { +pub fn run_wasm_spec_test(name: &'static str, file: &'static str, config: RunnerConfig) { let test = TestDescriptor::new(name, file); let mut context = TestContext::new(&test, config); diff --git a/fuzz/fuzz_targets/translate.rs b/fuzz/fuzz_targets/translate.rs index 238d456aa7..aa722db5ce 100644 --- a/fuzz/fuzz_targets/translate.rs +++ b/fuzz/fuzz_targets/translate.rs @@ -5,5 +5,5 @@ use wasmi::{Engine, Module}; fuzz_target!(|data: wasm_smith::Module| { let wasm = data.to_bytes(); let engine = Engine::default(); - Module::new(&engine, &mut &wasm[..]).unwrap(); + Module::new(&engine, &wasm[..]).unwrap(); }); diff --git a/fuzz/fuzz_targets/translate_metered.rs b/fuzz/fuzz_targets/translate_metered.rs index a663748745..b7a9df3900 100644 --- a/fuzz/fuzz_targets/translate_metered.rs +++ b/fuzz/fuzz_targets/translate_metered.rs @@ -7,5 +7,5 @@ fuzz_target!(|data: wasm_smith::Module| { let mut config = Config::default(); config.consume_fuel(true); let engine = Engine::new(&config); - Module::new(&engine, &mut &wasm[..]).unwrap(); + Module::new(&engine, &wasm[..]).unwrap(); });