From 9c343b3a5e926274c84415c21bacf26daef4b1e6 Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Mon, 19 Dec 2022 15:14:02 -0800 Subject: [PATCH] wiggle: copy guest strings from shared memory Along the same lines as #5471, this change adds a new smart pointer, `GuestStrCow`, to copy the string bytes over from Wasm memory to the host when the string is found in shared memory. This is necessary to maintain Rust guarantees: with shared memory, the bytes backing a `GuestStr` could be altered by another thread and this would invalidate the assumption that we can dereference at any point to `&str`. `GuestStrCow` is essentially a wrapper around `GuestStr` when the memory is not shared but copies the memory region into a `String` when the memory is shared. --- crates/wasi-common/src/snapshots/preview_1.rs | 24 ++--- crates/wiggle/src/lib.rs | 93 ++++++++++++++----- 2 files changed, 78 insertions(+), 39 deletions(-) diff --git a/crates/wasi-common/src/snapshots/preview_1.rs b/crates/wasi-common/src/snapshots/preview_1.rs index 578a488e37b6..6e4d04a7fec7 100644 --- a/crates/wasi-common/src/snapshots/preview_1.rs +++ b/crates/wasi-common/src/snapshots/preview_1.rs @@ -663,7 +663,7 @@ impl wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { self.table() .get_dir(u32::from(dirfd))? .get_cap(DirCaps::CREATE_DIRECTORY)? - .create_dir(path.as_str()?.expect("cannot use with shared memories; see https://github.com/bytecodealliance/wasmtime/issues/5235 (TODO)").deref()) + .create_dir(&path.as_cow()?) .await } @@ -678,7 +678,7 @@ impl wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { .get_dir(u32::from(dirfd))? .get_cap(DirCaps::PATH_FILESTAT_GET)? 
.get_path_filestat( - path.as_str()?.expect("cannot use with shared memories; see https://github.com/bytecodealliance/wasmtime/issues/5235 (TODO)").deref(), + &path.as_cow()?, flags.contains(types::Lookupflags::SYMLINK_FOLLOW), ) .await?; @@ -705,7 +705,7 @@ impl wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { .get_dir(u32::from(dirfd))? .get_cap(DirCaps::PATH_FILESTAT_SET_TIMES)? .set_times( - path.as_str()?.expect("cannot use with shared memories; see https://github.com/bytecodealliance/wasmtime/issues/5235 (TODO)").deref(), + &path.as_cow()?, atim, mtim, flags.contains(types::Lookupflags::SYMLINK_FOLLOW), @@ -736,9 +736,9 @@ impl wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { src_dir .hard_link( - src_path.as_str()?.expect("cannot use with shared memories; see https://github.com/bytecodealliance/wasmtime/issues/5235 (TODO)").deref(), + &src_path.as_cow()?, target_dir.deref(), - target_path.as_str()?.expect("cannot use with shared memories; see https://github.com/bytecodealliance/wasmtime/issues/5235 (TODO)").deref(), + &target_path.as_cow()?, ) .await } @@ -764,7 +764,7 @@ impl wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { let oflags = OFlags::from(&oflags); let fdflags = FdFlags::from(fdflags); - let path = path.as_str()?.expect("cannot use with shared memories; see https://github.com/bytecodealliance/wasmtime/issues/5235 (TODO)"); + let path = path.as_cow()?; if oflags.contains(OFlags::DIRECTORY) { if oflags.contains(OFlags::CREATE) || oflags.contains(OFlags::EXCLUSIVE) @@ -813,7 +813,7 @@ impl wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { .table() .get_dir(u32::from(dirfd))? .get_cap(DirCaps::READLINK)? - .read_link(path.as_str()?.expect("cannot use with shared memories; see https://github.com/bytecodealliance/wasmtime/issues/5235 (TODO)").deref()) + .read_link(&path.as_cow()?) .await? 
.into_os_string() .into_string() @@ -835,7 +835,7 @@ impl wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { self.table() .get_dir(u32::from(dirfd))? .get_cap(DirCaps::REMOVE_DIRECTORY)? - .remove_dir(path.as_str()?.expect("cannot use with shared memories; see https://github.com/bytecodealliance/wasmtime/issues/5235 (TODO)").deref()) + .remove_dir(&path.as_cow()?) .await } @@ -854,11 +854,7 @@ impl wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { .get_dir(u32::from(dest_fd))? .get_cap(DirCaps::RENAME_TARGET)?; src_dir - .rename( - src_path.as_str()?.expect("cannot use with shared memories; see https://github.com/bytecodealliance/wasmtime/issues/5235 (TODO)").deref(), - dest_dir.deref(), - dest_path.as_str()?.expect("cannot use with shared memories; see https://github.com/bytecodealliance/wasmtime/issues/5235 (TODO)").deref(), - ) + .rename(&src_path.as_cow()?, dest_dir.deref(), &dest_path.as_cow()?) .await } @@ -871,7 +867,7 @@ impl wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { self.table() .get_dir(u32::from(dirfd))? .get_cap(DirCaps::SYMLINK)? - .symlink(src_path.as_str()?.expect("cannot use with shared memories; see https://github.com/bytecodealliance/wasmtime/issues/5235 (TODO)").deref(), dest_path.as_str()?.expect("cannot use with shared memories; see https://github.com/bytecodealliance/wasmtime/issues/5235 (TODO)").deref()) + .symlink(&src_path.as_cow()?, &dest_path.as_cow()?) .await } diff --git a/crates/wiggle/src/lib.rs b/crates/wiggle/src/lib.rs index 43b48f28eb57..813e7534218a 100644 --- a/crates/wiggle/src/lib.rs +++ b/crates/wiggle/src/lib.rs @@ -703,7 +703,8 @@ impl<'a> GuestPtr<'a, str> { /// `GuestError` will be returned. /// /// Additionally, because it is `unsafe` to have a `GuestStr` of shared - /// memory, this function will return `None` in this case. + /// memory, this function will return `None` in this case (see + /// [`GuestPtr<'_, str>::as_cow`]). 
pub fn as_str(&self) -> Result>, GuestError> { match self.as_bytes().as_unsafe_slice_mut()?.shared_borrow() { UnsafeBorrowResult::Ok(s) => Ok(Some(s.try_into()?)), @@ -731,6 +732,24 @@ impl<'a> GuestPtr<'a, str> { UnsafeBorrowResult::Err(e) => Err(e), } } + + /// Attempts to create a [`GuestStrCow<'_>`] from this pointer, performing + /// bounds checks and utf-8 checks. Whereas [`GuestPtr::as_str`] will fail + /// with `None` if attempting to access Wasm shared memory, this call will + /// succeed: if used on shared memory, this function will copy the string + /// into [`GuestStrCow::Copied`]. If the memory is non-shared, this returns + /// a [`GuestStrCow::Borrowed`] (a thin wrapper over [`GuestStr<'_>`]). + pub fn as_cow(&self) -> Result, GuestError> { + match self.as_bytes().as_unsafe_slice_mut()?.shared_borrow() { + UnsafeBorrowResult::Ok(s) => Ok(GuestStrCow::Borrowed(GuestStr(s))), + UnsafeBorrowResult::Shared(_) => { + let copied = self.as_bytes().to_vec()?; + let utf8_string = String::from_utf8(copied).map_err(|e| e.utf8_error())?; + Ok(GuestStrCow::Copied(utf8_string)) + } + UnsafeBorrowResult::Err(e) => Err(e), + } + } } impl<'a> GuestPtr<'a, [u8]> { @@ -755,30 +774,6 @@ impl fmt::Debug for GuestPtr<'_, T> { } } -/// A smart pointer for distinguishing between different kinds of Wasm memory: -/// shared and non-shared. -/// -/// As with `GuestSlice`, this is usable as a `&'a [T]` via [`std::ops::Deref`]. -/// The major difference is that, for shared memories, the memory will be copied -/// out of Wasm linear memory to avoid the possibility of concurrent mutation by -/// another thread. This extra copy exists solely to maintain the Rust -/// guarantees regarding `&[T]`. 
-pub enum GuestCow<'a, T> { - Borrowed(GuestSlice<'a, T>), - Copied(Vec), -} - -impl<'a, T> std::ops::Deref for GuestCow<'a, T> { - type Target = [T]; - - fn deref(&self) -> &Self::Target { - match self { - GuestCow::Borrowed(s) => s, - GuestCow::Copied(s) => s, - } - } -} - /// A smart pointer to an shareable slice in guest memory. /// /// Usable as a `&'a [T]` via [`std::ops::Deref`]. @@ -848,6 +843,30 @@ impl<'a, T> Drop for GuestSliceMut<'a, T> { } } +/// A smart pointer for distinguishing between different kinds of Wasm memory: +/// shared and non-shared. +/// +/// As with `GuestSlice`, this is usable as a `&'a [T]` via [`std::ops::Deref`]. +/// The major difference is that, for shared memories, the memory will be copied +/// out of Wasm linear memory to avoid the possibility of concurrent mutation by +/// another thread. This extra copy exists solely to maintain the Rust +/// guarantees regarding `&[T]`. +pub enum GuestCow<'a, T> { + Borrowed(GuestSlice<'a, T>), + Copied(Vec), +} + +impl<'a, T> std::ops::Deref for GuestCow<'a, T> { + type Target = [T]; + + fn deref(&self) -> &Self::Target { + match self { + GuestCow::Borrowed(s) => s, + GuestCow::Copied(s) => s, + } + } +} + /// A smart pointer to an `unsafe` slice in guest memory. /// /// Accessing guest memory (e.g., WebAssembly linear memory) is inherently @@ -1059,6 +1078,30 @@ impl<'a> std::ops::DerefMut for GuestStrMut<'a> { } } +/// A smart pointer to a `str` for distinguishing between different kinds of +/// Wasm memory: shared and non-shared. +/// +/// As with `GuestStr`, this is usable as a `&'a str` via [`std::ops::Deref`]. +/// The major difference is that, for shared memories, the string will be copied +/// out of Wasm linear memory to avoid the possibility of concurrent mutation by +/// another thread. This extra copy exists solely to maintain the Rust +/// guarantees regarding `&str`. 
+pub enum GuestStrCow<'a> { + Borrowed(GuestStr<'a>), + Copied(String), +} + +impl<'a> std::ops::Deref for GuestStrCow<'a> { + type Target = str; + + fn deref(&self) -> &Self::Target { + match self { + GuestStrCow::Borrowed(s) => s, + GuestStrCow::Copied(s) => s, + } + } +} + mod private { pub trait Sealed {} impl Sealed for T {}