diff --git a/crates/wasi-common/src/hostcalls_impl/fs.rs b/crates/wasi-common/src/hostcalls_impl/fs.rs index ca9b52e21af6..0d910a34bf4e 100644 --- a/crates/wasi-common/src/hostcalls_impl/fs.rs +++ b/crates/wasi-common/src/hostcalls_impl/fs.rs @@ -578,41 +578,6 @@ pub(crate) unsafe fn path_open( enc_fd_byref(memory, fd_out_ptr, guest_fd) } -pub(crate) unsafe fn fd_readdir( - wasi_ctx: &mut WasiCtx, - memory: &mut [u8], - fd: wasi::__wasi_fd_t, - buf: wasi32::uintptr_t, - buf_len: wasi32::size_t, - cookie: wasi::__wasi_dircookie_t, - buf_used: wasi32::uintptr_t, -) -> Result<()> { - trace!( - "fd_readdir(fd={:?}, buf={:#x?}, buf_len={}, cookie={:#x?}, buf_used={:#x?})", - fd, - buf, - buf_len, - cookie, - buf_used, - ); - - enc_usize_byref(memory, buf_used, 0)?; - - let file = wasi_ctx - .get_fd_entry_mut(fd)? - .as_descriptor_mut(wasi::__WASI_RIGHTS_FD_READDIR, 0)? - .as_file_mut()?; - let host_buf = dec_slice_of_mut_u8(memory, buf, buf_len)?; - - trace!(" | (buf,buf_len)={:?}", host_buf); - - let host_bufused = hostcalls_impl::fd_readdir(file, host_buf, cookie)?; - - trace!(" | *buf_used={:?}", host_bufused); - - enc_usize_byref(memory, buf_used, host_bufused) -} - pub(crate) unsafe fn path_readlink( wasi_ctx: &WasiCtx, memory: &mut [u8], @@ -1032,6 +997,53 @@ pub(crate) unsafe fn fd_prestat_dir_name( enc_slice_of_u8(memory, path.as_bytes(), path_ptr) } +pub(crate) unsafe fn fd_readdir( + wasi_ctx: &mut WasiCtx, + memory: &mut [u8], + fd: wasi::__wasi_fd_t, + buf: wasi32::uintptr_t, + buf_len: wasi32::size_t, + cookie: wasi::__wasi_dircookie_t, + buf_used: wasi32::uintptr_t, +) -> Result<()> { + trace!( + "fd_readdir(fd={:?}, buf={:#x?}, buf_len={}, cookie={:#x?}, buf_used={:#x?})", + fd, + buf, + buf_len, + cookie, + buf_used, + ); + + enc_usize_byref(memory, buf_used, 0)?; + + let file = wasi_ctx + .get_fd_entry_mut(fd)? + .as_descriptor_mut(wasi::__WASI_RIGHTS_FD_READDIR, 0)? + .as_file_mut()?; + let mut host_buf = dec_slice_of_mut_u8(memory, buf, buf_len)?; + + trace!(" | (buf,buf_len)={:?}", host_buf); + + let iter = hostcalls_impl::fd_readdir(file, cookie)?; + let mut host_bufused = 0; + for dirent in iter { + let dirent_raw = dirent?.to_wasi_raw()?; + let offset = dirent_raw.len(); + if host_buf.len() < offset { + break; + } else { + host_buf[0..offset].copy_from_slice(&dirent_raw); + host_bufused += offset; + host_buf = &mut host_buf[offset..]; + } + } + + trace!(" | *buf_used={:?}", host_bufused); + + enc_usize_byref(memory, buf_used, host_bufused) +} + #[allow(dead_code)] // trouble with sockets #[derive(Clone, Copy, Debug)] #[repr(u8)] @@ -1061,7 +1073,6 @@ pub(crate) struct Dirent { } impl Dirent { - #![allow(unused)] // temporarily, until BSD catches up with this change /// Serialize the directory entry to the format define by `__wasi_fd_readdir`, /// so that the serialized entries can be concatenated by the implementation. pub fn to_wasi_raw(&self) -> Result> { diff --git a/crates/wasi-common/src/sys/unix/bsd/hostcalls_impl.rs b/crates/wasi-common/src/sys/unix/bsd/hostcalls_impl.rs index 96960bf122ed..40cfcb480998 100644 --- a/crates/wasi-common/src/sys/unix/bsd/hostcalls_impl.rs +++ b/crates/wasi-common/src/sys/unix/bsd/hostcalls_impl.rs @@ -1,12 +1,14 @@ +use super::super::dir::{Dir, Entry, SeekLoc}; use super::osfile::OsFile; -use crate::hostcalls_impl::PathGet; +use crate::hostcalls_impl::{Dirent, PathGet}; use crate::sys::host_impl; use crate::sys::unix::str_to_cstring; use crate::{wasi, Error, Result}; -use nix::libc::{self, c_long, c_void}; +use nix::libc; use std::convert::TryInto; use std::fs::File; use std::os::unix::prelude::AsRawFd; +use std::sync::MutexGuard; pub(crate) fn path_unlink_file(resolved: PathGet) -> Result<()> { use nix::errno; @@ -139,101 +141,6 @@ pub(crate) fn path_rename(resolved_old: PathGet, resolved_new: PathGet) -> Resul } } -pub(crate) fn fd_readdir( - os_file: &mut OsFile, - host_buf: &mut [u8], - cookie: wasi::__wasi_dircookie_t, -) -> Result { - use crate::sys::unix::bsd::osfile::DirStream; - use libc::{fdopendir, readdir, rewinddir, seekdir, telldir}; - use nix::errno::Errno; - use std::ffi::CStr; - use std::mem::ManuallyDrop; - use std::sync::Mutex; - - let dir_stream = match os_file.dir_stream { - Some(ref mut dir_stream) => dir_stream, - None => { - let file = os_file.file.try_clone()?; - let dir_ptr = unsafe { fdopendir(file.as_raw_fd()) }; - os_file.dir_stream.get_or_insert(Mutex::new(DirStream { - file: ManuallyDrop::new(file), - dir_ptr, - })) - } - }; - let dir_stream = dir_stream.lock().unwrap(); - - let host_buf_ptr = host_buf.as_mut_ptr(); - let host_buf_len = host_buf.len(); - - if cookie != wasi::__WASI_DIRCOOKIE_START { - unsafe { seekdir(dir_stream.dir_ptr, cookie as c_long) }; - } else { - unsafe { rewinddir(dir_stream.dir_ptr) }; - } - - let mut left = host_buf_len; - let mut host_buf_offset: usize = 0; - - loop { - let errno = Errno::last(); - let host_entry_ptr = unsafe { readdir(dir_stream.dir_ptr) }; - if host_entry_ptr.is_null() { - if errno != Errno::last() { - // TODO Is this correct? - // According to POSIX man (for Linux though!), there was an error - // if the errno value has changed at some point during the sequence - // of readdir calls - return Err(host_impl::errno_from_nix(Errno::last())); - } else { - // Not an error - break; - } - } - - let host_entry = unsafe { *host_entry_ptr }; - let mut wasi_entry: wasi::__wasi_dirent_t = host_impl::dirent_from_host(&host_entry)?; - // Set d_next manually: - // * on macOS d_seekoff is not set for some reason - // * on FreeBSD d_seekoff doesn't exist; there is d_off but it is - // not equivalent to the value read from telldir call - wasi_entry.d_next = unsafe { telldir(dir_stream.dir_ptr) } as wasi::__wasi_dircookie_t; - - log::debug!("fd_readdir host_entry = {:?}", host_entry); - log::debug!("fd_readdir wasi_entry = {:?}", wasi_entry); - - let name_len = host_entry.d_namlen.try_into()?; - let required_space = std::mem::size_of_val(&wasi_entry) + name_len; - - if required_space > left { - break; - } - - let name = unsafe { CStr::from_ptr(host_entry.d_name.as_ptr()) }.to_str()?; - log::debug!("fd_readdir entry name = {}", name); - - unsafe { - let ptr = host_buf_ptr.offset(host_buf_offset.try_into()?) as *mut c_void - as *mut wasi::__wasi_dirent_t; - *ptr = wasi_entry; - } - host_buf_offset += std::mem::size_of_val(&wasi_entry); - - unsafe { - std::ptr::copy_nonoverlapping( - name.as_ptr(), - host_buf_ptr.offset(host_buf_offset.try_into()?), - name_len, - ) - }; - host_buf_offset += name_len; - left -= required_space; - } - - Ok(host_buf_len - left) -} - #[cfg(any(target_os = "macos", target_os = "ios"))] pub(crate) fn fd_advise( file: &File, @@ -296,3 +203,88 @@ pub(crate) fn fd_advise( Ok(()) } + +pub(crate) fn fd_readdir<'a>( + os_file: &'a mut OsFile, + cookie: wasi::__wasi_dircookie_t, +) -> Result> + 'a> { + use std::sync::Mutex; + + let dir = match os_file.dir { + Some(ref mut dir) => dir, + None => { + // We need to duplicate the fd, because `opendir(3)`: + // Upon successful return from fdopendir(), the file descriptor is under + // control of the system, and if any attempt is made to close the file + // descriptor, or to modify the state of the associated description other + // than by means of closedir(), readdir(), readdir_r(), or rewinddir(), + // the behaviour is undefined. + let fd = (*os_file).try_clone()?; + let dir = Dir::from(fd)?; + os_file.dir.get_or_insert(Mutex::new(dir)) + } + }; + let mut dir = dir.lock().unwrap(); + + // Seek if needed. Unless cookie is wasi::__WASI_DIRCOOKIE_START, + // new items may not be returned to the caller. + if cookie == wasi::__WASI_DIRCOOKIE_START { + log::trace!(" | fd_readdir: doing rewinddir"); + dir.rewind(); + } else { + log::trace!(" | fd_readdir: doing seekdir to {}", cookie); + let loc = unsafe { SeekLoc::from_raw(cookie as i64) }; + dir.seek(loc); + } + + Ok(DirIter(dir).map(|entry| { + let (entry, loc): (Entry, SeekLoc) = entry?; + Ok(Dirent { + name: entry + // TODO can we reuse path_from_host for CStr? + .file_name() + .to_str()? + .to_owned(), + ino: entry.ino(), + ftype: entry.file_type().into(), + // Set cookie manually: + // * on macOS d_seekoff is not set for some reason + // * on FreeBSD d_seekoff doesn't exist; there is d_off but it is + // not equivalent to the value read from telldir call + cookie: loc.to_raw().try_into()?, + }) + })) +} + +struct DirIter<'a>(MutexGuard<'a, Dir>); + +impl<'a> Iterator for DirIter<'a> { + type Item = nix::Result<(Entry, SeekLoc)>; + + fn next(&mut self) -> Option { + use libc::readdir; + use nix::{errno::Errno, Error}; + + unsafe { + let errno = Errno::last(); + let ent = readdir((self.0).0.as_ptr()); + if ent.is_null() { + if errno != Errno::last() { + // TODO This should be verified on different BSD-flavours. + // + // According to 4.3BSD/POSIX.1-2001 man pages, there was an error + // if the errno value has changed at some point during the sequence + // of readdir calls. + Some(Err(Error::last())) + } else { + // Not an error. We've simply reached the end of the stream. + None + } + } else { + let entry = Entry(*ent); + let loc = self.0.tell(); + Some(Ok((entry, loc))) + } + } + } +} diff --git a/crates/wasi-common/src/sys/unix/bsd/mod.rs b/crates/wasi-common/src/sys/unix/bsd/mod.rs index 4fea6c835f4b..66bc2652b5ab 100644 --- a/crates/wasi-common/src/sys/unix/bsd/mod.rs +++ b/crates/wasi-common/src/sys/unix/bsd/mod.rs @@ -22,20 +22,5 @@ pub(crate) mod fdentry_impl { } pub(crate) mod host_impl { - use super::super::host_impl::dirent_filetype_from_host; - use crate::{wasi, Result}; - pub(crate) const O_RSYNC: nix::fcntl::OFlag = nix::fcntl::OFlag::O_SYNC; - - pub(crate) fn dirent_from_host( - host_entry: &nix::libc::dirent, - ) -> Result { - let mut entry = unsafe { std::mem::zeroed::() }; - let d_type = dirent_filetype_from_host(host_entry)?; - entry.d_ino = host_entry.d_ino; - entry.d_next = host_entry.d_seekoff; - entry.d_namlen = u32::from(host_entry.d_namlen); - entry.d_type = d_type; - Ok(entry) - } } diff --git a/crates/wasi-common/src/sys/unix/bsd/osfile.rs b/crates/wasi-common/src/sys/unix/bsd/osfile.rs index 41340149aa4e..379dcfdcedd4 100644 --- a/crates/wasi-common/src/sys/unix/bsd/osfile.rs +++ b/crates/wasi-common/src/sys/unix/bsd/osfile.rs @@ -1,33 +1,29 @@ +use super::super::dir::Dir; use std::fs; -use std::mem::ManuallyDrop; use std::ops::{Deref, DerefMut}; use std::os::unix::prelude::{AsRawFd, RawFd}; use std::sync::Mutex; -#[derive(Debug)] -pub(crate) struct DirStream { - pub(crate) file: ManuallyDrop, - pub(crate) dir_ptr: *mut libc::DIR, -} - -impl Drop for DirStream { - fn drop(&mut self) { - unsafe { libc::closedir(self.dir_ptr) }; - } -} - #[derive(Debug)] pub(crate) struct OsFile { pub(crate) file: fs::File, - pub(crate) dir_stream: Option>, + // In case that this `OsFile` actually refers to a directory, + // when the client makes a `fd_readdir` syscall on this descriptor, + // we will need to cache the `libc::DIR` pointer manually in order + // to be able to seek on it later. While on Linux, this is handled + // by the OS, BSD Unixes require the client to do this caching. + // + // This comes directly from the BSD man pages on `readdir`: + // > Values returned by telldir() are good only for the lifetime + // > of the DIR pointer, dirp, from which they are derived. + // > If the directory is closed and then reopened, prior values + // > returned by telldir() will no longer be valid. + pub(crate) dir: Option>, } impl From for OsFile { fn from(file: fs::File) -> Self { - Self { - file, - dir_stream: None, - } + Self { file, dir: None } } } diff --git a/crates/wasi-common/src/sys/unix/dir.rs b/crates/wasi-common/src/sys/unix/dir.rs index 20b75f968dcf..e3612dea1cf7 100644 --- a/crates/wasi-common/src/sys/unix/dir.rs +++ b/crates/wasi-common/src/sys/unix/dir.rs @@ -1,16 +1,15 @@ // Based on src/dir.rs from nix -#![allow(unused)] // temporarily, until BSD catches up with this change use crate::hostcalls_impl::FileType; use libc; -use nix::{errno::Errno, Error, Result}; +use nix::{Error, Result}; use std::os::unix::io::{AsRawFd, IntoRawFd, RawFd}; use std::{ffi, ptr}; #[cfg(target_os = "linux")] -use libc::{dirent64 as dirent, readdir64_r as readdir_r}; +use libc::dirent64 as dirent; -#[cfg(not(target_os = "linux"))] -use libc::{dirent, readdir_r}; +#[cfg(not(target_os = "linux",))] +use libc::dirent; /// An open directory. /// @@ -26,7 +25,7 @@ use libc::{dirent, readdir_r}; /// * returns entries' names as a `CStr` (no allocation or conversion beyond whatever libc /// does). #[derive(Clone, Debug, Eq, Hash, PartialEq)] -pub(crate) struct Dir(ptr::NonNull); +pub(crate) struct Dir(pub(crate) ptr::NonNull); impl Dir { /// Converts from a descriptor-based object, closing the descriptor on success or failure. @@ -90,53 +89,12 @@ impl Drop for Dir { } } -pub(crate) struct IntoIter(Dir); -impl Iterator for IntoIter { - type Item = Result; - fn next(&mut self) -> Option { - unsafe { - // Note: POSIX specifies that portable applications should dynamically allocate a - // buffer with room for a `d_name` field of size `pathconf(..., _PC_NAME_MAX)` plus 1 - // for the NUL byte. It doesn't look like the std library does this; it just uses - // fixed-sized buffers (and libc's dirent seems to be sized so this is appropriate). - // Probably fine here too then. - // - // See `impl Iterator for ReadDir` [1] for more details. - // [1] https://github.com/rust-lang/rust/blob/master/src/libstd/sys/unix/fs.rs - let mut ent = std::mem::MaybeUninit::::uninit(); - let mut result = ptr::null_mut(); - if let Err(e) = Errno::result(readdir_r( - (self.0).0.as_ptr(), - ent.as_mut_ptr(), - &mut result, - )) { - return Some(Err(e)); - } - if result.is_null() { - None - } else { - assert_eq!(result, ent.as_mut_ptr(), "readdir_r specification violated"); - Some(Ok(Entry(ent.assume_init()))) - } - } - } -} - -impl IntoIterator for Dir { - type IntoIter = IntoIter; - type Item = Result; - - fn into_iter(self) -> IntoIter { - IntoIter(self) - } -} - /// A directory entry, similar to `std::fs::DirEntry`. /// /// Note that unlike the std version, this may represent the `.` or `..` entries. #[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)] #[repr(transparent)] -pub(crate) struct Entry(dirent); +pub(crate) struct Entry(pub(crate) dirent); pub(crate) type Type = FileType; diff --git a/crates/wasi-common/src/sys/unix/linux/hostcalls_impl.rs b/crates/wasi-common/src/sys/unix/linux/hostcalls_impl.rs index fd267738d43b..2279a15ae20a 100644 --- a/crates/wasi-common/src/sys/unix/linux/hostcalls_impl.rs +++ b/crates/wasi-common/src/sys/unix/linux/hostcalls_impl.rs @@ -1,5 +1,4 @@ use super::super::dir::{Dir, Entry, SeekLoc}; -use super::osfile::OsFile; use crate::hostcalls_impl::{Dirent, PathGet}; use crate::sys::host_impl; use crate::sys::unix::str_to_cstring; @@ -67,7 +66,7 @@ pub(crate) fn path_rename(resolved_old: PathGet, resolved_new: PathGet) -> Resul } } -pub(crate) fn fd_readdir_impl( +pub(crate) fn fd_readdir( fd: &File, cookie: wasi::__wasi_dircookie_t, ) -> Result>> { @@ -98,7 +97,7 @@ pub(crate) fn fd_readdir_impl( dir.seek(loc); } - Ok(dir.into_iter().map(|entry| { + Ok(DirIter(dir).map(|entry| { let entry: Entry = entry?; Ok(Dirent { name: entry // TODO can we reuse path_from_host for CStr? @@ -112,29 +111,41 @@ pub(crate) fn fd_readdir_impl( })) } -// This should actually be common code with Windows, -// but there's BSD stuff remaining -pub(crate) fn fd_readdir( - os_file: &mut OsFile, - mut host_buf: &mut [u8], - cookie: wasi::__wasi_dircookie_t, -) -> Result { - let iter = fd_readdir_impl(os_file, cookie)?; - let mut used = 0; - for dirent in iter { - let dirent_raw = dirent?.to_wasi_raw()?; - let offset = dirent_raw.len(); - if host_buf.len() < offset { - break; - } else { - host_buf[0..offset].copy_from_slice(&dirent_raw); - used += offset; - host_buf = &mut host_buf[offset..]; +struct DirIter(Dir); + +impl Iterator for DirIter { + type Item = nix::Result; + + fn next(&mut self) -> Option { + use libc::{dirent64, readdir64_r}; + use nix::errno::Errno; + + unsafe { + // Note: POSIX specifies that portable applications should dynamically allocate a + // buffer with room for a `d_name` field of size `pathconf(..., _PC_NAME_MAX)` plus 1 + // for the NUL byte. It doesn't look like the std library does this; it just uses + // fixed-sized buffers (and libc's dirent seems to be sized so this is appropriate). + // Probably fine here too then. + // + // See `impl Iterator for ReadDir` [1] for more details. + // [1] https://github.com/rust-lang/rust/blob/master/src/libstd/sys/unix/fs.rs + let mut ent = std::mem::MaybeUninit::::uninit(); + let mut result = std::ptr::null_mut(); + if let Err(e) = Errno::result(readdir64_r( + (self.0).0.as_ptr(), + ent.as_mut_ptr(), + &mut result, + )) { + return Some(Err(e)); + } + if result.is_null() { + None + } else { + assert_eq!(result, ent.as_mut_ptr(), "readdir_r specification violated"); + Some(Ok(Entry(ent.assume_init()))) + } } } - - trace!(" | *buf_used={:?}", used); - Ok(used) } pub(crate) fn fd_advise( diff --git a/crates/wasi-common/src/sys/windows/hostcalls_impl/fs.rs b/crates/wasi-common/src/sys/windows/hostcalls_impl/fs.rs index 0279a257cf7d..561a50befcf2 100644 --- a/crates/wasi-common/src/sys/windows/hostcalls_impl/fs.rs +++ b/crates/wasi-common/src/sys/windows/hostcalls_impl/fs.rs @@ -218,7 +218,7 @@ fn dirent_from_path>( // . gets cookie = 1 // .. gets cookie = 2 // other entries, in order they were returned by FindNextFileW get subsequent integers as their cookies -pub(crate) fn fd_readdir_impl( +pub(crate) fn fd_readdir( fd: &File, cookie: wasi::__wasi_dircookie_t, ) -> Result>> { @@ -257,30 +257,6 @@ pub(crate) fn fd_readdir_impl( Ok(iter.skip(cookie)) } -// This should actually be common code with Linux -pub(crate) fn fd_readdir( - os_file: &mut OsFile, - mut host_buf: &mut [u8], - cookie: wasi::__wasi_dircookie_t, -) -> Result { - let iter = fd_readdir_impl(os_file, cookie)?; - let mut used = 0; - for dirent in iter { - let dirent_raw = dirent?.to_wasi_raw()?; - let offset = dirent_raw.len(); - if host_buf.len() < offset { - break; - } else { - host_buf[0..offset].copy_from_slice(&dirent_raw); - used += offset; - host_buf = &mut host_buf[offset..]; - } - } - - trace!(" | *buf_used={:?}", used); - Ok(used) -} - pub(crate) fn path_readlink(resolved: PathGet, buf: &mut [u8]) -> Result { use winx::file::get_file_path;