Skip to content

Commit

Permalink
feat: WalkDirGeneric::try_into_iter() for early error handling.
Browse files Browse the repository at this point in the history
If we can't instantiate the iterator due to a busy thread-pool,
we can now abort early instead of yielding a fake-entry just to
show an error occurred. This is the preferred way to instantiate
a  `jwalk` iterator.
  • Loading branch information
Byron committed Dec 15, 2022
1 parent 3bf1bc2 commit 7d5b8b8
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 23 deletions.
14 changes: 2 additions & 12 deletions src/core/dir_entry_iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use crate::Result;
pub struct DirEntryIter<C: ClientState> {
min_depth: usize,
// iterator yielding next ReadDir results when needed
read_dir_iter: Option<Peekable<ReadDirIter<C>>>,
pub(crate) read_dir_iter: Option<Peekable<ReadDirIter<C>>>,
// stack of ReadDir results, track location in filesystem traversal
read_dir_results_stack: Vec<vec::IntoIter<Result<DirEntry<C>>>>,
}
Expand Down Expand Up @@ -87,17 +87,7 @@ impl<C: ClientState> Iterator for DirEntryIter<C> {
// 2.2 If dir_entry has a read_children_path means we need to read a new
// directory and push those results onto read_dir_results_stack
if dir_entry.read_children_path.is_some() {
let iter = match self.read_dir_iter
.as_mut()
.ok_or_else(|| {
Error::from_io(
0,
std::io::Error::new(
std::io::ErrorKind::Other,
"rayon thread-pool too busy or dependency loop detected - aborting before possibility of deadlock",
),
)
}) {
let iter = match self.read_dir_iter.as_mut().ok_or_else(|| Error::busy()) {
Ok(iter) => iter,
Err(err) => return Some(Err(err)),
};
Expand Down
27 changes: 24 additions & 3 deletions src/core/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ enum ErrorInner {
ancestor: PathBuf,
child: PathBuf,
},
ThreadpoolBusy,
}

impl Error {
Expand All @@ -51,6 +52,7 @@ impl Error {
/// [`std::fs::read_dir`]: https://doc.rust-lang.org/stable/std/fs/fn.read_dir.html
pub fn path(&self) -> Option<&Path> {
match self.inner {
ErrorInner::ThreadpoolBusy => None,
ErrorInner::Io { path: None, .. } => None,
ErrorInner::Io {
path: Some(ref path),
Expand Down Expand Up @@ -152,10 +154,17 @@ impl Error {
pub fn io_error(&self) -> Option<&io::Error> {
match self.inner {
ErrorInner::Io { ref err, .. } => Some(err),
ErrorInner::Loop { .. } => None,
_ => None,
}
}

/// Returns true if this error is due to a busy thread-pool that prevented its effective use.
///
/// Note that business detection is timeout based, and we don't know if it would have been a deadlock or not.
pub fn is_busy(&self) -> bool {
matches!(self.inner, ErrorInner::ThreadpoolBusy)
}

/// Similar to [`io_error`] except consumes self to convert to the original
/// [`io::Error`] if one exists.
///
Expand All @@ -164,10 +173,16 @@ impl Error {
pub fn into_io_error(self) -> Option<io::Error> {
match self.inner {
ErrorInner::Io { err, .. } => Some(err),
ErrorInner::Loop { .. } => None,
_ => None,
}
}

pub(crate) fn busy() -> Self {
Error {
depth: 0,
inner: ErrorInner::ThreadpoolBusy,
}
}
pub(crate) fn from_path(depth: usize, pb: PathBuf, err: io::Error) -> Self {
Error {
depth,
Expand Down Expand Up @@ -210,7 +225,7 @@ impl error::Error for Error {
fn source(&self) -> Option<&(dyn error::Error + 'static)> {
match self.inner {
ErrorInner::Io { ref err, .. } => Some(err),
ErrorInner::Loop { .. } => None,
ErrorInner::Loop { .. } | ErrorInner::ThreadpoolBusy => None,
}
}

Expand All @@ -219,6 +234,7 @@ impl error::Error for Error {
match self.inner {
ErrorInner::Io { ref err, .. } => err.description(),
ErrorInner::Loop { .. } => "file system loop found",
ErrorInner::ThreadpoolBusy => "thread-pool busy",
}
}

Expand All @@ -230,6 +246,7 @@ impl error::Error for Error {
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self.inner {
ErrorInner::ThreadpoolBusy => f.write_str("rayon thread-pool too busy or dependency loop detected - aborting before possibility of deadlock"),
ErrorInner::Io {
path: None,
ref err,
Expand Down Expand Up @@ -274,6 +291,10 @@ impl From<Error> for io::Error {
inner: ErrorInner::Loop { .. },
..
} => io::ErrorKind::Other,
Error {
inner: ErrorInner::ThreadpoolBusy,
..
} => io::ErrorKind::Other,
};
io::Error::new(kind, walk_err)
}
Expand Down
14 changes: 14 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,10 @@ impl<C: ClientState> WalkDirGeneric<C> {
/// path root. If root is a directory, then it is the first item yielded by
/// the iterator. If root is a file, then it is the first and only item
/// yielded by the iterator.
///
/// Note that his iterator can fail on the first element if `into_iter()` is used as it
/// has to be infallible. Use [`try_into_iter()`][WalkDirGeneric::try_into_iter()]
/// instead for error handling.
pub fn new<P: AsRef<Path>>(root: P) -> Self {
WalkDirGeneric {
root: root.as_ref().to_path_buf(),
Expand All @@ -230,6 +234,16 @@ impl<C: ClientState> WalkDirGeneric<C> {
}
}

/// Try to create an iterator or fail if the rayon threadpool (in any configuration) is busy.
pub fn try_into_iter(self) -> Result<DirEntryIter<C>> {
let iter = self.into_iter();
if iter.read_dir_iter.is_none() {
Err(Error::busy())
} else {
Ok(iter)
}
}

/// Root path of the walk.
pub fn root(&self) -> &Path {
&self.root
Expand Down
20 changes: 12 additions & 8 deletions tests/detect_deadlock.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,20 @@ fn works() {
(0..=1)
.collect::<Vec<usize>>()
.par_iter()
.for_each(|_round| {
for entry in WalkDir::new(".").parallelism(jwalk::Parallelism::RayonDefaultPool {
.for_each(|round| {
let generic = WalkDir::new(".").parallelism(jwalk::Parallelism::RayonDefaultPool {
busy_timeout: std::time::Duration::from_millis(10),
}) {
match entry {
Ok(_) => panic!("Must detect deadlock"),
Err(err)
if err.io_error().expect("is IO").kind() == std::io::ErrorKind::Other => {}
Err(err) => panic!("Unexpected error: {:?}", err),
});
if *round == 0 {
for entry in generic {
match entry {
Ok(_) => panic!("Must detect deadlock"),
Err(err) if err.is_busy() => {}
Err(err) => panic!("Unexpected error: {:?}", err),
}
}
} else {
assert!(matches!(generic.try_into_iter(), Err(err) if err.is_busy()));
}
});
}

0 comments on commit 7d5b8b8

Please sign in to comment.