From 7d5b8b870bfca2b1b68de1427fbdc0ec1a1bff2b Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 15 Dec 2022 11:35:37 +0100 Subject: [PATCH] feat: `WalkDirGeneric::try_into_iter()` for early error handling. If we can't instantiate the iterator due to a busy thread-pool, we can now abort early instead of yielding a fake-entry just to show an error occurred. This is the preferred way to instantiate a `jwalk` iterator. --- src/core/dir_entry_iter.rs | 14 ++------------ src/core/error.rs | 27 ++++++++++++++++++++++++--- src/lib.rs | 14 ++++++++++++++ tests/detect_deadlock.rs | 20 ++++++++++++-------- 4 files changed, 52 insertions(+), 23 deletions(-) diff --git a/src/core/dir_entry_iter.rs b/src/core/dir_entry_iter.rs index b948e3c..8b4664b 100644 --- a/src/core/dir_entry_iter.rs +++ b/src/core/dir_entry_iter.rs @@ -9,7 +9,7 @@ use crate::Result; pub struct DirEntryIter { min_depth: usize, // iterator yielding next ReadDir results when needed - read_dir_iter: Option>>, + pub(crate) read_dir_iter: Option>>, // stack of ReadDir results, track location in filesystem traversal read_dir_results_stack: Vec>>>, } @@ -87,17 +87,7 @@ impl Iterator for DirEntryIter { // 2.2 If dir_entry has a read_children_path means we need to read a new // directory and push those results onto read_dir_results_stack if dir_entry.read_children_path.is_some() { - let iter = match self.read_dir_iter - .as_mut() - .ok_or_else(|| { - Error::from_io( - 0, - std::io::Error::new( - std::io::ErrorKind::Other, - "rayon thread-pool too busy or dependency loop detected - aborting before possibility of deadlock", - ), - ) - }) { + let iter = match self.read_dir_iter.as_mut().ok_or_else(|| Error::busy()) { Ok(iter) => iter, Err(err) => return Some(Err(err)), }; diff --git a/src/core/error.rs b/src/core/error.rs index 6e82c01..d96deb5 100644 --- a/src/core/error.rs +++ b/src/core/error.rs @@ -40,6 +40,7 @@ enum ErrorInner { ancestor: PathBuf, child: PathBuf, }, + ThreadpoolBusy, } impl Error { @@ -51,6 +52,7 @@ impl Error { /// [`std::fs::read_dir`]: https://doc.rust-lang.org/stable/std/fs/fn.read_dir.html pub fn path(&self) -> Option<&Path> { match self.inner { + ErrorInner::ThreadpoolBusy => None, ErrorInner::Io { path: None, .. } => None, ErrorInner::Io { path: Some(ref path), @@ -152,10 +154,17 @@ impl Error { pub fn io_error(&self) -> Option<&io::Error> { match self.inner { ErrorInner::Io { ref err, .. } => Some(err), - ErrorInner::Loop { .. } => None, + _ => None, } } + /// Returns true if this error is due to a busy thread-pool that prevented its effective use. + /// + /// Note that business detection is timeout based, and we don't know if it would have been a deadlock or not. + pub fn is_busy(&self) -> bool { + matches!(self.inner, ErrorInner::ThreadpoolBusy) + } + /// Similar to [`io_error`] except consumes self to convert to the original /// [`io::Error`] if one exists. /// @@ -164,10 +173,16 @@ impl Error { pub fn into_io_error(self) -> Option { match self.inner { ErrorInner::Io { err, .. } => Some(err), - ErrorInner::Loop { .. } => None, + _ => None, } } + pub(crate) fn busy() -> Self { + Error { + depth: 0, + inner: ErrorInner::ThreadpoolBusy, + } + } pub(crate) fn from_path(depth: usize, pb: PathBuf, err: io::Error) -> Self { Error { depth, @@ -210,7 +225,7 @@ impl error::Error for Error { fn source(&self) -> Option<&(dyn error::Error + 'static)> { match self.inner { ErrorInner::Io { ref err, .. } => Some(err), - ErrorInner::Loop { .. } => None, + ErrorInner::Loop { .. } | ErrorInner::ThreadpoolBusy => None, } } @@ -219,6 +234,7 @@ impl error::Error for Error { match self.inner { ErrorInner::Io { ref err, .. } => err.description(), ErrorInner::Loop { .. } => "file system loop found", + ErrorInner::ThreadpoolBusy => "thread-pool busy", } } @@ -230,6 +246,7 @@ impl error::Error for Error { impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self.inner { + ErrorInner::ThreadpoolBusy => f.write_str("rayon thread-pool too busy or dependency loop detected - aborting before possibility of deadlock"), ErrorInner::Io { path: None, ref err, @@ -274,6 +291,10 @@ impl From for io::Error { inner: ErrorInner::Loop { .. }, .. } => io::ErrorKind::Other, + Error { + inner: ErrorInner::ThreadpoolBusy, + .. + } => io::ErrorKind::Other, }; io::Error::new(kind, walk_err) } diff --git a/src/lib.rs b/src/lib.rs index 697a7f5..299fe03 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -212,6 +212,10 @@ impl WalkDirGeneric { /// path root. If root is a directory, then it is the first item yielded by /// the iterator. If root is a file, then it is the first and only item /// yielded by the iterator. + /// + /// Note that his iterator can fail on the first element if `into_iter()` is used as it + /// has to be infallible. Use [`try_into_iter()`][WalkDirGeneric::try_into_iter()] + /// instead for error handling. pub fn new>(root: P) -> Self { WalkDirGeneric { root: root.as_ref().to_path_buf(), @@ -230,6 +234,16 @@ impl WalkDirGeneric { } } + /// Try to create an iterator or fail if the rayon threadpool (in any configuration) is busy. + pub fn try_into_iter(self) -> Result> { + let iter = self.into_iter(); + if iter.read_dir_iter.is_none() { + Err(Error::busy()) + } else { + Ok(iter) + } + } + /// Root path of the walk. pub fn root(&self) -> &Path { &self.root diff --git a/tests/detect_deadlock.rs b/tests/detect_deadlock.rs index afd0fe1..7678958 100644 --- a/tests/detect_deadlock.rs +++ b/tests/detect_deadlock.rs @@ -11,16 +11,20 @@ fn works() { (0..=1) .collect::>() .par_iter() - .for_each(|_round| { - for entry in WalkDir::new(".").parallelism(jwalk::Parallelism::RayonDefaultPool { + .for_each(|round| { + let generic = WalkDir::new(".").parallelism(jwalk::Parallelism::RayonDefaultPool { busy_timeout: std::time::Duration::from_millis(10), - }) { - match entry { - Ok(_) => panic!("Must detect deadlock"), - Err(err) - if err.io_error().expect("is IO").kind() == std::io::ErrorKind::Other => {} - Err(err) => panic!("Unexpected error: {:?}", err), + }); + if *round == 0 { + for entry in generic { + match entry { + Ok(_) => panic!("Must detect deadlock"), + Err(err) if err.is_busy() => {} + Err(err) => panic!("Unexpected error: {:?}", err), + } } + } else { + assert!(matches!(generic.try_into_iter(), Err(err) if err.is_busy())); } }); }