From e36b65a11a7fa72e4b43f89c71441884269f0522 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Thu, 1 Feb 2018 21:11:02 -0500 Subject: [PATCH] windows: fix OneDrive traversals This commit fixes a bug on Windows where directory traversals were completely broken when attempting to scan OneDrive directories that use the "file on demand" strategy. The specific problem was that Rust's standard library treats OneDrive directories as reparse points instead of directories, which causes methods like `FileType::is_file` and `FileType::is_dir` to always return false, even when retrieved via methods like `metadata` that purport to follow symbolic links. We fix this by peppering our code with checks on the underlying file attributes exposed by Windows. We consider an entry a directory if and only if the directory bit is set on the attributes. We are careful to make sure that the code remains the same on non-Windows platforms. Note that we also bump the dependency on `walkdir`, which contains a similar fix for its traversals. This bug is recorded upstream: https://github.com/rust-lang/rust/issues/46484 Upstream also has a pending PR: https://github.com/rust-lang/rust/pull/47956 Fixes #705 --- Cargo.lock | 9 +++-- Cargo.toml | 4 ++ ignore/Cargo.toml | 4 ++ ignore/src/lib.rs | 2 + ignore/src/walk.rs | 93 ++++++++++++++++++++++++++++++++++++++++++---- src/args.rs | 47 +++++++++++++++++++++-- src/main.rs | 52 +++++++++++++++++++++++--- 7 files changed, 192 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 13fbb69ae..c9291fafa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -134,7 +134,8 @@ dependencies = [ "same-file 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", "tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", "thread_local 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", - "walkdir 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "walkdir 2.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -249,6 +250,7 @@ dependencies = [ "regex 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)", "same-file 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", "termcolor 0.3.3", + "winapi 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -341,10 +343,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "walkdir" -version = "2.1.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "same-file 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -412,7 +415,7 @@ dependencies = [ "checksum utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "662fab6525a98beff2921d7f61a39e7d59e0b425ebc7d0d9e66d316e55124122" "checksum vec_map 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "887b5b631c2ad01628bbbaa7dd4c869f80d3186688f8d0b6f58774fbe324988c" "checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" -"checksum walkdir 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "bb9e0499dfaf907659414fa6895e019db6694aa5ecb36e1a8003b6ba472d5705" +"checksum walkdir 2.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6e44abc83f36bfebb7ee3e140d324dfb4a75d17047d9d16c49143aafeb122cff" "checksum winapi 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "890b38836c01d72fdb636d15c9cfc52ec7fd783b330abc93cd1686f4308dfccc" "checksum winapi-i686-pc-windows-gnu 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "ec6667f60c23eca65c561e63a13d81b44234c2e38a6b6c959025ee907ec614cc" "checksum winapi-x86_64-pc-windows-gnu 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "98f12c52b2630cd05d2c3ffd8e008f7f48252c042b4871c72aed9dc733b96668" diff --git a/Cargo.toml b/Cargo.toml index 62267e262..e6f73ef4b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -51,6 +51,10 @@ same-file = "1" termcolor = { version = "0.3.3", path = "termcolor" } globset = { version = "0.2.1", path = "globset" } +[target.'cfg(windows)'.dependencies.winapi] +version = "0.3" +features = ["std", "winnt"] + [build-dependencies] clap = "2.26" lazy_static = "1" diff --git a/ignore/Cargo.toml b/ignore/Cargo.toml index b2c86778d..89d8672c0 100644 --- a/ignore/Cargo.toml +++ b/ignore/Cargo.toml @@ -28,6 +28,10 @@ same-file = "1" thread_local = "0.3.2" walkdir = "2" +[target.'cfg(windows)'.dependencies.winapi] +version = "0.3" +features = ["std", "winnt"] + [dev-dependencies] tempdir = "0.3.5" diff --git a/ignore/src/lib.rs b/ignore/src/lib.rs index 3fbb8974d..a578487cb 100644 --- a/ignore/src/lib.rs +++ b/ignore/src/lib.rs @@ -59,6 +59,8 @@ extern crate same_file; extern crate tempdir; extern crate thread_local; extern crate walkdir; +#[cfg(windows)] +extern crate winapi; use std::error; use std::fmt; diff --git a/ignore/src/walk.rs b/ignore/src/walk.rs index cde4f7505..0ecd28bc8 100644 --- a/ignore/src/walk.rs +++ b/ignore/src/walk.rs @@ -89,6 +89,11 @@ impl DirEntry { self.err.as_ref() } + /// Returns true if and only if this entry points to a directory. + fn is_dir(&self) -> bool { + self.dent.is_dir() + } + fn new_stdin() -> DirEntry { DirEntry { dent: DirEntryInner::Stdin, @@ -208,6 +213,24 @@ impl DirEntryInner { Raw(ref x) => Some(x.ino()), } } + + /// Returns true if and only if this entry points to a directory. + /// + /// This works around a bug in Rust's standard library: + /// https://github.com/rust-lang/rust/issues/46484 + #[cfg(windows)] + fn is_dir(&self) -> bool { + self.metadata().map(|md| metadata_is_dir(&md)).unwrap_or(false) + } + + /// Returns true if and only if this entry points to a directory. + /// + /// This works around a bug in Rust's standard library: + /// https://github.com/rust-lang/rust/issues/46484 + #[cfg(not(windows))] + fn is_dir(&self) -> bool { + self.file_type().map(|ft| ft.is_dir()).unwrap_or(false) + } } /// DirEntryRaw is essentially copied from the walkdir crate so that we can @@ -456,7 +479,7 @@ impl WalkBuilder { (p.to_path_buf(), None) } else { let mut wd = WalkDir::new(p); - wd = wd.follow_links(follow_links || p.is_file()); + wd = wd.follow_links(follow_links || path_is_file(p)); if let Some(max_depth) = max_depth { wd = wd.max_depth(max_depth); } @@ -728,7 +751,7 @@ impl Walk { return false; } - let is_dir = ent.file_type().is_dir(); + let is_dir = walkdir_entry_is_dir(ent); let max_size = self.max_filesize; let should_skip_path = skip_path(&self.ig, ent.path(), is_dir); let should_skip_filesize = if !is_dir && max_size.is_some() { @@ -757,7 +780,7 @@ impl Iterator for Walk { } Some((path, Some(it))) => { self.it = Some(it); - if self.parents && path.is_dir() { + if self.parents && path_is_dir(&path) { let (ig, err) = self.ig_root.add_parents(path); self.ig = ig; if let Some(err) = err { @@ -847,7 +870,7 @@ impl Iterator for WalkEventIter { None => None, Some(Err(err)) => Some(Err(err)), Some(Ok(dent)) => { - if dent.file_type().is_dir() { + if walkdir_entry_is_dir(&dent) { self.depth += 1; Some(Ok(WalkEvent::Dir(dent))) } else { @@ -1000,7 +1023,7 @@ struct Work { impl Work { /// Returns true if and only if this work item is a directory. fn is_dir(&self) -> bool { - self.dent.file_type().map_or(false, |t| t.is_dir()) + self.dent.is_dir() } /// Adds ignore rules for parent directories. @@ -1174,13 +1197,13 @@ impl Worker { return (self.f)(Err(err)); } }; - if dent.file_type().map_or(false, |ft| ft.is_dir()) { + if dent.is_dir() { if let Err(err) = check_symlink_loop(ig, dent.path(), depth) { return (self.f)(Err(err)); } } } - let is_dir = dent.file_type().map_or(false, |ft| ft.is_dir()); + let is_dir = dent.is_dir(); let max_size = self.max_filesize; let should_skip_path = skip_path(ig, dent.path(), is_dir); let should_skip_filesize = if !is_dir && max_size.is_some() { @@ -1376,6 +1399,62 @@ fn skip_path(ig: &Ignore, path: &Path, is_dir: bool) -> bool { } } +/// Returns true if and only if this path points to a directory. +/// +/// This works around a bug in Rust's standard library: +/// https://github.com/rust-lang/rust/issues/46484 +#[cfg(windows)] +fn path_is_dir(path: &Path) -> bool { + fs::metadata(path).map(|md| metadata_is_dir(&md)).unwrap_or(false) +} + +/// Returns true if and only if this entry points to a directory. +#[cfg(not(windows))] +fn path_is_dir(path: &Path) -> bool { + path.is_dir() +} + +/// Returns true if and only if this path points to a file. +/// +/// This works around a bug in Rust's standard library: +/// https://github.com/rust-lang/rust/issues/46484 +#[cfg(windows)] +fn path_is_file(path: &Path) -> bool { + !path_is_dir(path) +} + +/// Returns true if and only if this entry points to a directory. +#[cfg(not(windows))] +fn path_is_file(path: &Path) -> bool { + path.is_file() +} + +/// Returns true if and only if the given walkdir entry points to a directory. +/// +/// This works around a bug in Rust's standard library: +/// https://github.com/rust-lang/rust/issues/46484 +#[cfg(windows)] +fn walkdir_entry_is_dir(dent: &walkdir::DirEntry) -> bool { + dent.metadata().map(|md| metadata_is_dir(&md)).unwrap_or(false) +} + +/// Returns true if and only if the given walkdir entry points to a directory. +#[cfg(not(windows))] +fn walkdir_entry_is_dir(dent: &walkdir::DirEntry) -> bool { + dent.file_type().is_dir() +} + +/// Returns true if and only if the given metadata points to a directory. +/// +/// This works around a bug in Rust's standard library: +/// https://github.com/rust-lang/rust/issues/46484 +#[cfg(windows)] +fn metadata_is_dir(md: &fs::Metadata) -> bool { + use std::os::windows::fs::MetadataExt; + use winapi::um::winnt::FILE_ATTRIBUTE_DIRECTORY; + md.file_attributes() & FILE_ATTRIBUTE_DIRECTORY != 0 +} + #[cfg(test)] mod tests { use std::fs::{self, File}; diff --git a/src/args.rs b/src/args.rs index 030adf0f8..a0c1d7db1 100644 --- a/src/args.rs +++ b/src/args.rs @@ -209,7 +209,7 @@ impl Args { /// Returns true if there is exactly one file path given to search. pub fn is_one_path(&self) -> bool { self.paths.len() == 1 - && (self.paths[0] == Path::new("-") || self.paths[0].is_file()) + && (self.paths[0] == Path::new("-") || path_is_file(&self.paths[0])) } /// Create a worker whose configuration is taken from the @@ -562,7 +562,7 @@ impl<'a> ArgMatches<'a> { self.is_present("with-filename") || self.is_present("vimgrep") || paths.len() > 1 - || paths.get(0).map_or(false, |p| p.is_dir()) + || paths.get(0).map_or(false, |p| path_is_dir(p)) } } @@ -609,7 +609,7 @@ impl<'a> ArgMatches<'a> { } else { // If we're only searching a few paths and all of them are // files, then memory maps are probably faster. - paths.len() <= 10 && paths.iter().all(|p| p.is_file()) + paths.len() <= 10 && paths.iter().all(|p| path_is_file(p)) }) } @@ -1036,3 +1036,44 @@ fn stdin_is_readable() -> bool { // always return true. true } + +/// Returns true if and only if this path points to a directory. +/// +/// This works around a bug in Rust's standard library: +/// https://github.com/rust-lang/rust/issues/46484 +#[cfg(windows)] +fn path_is_dir(path: &Path) -> bool { + fs::metadata(path).map(|md| metadata_is_dir(&md)).unwrap_or(false) +} + +/// Returns true if and only if this entry points to a directory. +#[cfg(not(windows))] +fn path_is_dir(path: &Path) -> bool { + path.is_dir() +} + +/// Returns true if and only if this path points to a file. +/// +/// This works around a bug in Rust's standard library: +/// https://github.com/rust-lang/rust/issues/46484 +#[cfg(windows)] +fn path_is_file(path: &Path) -> bool { + !path_is_dir(path) +} + +/// Returns true if and only if this entry points to a directory. +#[cfg(not(windows))] +fn path_is_file(path: &Path) -> bool { + path.is_file() +} + +/// Returns true if and only if the given metadata points to a directory. +/// +/// This works around a bug in Rust's standard library: +/// https://github.com/rust-lang/rust/issues/46484 +#[cfg(windows)] +fn metadata_is_dir(md: &fs::Metadata) -> bool { + use std::os::windows::fs::MetadataExt; + use winapi::um::winnt::FILE_ATTRIBUTE_DIRECTORY; + md.file_attributes() & FILE_ATTRIBUTE_DIRECTORY != 0 +} diff --git a/src/main.rs b/src/main.rs index 7d39aa66e..fea89e441 100644 --- a/src/main.rs +++ b/src/main.rs @@ -18,6 +18,8 @@ extern crate num_cpus; extern crate regex; extern crate same_file; extern crate termcolor; +#[cfg(windows)] +extern crate winapi; use std::error::Error; use std::process; @@ -267,15 +269,14 @@ fn get_or_log_dir_entry( eprintln!("{}", err); } } - let ft = match dent.file_type() { - None => return Some(dent), // entry is stdin - Some(ft) => ft, - }; + if dent.file_type().is_none() { + return Some(dent); // entry is stdin + } // A depth of 0 means the user gave the path explicitly, so we // should always try to search it. - if dent.depth() == 0 && !ft.is_dir() { + if dent.depth() == 0 && !ignore_entry_is_dir(&dent) { return Some(dent); - } else if !ft.is_file() { + } else if !ignore_entry_is_file(&dent) { return None; } // If we are redirecting stdout to a file, then don't search that @@ -288,6 +289,45 @@ fn get_or_log_dir_entry( } } +/// Returns true if and only if the given `ignore::DirEntry` points to a +/// directory. +/// +/// This works around a bug in Rust's standard library: +/// https://github.com/rust-lang/rust/issues/46484 +#[cfg(windows)] +fn ignore_entry_is_dir(dent: &ignore::DirEntry) -> bool { + use std::os::windows::fs::MetadataExt; + use winapi::um::winnt::FILE_ATTRIBUTE_DIRECTORY; + + dent.metadata().map(|md| { + md.file_attributes() & FILE_ATTRIBUTE_DIRECTORY != 0 + }).unwrap_or(false) +} + +/// Returns true if and only if the given `ignore::DirEntry` points to a +/// directory. +#[cfg(not(windows))] +fn ignore_entry_is_dir(dent: &ignore::DirEntry) -> bool { + dent.file_type().map_or(false, |ft| ft.is_dir()) +} + +/// Returns true if and only if the given `ignore::DirEntry` points to a +/// file. +/// +/// This works around a bug in Rust's standard library: +/// https://github.com/rust-lang/rust/issues/46484 +#[cfg(windows)] +fn ignore_entry_is_file(dent: &ignore::DirEntry) -> bool { + !ignore_entry_is_dir(dent) +} + +/// Returns true if and only if the given `ignore::DirEntry` points to a +/// file. +#[cfg(not(windows))] +fn ignore_entry_is_file(dent: &ignore::DirEntry) -> bool { + dent.file_type().map_or(false, |ft| ft.is_file()) +} + fn is_stdout_file( dent: &ignore::DirEntry, stdout_handle: Option<&same_file::Handle>,