From 70c7c29266bc4396c968c7aa311c9721929a7cab Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 13 Sep 2023 20:00:07 +0200 Subject: [PATCH 01/12] fix!: parallel utilities that create thread-state now use `FnOnce`. This way, all unnecessary cloning is avoided. --- gix-features/src/parallel/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gix-features/src/parallel/mod.rs b/gix-features/src/parallel/mod.rs index 6a95761b2c3..5a0a4b5890d 100644 --- a/gix-features/src/parallel/mod.rs +++ b/gix-features/src/parallel/mod.rs @@ -135,7 +135,7 @@ pub fn in_parallel_if( condition: impl FnOnce() -> bool, input: impl Iterator + Send, thread_limit: Option, - new_thread_state: impl Fn(usize) -> S + Send + Clone, + new_thread_state: impl FnOnce(usize) -> S + Send + Clone, consume: impl FnMut(I, &mut S) -> O + Send + Clone, reducer: R, ) -> Result<::Output, ::Error> @@ -161,7 +161,7 @@ pub fn in_parallel_if( _condition: impl FnOnce() -> bool, input: impl Iterator, thread_limit: Option, - new_thread_state: impl Fn(usize) -> S, + new_thread_state: impl FnOnce(usize) -> S, consume: impl FnMut(I, &mut S) -> O, reducer: R, ) -> Result<::Output, ::Error> From 959dc175b7925e0a7952c23ad92f83a32ad9609c Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 12 Sep 2023 10:21:19 +0200 Subject: [PATCH 02/12] feat: add trace for `State::from_tree()` as it's rather time-consuming --- gix-index/src/init.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/gix-index/src/init.rs b/gix-index/src/init.rs index abd71ffdd28..9fe0b8e27bc 100644 --- a/gix-index/src/init.rs +++ b/gix-index/src/init.rs @@ -39,6 +39,7 @@ mod from_tree { where Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Option>, { + let _span = gix_features::trace::coarse!("gix_index::State::from_tree()"); let mut buf = Vec::new(); let root = find(tree, &mut buf).ok_or(breadthfirst::Error::NotFound { oid: tree.into() })?; From a8333f1137df51d237f6debf056ac075b0a2cd94 Mon Sep 17 00:00:00 2001 
From: Sebastian Thiel Date: Tue, 12 Sep 2023 16:19:36 +0200 Subject: [PATCH 03/12] feat: add `Repository::stat_options()` to learn how an index would compare filesystem stats. --- gix/src/config/cache/access.rs | 39 ++++++++++++++++++++------------ gix/src/config/mod.rs | 13 +++++++++++ gix/src/repository/config/mod.rs | 8 +++++++ 3 files changed, 46 insertions(+), 14 deletions(-) diff --git a/gix/src/config/cache/access.rs b/gix/src/config/cache/access.rs index 820dd51ff46..5f4fbe72f11 100644 --- a/gix/src/config/cache/access.rs +++ b/gix/src/config/cache/access.rs @@ -137,7 +137,7 @@ impl Cache { let install_dir = crate::path::install_dir().ok(); let home = self.home_dir(); - let ctx = crate::config::cache::interpolate_context(install_dir.as_deref(), home.as_deref()); + let ctx = config::cache::interpolate_context(install_dir.as_deref(), home.as_deref()); Some(path.interpolate(ctx)) } @@ -154,6 +154,23 @@ impl Cache { }) } + #[cfg(feature = "index")] + pub(crate) fn stat_options(&self) -> Result { + use crate::config::tree::gitoxide; + Ok(gix_index::entry::stat::Options { + trust_ctime: boolean(self, "core.trustCTime", &Core::TRUST_C_TIME, true)?, + use_nsec: boolean(self, "gitoxide.core.useNsec", &gitoxide::Core::USE_NSEC, false)?, + use_stdev: boolean(self, "gitoxide.core.useStdev", &gitoxide::Core::USE_STDEV, false)?, + check_stat: self + .apply_leniency( + self.resolved + .string("core", None, "checkStat") + .map(|v| Core::CHECK_STAT.try_into_checkstat(v)), + )? + .unwrap_or(true), + }) + } + /// Collect everything needed to checkout files into a worktree. /// Note that some of the options being returned will be defaulted so safe settings, the caller might have to override them /// depending on the use-case. 
@@ -162,7 +179,7 @@ impl Cache { &self, repo: &crate::Repository, attributes_source: gix_worktree::stack::state::attributes::Source, - ) -> Result { + ) -> Result { use crate::config::tree::gitoxide; let git_dir = repo.git_dir(); let thread_limit = self.apply_leniency( @@ -202,18 +219,12 @@ impl Cache { destination_is_initially_empty: false, overwrite_existing: false, keep_going: false, - stat_options: gix_index::entry::stat::Options { - trust_ctime: boolean(self, "core.trustCTime", &Core::TRUST_C_TIME, true)?, - use_nsec: boolean(self, "gitoxide.core.useNsec", &gitoxide::Core::USE_NSEC, false)?, - use_stdev: boolean(self, "gitoxide.core.useStdev", &gitoxide::Core::USE_STDEV, false)?, - check_stat: self - .apply_leniency( - self.resolved - .string("core", None, "checkStat") - .map(|v| Core::CHECK_STAT.try_into_checkstat(v)), - )? - .unwrap_or(true), - }, + stat_options: self.stat_options().map_err(|err| match err { + config::stat_options::Error::ConfigCheckStat(err) => { + config::checkout_options::Error::ConfigCheckStat(err) + } + config::stat_options::Error::ConfigBoolean(err) => config::checkout_options::Error::ConfigBoolean(err), + })?, }) } diff --git a/gix/src/config/mod.rs b/gix/src/config/mod.rs index e8e0f87352e..102c7a48286 100644 --- a/gix/src/config/mod.rs +++ b/gix/src/config/mod.rs @@ -119,6 +119,19 @@ pub mod diff { } } +/// +pub mod stat_options { + /// The error produced when collecting stat information, and returned by [Repository::stat_options()](crate::Repository::stat_options()). 
+ #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error(transparent)] + ConfigCheckStat(#[from] super::key::GenericErrorWithValue), + #[error(transparent)] + ConfigBoolean(#[from] super::boolean::Error), + } +} + /// #[cfg(feature = "attributes")] pub mod checkout_options { diff --git a/gix/src/repository/config/mod.rs b/gix/src/repository/config/mod.rs index 806b1282d98..618ccf0f63b 100644 --- a/gix/src/repository/config/mod.rs +++ b/gix/src/repository/config/mod.rs @@ -29,6 +29,14 @@ impl crate::Repository { self.config.fs_capabilities() } + /// Return filesystem options on how to perform stat-checks, typically in relation to the index. + /// + /// Note that these values have not been [probed](gix_fs::Capabilities::probe()). + #[cfg(feature = "index")] + pub fn stat_options(&self) -> Result { + self.config.stat_options() + } + /// The options used to open the repository. pub fn open_options(&self) -> &crate::open::Options { &self.options From 7a96a258cc8a5bad6414367cdbce390b7ac88b19 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 12 Sep 2023 16:52:53 +0200 Subject: [PATCH 04/12] feat: add `DerefMut` implementation for `FileSnapshot`. This allows to leverage `make_mut()`. --- gix-fs/src/snapshot.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/gix-fs/src/snapshot.rs b/gix-fs/src/snapshot.rs index 62b1d166731..2b21d0d9f21 100644 --- a/gix-fs/src/snapshot.rs +++ b/gix-fs/src/snapshot.rs @@ -57,6 +57,12 @@ impl Deref for FileSnapshot { } } +impl std::ops::DerefMut for FileSnapshot { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.value + } +} + impl Deref for SharedFileSnapshotMut { type Target = MutableOnDemand>>; From 51971969d2cf13587d4bfbd4cb047f2377b8bc0f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 12 Sep 2023 17:04:50 +0200 Subject: [PATCH 05/12] feat: Add `threading::make_mut()` to allow obtaining a mutable reference to shared data. 
This is particularly useful when handling an index file, which may be shared across clones of a repository. --- gix-features/src/threading.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/gix-features/src/threading.rs b/gix-features/src/threading.rs index 2b33386d216..5f92ba2a892 100644 --- a/gix-features/src/threading.rs +++ b/gix-features/src/threading.rs @@ -30,6 +30,11 @@ mod _impl { v.write() } + /// Get a mutable reference to the underlying data, with semantics similar to [Arc::make_mut()]. + pub fn make_mut(this: &mut OwnShared) -> &mut T { + OwnShared::make_mut(this) + } + /// Get a mutable reference through a [`Mutable`] for read-write access. pub fn lock(v: &Mutable) -> parking_lot::MutexGuard<'_, T> { v.lock() @@ -75,6 +80,11 @@ mod _impl { v.borrow_mut() } + /// Get a mutable reference to the underlying data, with semantics similar to [Rc::make_mut()]. + pub fn make_mut(this: &mut OwnShared) -> &mut T { + OwnShared::make_mut(this) + } + /// Get a mutable reference through a [`Mutable`] for read-write access. pub fn lock(v: &Mutable) -> RefMut<'_, T> { v.borrow_mut() From 832b34570bde97499e75a174bc818113092b3145 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 13 Sep 2023 16:52:35 +0200 Subject: [PATCH 06/12] doc: improve documentation of `common_prefix()` and `pattern_matching_relative_path()`. --- gix-pathspec/src/lib.rs | 7 +++++-- gix-pathspec/src/search/matching.rs | 2 +- gix-pathspec/src/search/mod.rs | 6 ++++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/gix-pathspec/src/lib.rs b/gix-pathspec/src/lib.rs index 550f142e2eb..9f91567da6f 100644 --- a/gix-pathspec/src/lib.rs +++ b/gix-pathspec/src/lib.rs @@ -93,9 +93,12 @@ pub struct Pattern { pub attributes: Vec, /// If `true`, we are a special Nil pattern and always match. nil: bool, - /// The length of bytes in `path` that belong to the prefix, which will always be matched case-insensitively. 
+ /// The length of bytes in `path` that belong to the prefix, which will always be matched case-sensitively + /// on case-sensitive filesystems. + /// /// That way, even though pathspecs are applied from the top, we can emulate having changed directory into - /// a specific sub-directory in a case-sensitive file-system. + /// a specific sub-directory in a case-sensitive file-system, even if the rest of the pathspec can be set to + /// match case-insensitively. /// Is set by [Pattern::normalize()]. prefix_len: usize, } diff --git a/gix-pathspec/src/search/matching.rs b/gix-pathspec/src/search/matching.rs index c97989c5983..c7c8f2cbb36 100644 --- a/gix-pathspec/src/search/matching.rs +++ b/gix-pathspec/src/search/matching.rs @@ -8,7 +8,7 @@ use crate::{ impl Search { /// Return the first [`Match`] of `relative_path`, or `None`. - /// `is_dir` is true if `relative_path` is a directory. + /// `is_dir` is `true` if `relative_path` is a directory. /// `attributes` is called as `attributes(relative_path, case, is_dir, outcome) -> has_match` to obtain for attributes for `relative_path`, if /// the underlying pathspec defined an attribute filter, to be stored in `outcome`, returning true if there was a match. /// All attributes of the pathspec have to be present in the defined value for the pathspec to match. diff --git a/gix-pathspec/src/search/mod.rs b/gix-pathspec/src/search/mod.rs index ff76df0144e..a9c87377bf3 100644 --- a/gix-pathspec/src/search/mod.rs +++ b/gix-pathspec/src/search/mod.rs @@ -30,8 +30,10 @@ impl Search { /// Return the portion of the prefix among all of the pathspecs involved in this search, or an empty string if /// there is none. It doesn't have to end at a directory boundary though, nor does it denote a directory. /// - /// Note that the common_prefix can be matched case-insensitively, which makes it useful to skip large portions of input. - /// Further, excluded pathspecs don't participate which makes this common prefix inclusive. 
+ /// Note that the common_prefix is always matched case-sensitively, and it is useful to skip large portions of input. + /// Further, excluded pathspecs don't participate which makes this common prefix inclusive. To work correctly though, + /// one will have to additionally match paths that have the common prefix with that pathspec itself to assure it is + /// not excluded. pub fn common_prefix(&self) -> &BStr { self.patterns .iter() From 2d011253c64aaeede546a2e4cdd143142689044b Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 12 Sep 2023 17:36:38 +0200 Subject: [PATCH 07/12] feat!: various improvements --- Cargo.lock | 2 + gix-status/Cargo.toml | 1 + gix-status/src/index_as_worktree/function.rs | 39 ++++++++++---- gix-status/src/index_as_worktree/types.rs | 2 +- gix-status/src/lib.rs | 18 +++++++ gix-status/tests/Cargo.toml | 2 +- gix-status/tests/status/index_as_worktree.rs | 53 ++++++++++++++++++++ 7 files changed, 106 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0c61811764b..8ac70801a09 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2250,6 +2250,7 @@ dependencies = [ "gix-index 0.24.0", "gix-object 0.36.0", "gix-path 0.10.0", + "gix-pathspec", "thiserror", ] @@ -2264,6 +2265,7 @@ dependencies = [ "gix-hash 0.13.0", "gix-index 0.24.0", "gix-object 0.36.0", + "gix-pathspec", "gix-status", "gix-testtools", ] diff --git a/gix-status/Cargo.toml b/gix-status/Cargo.toml index 1ef5eb10799..bdf464a1fe2 100644 --- a/gix-status/Cargo.toml +++ b/gix-status/Cargo.toml @@ -20,6 +20,7 @@ gix-hash = { version = "^0.13.0", path = "../gix-hash" } gix-object = { version = "^0.36.0", path = "../gix-object" } gix-path = { version = "^0.10.0", path = "../gix-path" } gix-features = { version = "^0.34.0", path = "../gix-features" } +gix-pathspec = { version = "0.2.0", path = "../gix-pathspec" } thiserror = "1.0.26" filetime = "0.2.15" diff --git a/gix-status/src/index_as_worktree/function.rs 
b/gix-status/src/index_as_worktree/function.rs index 
be2572013c6..e576cc35c44 100644 --- a/gix-status/src/index_as_worktree/function.rs +++ b/gix-status/src/index_as_worktree/function.rs @@ -1,3 +1,4 @@ +use std::sync::atomic::Ordering; use std::{io, marker::PhantomData, path::Path}; use bstr::BStr; @@ -11,7 +12,7 @@ use crate::{ types::{Error, Options}, Change, VisitEntry, }, - read, + read, Pathspec, }; /// Calculates the changes that need to be applied to an `index` to match the state of the `worktree` and makes them @@ -24,12 +25,15 @@ use crate::{ /// Note that this isn't technically quite what this function does as this also provides some additional information, /// like whether a file has conflicts, and files that were added with `git add` are shown as a special /// changes despite not technically requiring a change to the index since `git add` already added the file to the index. +#[allow(clippy::too_many_arguments)] pub fn index_as_worktree<'index, T, Find, E>( index: &'index mut gix_index::State, worktree: &Path, collector: &mut impl VisitEntry<'index, ContentChange = T>, compare: impl CompareBlobs + Send + Clone, find: Find, + progress: &mut dyn gix_features::progress::Progress, + pathspec: impl Pathspec + Send + Clone, options: Options, ) -> Result<(), Error> where @@ -43,14 +47,22 @@ where let timestamp = index.timestamp(); index.set_timestamp(FileTime::now()); let (chunk_size, thread_limit, _) = gix_features::parallel::optimize_chunk_size_and_thread_limit( - 100, + 500, // just like git index.entries().len().into(), options.thread_limit, None, ); + + let range = index + .prefixed_entries_range(pathspec.common_prefix()) + .unwrap_or(0..index.entries().len()); let (entries, path_backing) = index.entries_mut_and_pathbacking(); + let entries = &mut entries[range]; + progress.init(entries.len().into(), gix_features::progress::count("files")); + let count = progress.counter(); + in_parallel_if( - || true, // TODO: heuristic: when is parallelization not worth it? 
+ || true, // TODO: heuristic: when is parallelization not worth it? Git says 500 items per thread, but to 20 threads, we can be more fine-grained though. entries.chunks_mut(chunk_size), thread_limit, { @@ -65,15 +77,20 @@ where worktree, options, }, - compare.clone(), - find.clone(), + compare, + find, + pathspec, ) } }, - |entries, (state, diff, find)| { + |entries, (state, diff, find, pathspec)| { entries .iter_mut() - .filter_map(|entry| state.process(entry, diff, find)) + .filter_map(|entry| { + let res = state.process(entry, diff, find, pathspec); + count.fetch_add(1, Ordering::Relaxed); + res + }) .collect() }, ReduceChange { @@ -101,10 +118,11 @@ impl<'index> State<'_, 'index> { entry: &'index mut gix_index::Entry, diff: &mut impl CompareBlobs, find: &mut Find, + pathspec: &mut impl Pathspec, ) -> Option> where E: std::error::Error + Send + Sync + 'static, - Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E> + Send + Clone, + Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E>, { let conflict = match entry.stage() { 0 => false, @@ -120,6 +138,9 @@ impl<'index> State<'_, 'index> { return None; } let path = entry.path_in(self.path_backing); + if !pathspec.is_included(path, Some(false)) { + return None; + } let status = self.compute_status(&mut *entry, path, diff, find); Some(status.map(move |status| (&*entry, path, status, conflict))) } @@ -172,7 +193,7 @@ impl<'index> State<'_, 'index> { ) -> Result>, Error> where E: std::error::Error + Send + Sync + 'static, - Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E> + Send + Clone, + Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E>, { // TODO fs cache let worktree_path = gix_path::try_from_bstr(git_path).map_err(|_| Error::IllformedUtf8)?; diff --git a/gix-status/src/index_as_worktree/types.rs b/gix-status/src/index_as_worktree/types.rs index 10ff5c28d24..35cf45401b1 100644 --- a/gix-status/src/index_as_worktree/types.rs +++ 
b/gix-status/src/index_as_worktree/types.rs @@ -14,7 +14,7 @@ pub enum Error { Find(#[source] Box), } -#[derive(Clone, Default)] +#[derive(Clone, Debug, Default)] /// Options that control how the index status with a worktree is computed. pub struct Options { /// Capabilities of the file system which affect the status computation. diff --git a/gix-status/src/lib.rs b/gix-status/src/lib.rs index 843eb6a20a5..225668ced78 100644 --- a/gix-status/src/lib.rs +++ b/gix-status/src/lib.rs @@ -12,4 +12,22 @@ pub mod read; pub mod index_as_worktree; + +use bstr::BStr; pub use index_as_worktree::function::index_as_worktree; + +/// A trait to facilitate working with pathspecs. +pub trait Pathspec { + /// Return the portion of the prefix among all of the pathspecs involved in this search, or an empty string if + /// there is none. It doesn't have to end at a directory boundary though, nor does it denote a directory. + /// + /// Note that the common_prefix is always matched case-sensitively, and it is useful to skip large portions of input. + /// Further, excluded pathspecs don't participate which makes this common prefix inclusive. To work correctly though, + /// one will have to additionally match paths that have the common prefix with that pathspec itself to assure it is + /// not excluded. + fn common_prefix(&self) -> &BStr; + + /// Return `true` if `relative_path` is included in this pathspec. + /// `is_dir` is `true` if `relative_path` is a directory. 
+ fn is_included(&mut self, relative_path: &BStr, is_dir: Option) -> bool; +} diff --git a/gix-status/tests/Cargo.toml b/gix-status/tests/Cargo.toml index d69aafd59b2..93676222e79 100644 --- a/gix-status/tests/Cargo.toml +++ b/gix-status/tests/Cargo.toml @@ -24,7 +24,7 @@ gix-fs = { path = "../../gix-fs" } gix-hash = { path = "../../gix-hash" } gix-object = { path = "../../gix-object" } gix-features = { path = "../../gix-features" } - +gix-pathspec = { version = "0.2.0", path = "../../gix-pathspec" } filetime = "0.2.15" bstr = { version = "1.3.0", default-features = false } diff --git a/gix-status/tests/status/index_as_worktree.rs b/gix-status/tests/status/index_as_worktree.rs index 484b86a4522..49ede98167d 100644 --- a/gix-status/tests/status/index_as_worktree.rs +++ b/gix-status/tests/status/index_as_worktree.rs @@ -29,17 +29,25 @@ const TEST_OPTIONS: index::entry::stat::Options = index::entry::stat::Options { }; fn fixture(name: &str, expected_status: &[(&BStr, Option, bool)]) { + fixture_filtered(name, &[], expected_status) +} + +fn fixture_filtered(name: &str, pathspecs: &[&str], expected_status: &[(&BStr, Option, bool)]) { let worktree = fixture_path(name); let git_dir = worktree.join(".git"); let mut index = gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, false, Default::default()).unwrap(); let mut recorder = Recorder::default(); + let search = gix_pathspec::Search::from_specs(to_pathspecs(pathspecs), None, std::path::Path::new("")) + .expect("valid specs can be normalized"); index_as_worktree( &mut index, &worktree, &mut recorder, FastEq, |_, _| Ok::<_, std::convert::Infallible>(gix_object::BlobRef { data: &[] }), + &mut gix_features::progress::Discard, + Pathspec(search), Options { fs: gix_fs::Capabilities::probe(&git_dir), stat: TEST_OPTIONS, @@ -51,6 +59,13 @@ fn fixture(name: &str, expected_status: &[(&BStr, Option, bool)]) { assert_eq!(recorder.records, expected_status) } +fn to_pathspecs(input: &[&str]) -> Vec { + input + .iter() + 
.map(|pattern| gix_pathspec::parse(pattern.as_bytes(), Default::default()).expect("known to be valid")) + .collect() +} + #[test] fn removed() { fixture( @@ -62,6 +77,15 @@ fn removed() { (BStr::new(b"executable"), Some(Change::Removed), false), ], ); + + fixture_filtered( + "status_removed", + &["dir"], + &[ + (BStr::new(b"dir/content"), Some(Change::Removed), false), + (BStr::new(b"dir/sub-dir/symlink"), Some(Change::Removed), false), + ], + ); } #[test] @@ -180,6 +204,8 @@ fn racy_git() { &mut recorder, counter.clone(), |_, _| Err(std::io::Error::new(std::io::ErrorKind::Other, "no odb access expected")), + &mut gix_features::progress::Discard, + Pathspec::default(), Options { fs, stat: TEST_OPTIONS, @@ -201,6 +227,8 @@ fn racy_git() { &mut recorder, counter, |_, _| Err(std::io::Error::new(std::io::ErrorKind::Other, "no odb access expected")), + &mut gix_features::progress::Discard, + Pathspec::default(), Options { fs, stat: TEST_OPTIONS, @@ -226,3 +254,28 @@ fn racy_git() { "racy change is correctly detected" ); } + +#[derive(Clone)] +struct Pathspec(gix_pathspec::Search); + +impl Default for Pathspec { + fn default() -> Self { + let search = gix_pathspec::Search::from_specs(to_pathspecs(&[]), None, std::path::Path::new("")) + .expect("empty is always valid"); + Self(search) + } +} + +impl gix_status::Pathspec for Pathspec { + fn common_prefix(&self) -> &BStr { + self.0.common_prefix() + } + + fn is_included(&mut self, relative_path: &BStr, is_dir: Option) -> bool { + self.0 + .pattern_matching_relative_path(relative_path, is_dir, &mut |_, _, _, _| { + unreachable!("we don't use attributes in our pathspecs") + }) + .map_or(false, |m| !m.is_excluded()) + } +} From f066f9889b57a4ffaebc0ed1442d77999498db42 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 13 Sep 2023 16:55:37 +0200 Subject: [PATCH 08/12] feat: `PathSpec` implements `gix_status::PathSpec` to allow it to be used there. 
The reason we need a trait and can't do with simply a function is that multiple calls are needed to test for inclusion *and* allow the common-prefix optimization. --- Cargo.lock | 1 + gix/Cargo.toml | 6 +++++- gix/src/lib.rs | 2 ++ gix/src/pathspec.rs | 11 +++++++++++ 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 8ac70801a09..918c90b6698 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1238,6 +1238,7 @@ dependencies = [ "gix-revision", "gix-revwalk", "gix-sec 0.10.0", + "gix-status", "gix-submodule", "gix-tempfile 9.0.0", "gix-testtools", diff --git a/gix/Cargo.toml b/gix/Cargo.toml index 0e9661c54cd..c1c5bc675ae 100644 --- a/gix/Cargo.toml +++ b/gix/Cargo.toml @@ -51,7 +51,7 @@ default = ["max-performance-safe", "comfort", "basic", "extras"] basic = ["blob-diff", "revision", "index"] ## Various additional features and capabilities that are not necessarily part of what most users would need. -extras = ["worktree-stream", "worktree-archive", "revparse-regex", "mailmap", "excludes", "attributes", "worktree-mutation", "credentials", "interrupt"] +extras = ["worktree-stream", "worktree-archive", "revparse-regex", "mailmap", "excludes", "attributes", "worktree-mutation", "credentials", "interrupt", "status"] ## Various progress-related features that improve the look of progress message units. comfort = ["gix-features/progress-unit-bytes", "gix-features/progress-unit-human-numbers"] @@ -61,6 +61,9 @@ comfort = ["gix-features/progress-unit-bytes", "gix-features/progress-unit-human #! A component is a distinct feature which may be comprised of one or more methods around a particular topic. #! Providers of libraries should only activate the components they need. +## Obtain information similar to `git status`. +status = ["gix-status"] + ## Utilities for interrupting computations and cleaning up tempfiles. 
interrupt = ["dep:signal-hook", "gix-tempfile/signals"] @@ -239,6 +242,7 @@ gix-hashtable = { version = "^0.4.0", path = "../gix-hashtable" } gix-commitgraph = { version = "^0.20.0", path = "../gix-commitgraph" } gix-pathspec = { version = "^0.2.0", path = "../gix-pathspec", optional = true } gix-submodule = { version = "^0.3.0", path = "../gix-submodule", optional = true } +gix-status = { version = "^0.1.0", path = "../gix-status", optional = true } gix-worktree-stream = { version = "^0.4.0", path = "../gix-worktree-stream", optional = true } gix-archive = { version = "^0.4.0", path = "../gix-archive", default-features = false, optional = true } diff --git a/gix/src/lib.rs b/gix/src/lib.rs index 82c4b8ea22e..4ea2b98334e 100644 --- a/gix/src/lib.rs +++ b/gix/src/lib.rs @@ -124,6 +124,8 @@ pub use gix_ref as refs; pub use gix_refspec as refspec; pub use gix_revwalk as revwalk; pub use gix_sec as sec; +#[cfg(feature = "status")] +pub use gix_status as status; pub use gix_tempfile as tempfile; pub use gix_trace as trace; pub use gix_traverse as traverse; diff --git a/gix/src/pathspec.rs b/gix/src/pathspec.rs index 3d299e31377..4d1567b9e91 100644 --- a/gix/src/pathspec.rs +++ b/gix/src/pathspec.rs @@ -144,3 +144,14 @@ impl<'repo> Pathspec<'repo> { }) } } + +#[cfg(feature = "status")] +impl gix_status::Pathspec for Pathspec<'_> { + fn common_prefix(&self) -> &BStr { + self.search.common_prefix() + } + + fn is_included(&mut self, relative_path: &BStr, is_dir: Option) -> bool { + self.is_included(relative_path, is_dir) + } +} From fd034e03b5a05dcc7a01014ce6a97b7cf93086be Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 12 Sep 2023 19:34:58 +0200 Subject: [PATCH 09/12] fix: make time conversion more robust Previously it could easily fail if very old files are found, or future ones. Instead, such entries simply can't be compared quickly. 
--- gix-index/src/entry/stat.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gix-index/src/entry/stat.rs b/gix-index/src/entry/stat.rs index 65063dc161c..7bde717633f 100644 --- a/gix-index/src/entry/stat.rs +++ b/gix-index/src/entry/stat.rs @@ -95,8 +95,8 @@ impl Stat { use std::os::unix::fs::MetadataExt; #[cfg(unix)] let res = Stat { - mtime: mtime.try_into()?, - ctime: ctime.try_into()?, + mtime: mtime.try_into().unwrap_or_default(), + ctime: ctime.try_into().unwrap_or_default(), // truncating to 32 bits is fine here because // that's what the linux syscalls returns // just rust upcasts to 64 bits for some reason? From e22893c1c95a76d9a5f3b2f2a4e2a30f815ee7e5 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 12 Sep 2023 19:50:15 +0200 Subject: [PATCH 10/12] fix: do not trust ctime by default. On MacOS it seems to be off by two seconds right from the source, which seems to be an issue `stat` isn't having. --- gix/src/config/cache/access.rs | 8 +++++++- gix/src/config/tree/sections/core.rs | 3 ++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/gix/src/config/cache/access.rs b/gix/src/config/cache/access.rs index 5f4fbe72f11..352bc97124b 100644 --- a/gix/src/config/cache/access.rs +++ b/gix/src/config/cache/access.rs @@ -158,7 +158,13 @@ impl Cache { pub(crate) fn stat_options(&self) -> Result { use crate::config::tree::gitoxide; Ok(gix_index::entry::stat::Options { - trust_ctime: boolean(self, "core.trustCTime", &Core::TRUST_C_TIME, true)?, + trust_ctime: boolean( + self, + "core.trustCTime", + &Core::TRUST_C_TIME, + // For now, on MacOS it's known to not be trust-worthy at least with the Rust STDlib, being 2s off + !cfg!(target_os = "macos"), + )?, use_nsec: boolean(self, "gitoxide.core.useNsec", &gitoxide::Core::USE_NSEC, false)?, use_stdev: boolean(self, "gitoxide.core.useStdev", &gitoxide::Core::USE_STDEV, false)?, check_stat: self diff --git a/gix/src/config/tree/sections/core.rs 
b/gix/src/config/tree/sections/core.rs index 2ec5c279ea3..ab3e2bab93f 100644 --- a/gix/src/config/tree/sections/core.rs +++ b/gix/src/config/tree/sections/core.rs @@ -45,7 +45,8 @@ impl Core { /// The `core.symlinks` key. pub const SYMLINKS: keys::Boolean = keys::Boolean::new_boolean("symlinks", &config::Tree::CORE); /// The `core.trustCTime` key. - pub const TRUST_C_TIME: keys::Boolean = keys::Boolean::new_boolean("trustCTime", &config::Tree::CORE); + pub const TRUST_C_TIME: keys::Boolean = keys::Boolean::new_boolean("trustCTime", &config::Tree::CORE) + .with_deviation("Currently the default is false, instead of true, as it seems to be 2s off in tests"); /// The `core.worktree` key. pub const WORKTREE: keys::Any = keys::Any::new("worktree", &config::Tree::CORE) .with_environment_override("GIT_WORK_TREE") From f9d14d86a6578cf0f9a0c4a2256ad227b9264340 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 13 Sep 2023 21:19:50 +0200 Subject: [PATCH 11/12] feat: Add `PathspecDetached` as pathspec that can more easily be used across threads. 
--- gix/src/lib.rs | 2 +- gix/src/pathspec.rs | 54 +++++++++++++++++++++++++++++++++++++++++++-- gix/src/types.rs | 12 ++++++++++ 3 files changed, 65 insertions(+), 3 deletions(-) diff --git a/gix/src/lib.rs b/gix/src/lib.rs index 4ea2b98334e..672d5c91c6a 100644 --- a/gix/src/lib.rs +++ b/gix/src/lib.rs @@ -161,7 +161,7 @@ pub use types::{ Commit, Head, Id, Object, ObjectDetached, Reference, Remote, Repository, Tag, ThreadSafeRepository, Tree, Worktree, }; #[cfg(feature = "attributes")] -pub use types::{Pathspec, Submodule}; +pub use types::{Pathspec, PathspecDetached, Submodule}; /// pub mod clone; diff --git a/gix/src/pathspec.rs b/gix/src/pathspec.rs index 4d1567b9e91..235a91d7630 100644 --- a/gix/src/pathspec.rs +++ b/gix/src/pathspec.rs @@ -3,7 +3,7 @@ use gix_macros::momo; use gix_odb::FindExt; pub use gix_pathspec::*; -use crate::{bstr::BStr, AttributeStack, Pathspec, Repository}; +use crate::{bstr::BStr, AttributeStack, Pathspec, PathspecDetached, Repository}; /// pub mod init { @@ -75,6 +75,15 @@ impl<'repo> Pathspec<'repo> { self.stack.map(|stack| AttributeStack::new(stack, self.repo)), ) } + + /// Turn ourselves into an implementation that works without a repository instance and that is rather minimal. + pub fn detach(self) -> std::io::Result { + Ok(PathspecDetached { + search: self.search, + stack: self.stack, + odb: self.repo.objects.clone().into_arc()?, + }) + } } /// Access @@ -145,8 +154,49 @@ impl<'repo> Pathspec<'repo> { } } +/// Access +impl PathspecDetached { + /// Return the first [`Match`](search::Match) of `relative_path`, or `None`. + /// Note that the match might [be excluded](search::Match::is_excluded()). + /// `is_dir` is true if `relative_path` is a directory. 
+ #[doc( + alias = "match_diff", + alias = "match_tree", + alias = "match_index", + alias = "match_workdir", + alias = "matches_path", + alias = "git2" + )] + #[momo] + pub fn pattern_matching_relative_path<'a>( + &mut self, + relative_path: impl Into<&'a BStr>, + is_dir: Option, + ) -> Option> { + self.search.pattern_matching_relative_path( + relative_path.into(), + is_dir, + &mut |relative_path, case, is_dir, out| { + let stack = self.stack.as_mut().expect("initialized in advance"); + stack + .set_case(case) + .at_entry(relative_path, Some(is_dir), |id, buf| self.odb.find_blob(id, buf)) + .map_or(false, |platform| platform.matching_attributes(out)) + }, + ) + } + + /// The simplified version of [`pattern_matching_relative_path()`](Self::pattern_matching_relative_path()) which returns + /// `true` if `relative_path` is included in the set of positive pathspecs, while not being excluded. + #[momo] + pub fn is_included<'a>(&mut self, relative_path: impl Into<&'a BStr>, is_dir: Option) -> bool { + self.pattern_matching_relative_path(relative_path, is_dir) + .map_or(false, |m| !m.is_excluded()) + } +} + #[cfg(feature = "status")] -impl gix_status::Pathspec for Pathspec<'_> { +impl gix_status::Pathspec for PathspecDetached { fn common_prefix(&self) -> &BStr { self.search.common_prefix() } diff --git a/gix/src/types.rs b/gix/src/types.rs index 0afdbbc519f..0739cdd25c5 100644 --- a/gix/src/types.rs +++ b/gix/src/types.rs @@ -218,6 +218,18 @@ pub struct Pathspec<'repo> { pub(crate) search: gix_pathspec::Search, } +/// Like [`Pathspec`], but without a Repository reference and with minimal API. +#[derive(Clone)] +#[cfg(feature = "attributes")] +pub struct PathspecDetached { + /// The cache to power attribute access. It's only initialized if we have a pattern with attributes. + pub stack: Option, + /// The prepared search to use for checking matches. + pub search: gix_pathspec::Search, + /// A thread-safe version of an ODB. 
+ pub odb: gix_odb::HandleArc, +} + /// A stand-in for the submodule of a particular name. #[derive(Clone)] #[cfg(feature = "attributes")] From f094f71dc1a50955552509d108556c01517c6ed6 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 12 Sep 2023 14:08:44 +0200 Subject: [PATCH 12/12] feat: `gix status` with basic index-worktree comparison --- Cargo.lock | 1 + gitoxide-core/Cargo.toml | 3 +- gitoxide-core/src/repository/mod.rs | 1 + gitoxide-core/src/repository/status.rs | 120 +++++++++++++++++++++++++ src/plumbing/main.rs | 27 ++++++ src/plumbing/options/mod.rs | 28 ++++++ 6 files changed, 179 insertions(+), 1 deletion(-) create mode 100644 gitoxide-core/src/repository/status.rs diff --git a/Cargo.lock b/Cargo.lock index 918c90b6698..6df2c8fbacd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1179,6 +1179,7 @@ dependencies = [ "gix", "gix-archive", "gix-pack", + "gix-status", "gix-transport", "gix-url", "itertools 0.11.0", diff --git a/gitoxide-core/Cargo.toml b/gitoxide-core/Cargo.toml index 5b0c97471df..abbc8623512 100644 --- a/gitoxide-core/Cargo.toml +++ b/gitoxide-core/Cargo.toml @@ -44,10 +44,11 @@ serde = ["gix/serde", "dep:serde_json", "dep:serde", "bytesize/serde"] [dependencies] # deselect everything else (like "performance") as this should be controllable by the parent application. 
-gix = { version = "^0.53.1", path = "../gix", default-features = false, features = ["blob-diff", "revision", "mailmap", "excludes", "attributes", "worktree-mutation", "credentials", "interrupt"] } +gix = { version = "^0.53.1", path = "../gix", default-features = false, features = ["blob-diff", "revision", "mailmap", "excludes", "attributes", "worktree-mutation", "credentials", "interrupt", "status"] } gix-pack-for-configuration-only = { package = "gix-pack", version = "^0.42.0", path = "../gix-pack", default-features = false, features = ["pack-cache-lru-dynamic", "pack-cache-lru-static", "generate", "streaming-input"] } gix-transport-configuration-only = { package = "gix-transport", version = "^0.36.0", path = "../gix-transport", default-features = false } gix-archive-for-configuration-only = { package = "gix-archive", version = "^0.4.0", path = "../gix-archive", optional = true, features = ["tar", "tar_gz"] } +gix-status = { version = "0.1.0", path = "../gix-status" } serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] } anyhow = "1.0.42" thiserror = "1.0.34" diff --git a/gitoxide-core/src/repository/mod.rs b/gitoxide-core/src/repository/mod.rs index be4d4dfd0bf..c78e82edb85 100644 --- a/gitoxide-core/src/repository/mod.rs +++ b/gitoxide-core/src/repository/mod.rs @@ -40,6 +40,7 @@ pub mod mailmap; pub mod odb; pub mod remote; pub mod revision; +pub mod status; pub mod submodule; pub mod tree; pub mod verify; diff --git a/gitoxide-core/src/repository/status.rs b/gitoxide-core/src/repository/status.rs new file mode 100644 index 00000000000..2b892d53190 --- /dev/null +++ b/gitoxide-core/src/repository/status.rs @@ -0,0 +1,120 @@ +use crate::OutputFormat; +use anyhow::{bail, Context}; +use gix::bstr::{BStr, BString}; +use gix::index::Entry; +use gix::prelude::FindExt; +use gix::Progress; +use gix_status::index_as_worktree::content::FastEq; +use gix_status::index_as_worktree::Change; + +pub enum Submodules { + /// display 
all information about submodules, including ref changes, modifications and untracked files. + All, + /// Compare only the configuration of the superprojects commit with the actually checked out `HEAD` commit. + RefChange, + /// See if there are worktree modifications compared to the index, but do not check for untracked files. + Modifications, +} + +pub struct Options { + pub format: OutputFormat, + pub submodules: Submodules, + pub thread_limit: Option, +} + +pub fn show( + repo: gix::Repository, + pathspecs: Vec, + out: impl std::io::Write, + mut err: impl std::io::Write, + mut progress: impl gix::NestedProgress, + Options { + format, + // TODO: implement this + submodules: _, + thread_limit, + }: Options, +) -> anyhow::Result<()> { + if format != OutputFormat::Human { + bail!("Only human format is supported right now"); + } + let mut index = repo.index()?; + let index = gix::threading::make_mut(&mut index); + let pathspec = repo.pathspec( + pathspecs, + true, + index, + gix::worktree::stack::state::attributes::Source::WorktreeThenIdMapping, + )?; + let mut progress = progress.add_child("traverse index"); + let start = std::time::Instant::now(); + gix_status::index_as_worktree( + index, + repo.work_dir() + .context("This operation cannot be run on a bare repository")?, + &mut Printer(out), + FastEq, + { + let odb = repo.objects.clone().into_arc()?; + move |id, buf| odb.find_blob(id, buf) + }, + &mut progress, + pathspec.detach()?, + gix_status::index_as_worktree::Options { + fs: repo.filesystem_options()?, + thread_limit, + stat: repo.stat_options()?, + }, + )?; + + writeln!(err, "\nhead -> index and untracked files aren't implemented yet")?; + progress.show_throughput(start); + Ok(()) +} + +struct Printer(W); + +impl<'index, W> gix_status::index_as_worktree::VisitEntry<'index> for Printer +where + W: std::io::Write, +{ + type ContentChange = (); + + fn visit_entry( + &mut self, + entry: &'index Entry, + rela_path: &'index BStr, + change: Option>, + conflict: 
bool, + ) { + self.visit_inner(entry, rela_path, change, conflict).ok(); + } +} + +impl Printer { + fn visit_inner( + &mut self, + _entry: &Entry, + rela_path: &BStr, + change: Option>, + conflict: bool, + ) -> anyhow::Result<()> { + if let Some(change) = conflict + .then_some('U') + .or_else(|| change.as_ref().and_then(change_to_char)) + { + writeln!(&mut self.0, "{change} {rela_path}")?; + } + Ok(()) + } +} + +fn change_to_char(change: &Change<()>) -> Option { + // Known status letters: https://github.com/git/git/blob/6807fcfedab84bc8cd0fbf721bc13c4e68cda9ae/diff.h#L613 + Some(match change { + Change::Removed => 'D', + Change::Type => 'T', + Change::Modification { .. } => 'M', + Change::IntentToAdd => return None, + }) +} diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index 5ae9a9e3e78..04a4fa36bcd 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -133,6 +133,33 @@ pub fn main() -> Result<()> { })?; match cmd { + Subcommands::Status(crate::plumbing::options::status::Platform { submodules, pathspec }) => prepare_and_run( + "status", + trace, + auto_verbose, + progress, + progress_keep_open, + None, + move |progress, out, err| { + use crate::plumbing::options::status::Submodules; + core::repository::status::show( + repository(Mode::Lenient)?, + pathspec, + out, + err, + progress, + core::repository::status::Options { + format, + thread_limit: thread_limit.or(cfg!(target_os = "macos").then_some(3)), // TODO: make this a configurable when in `gix`, this seems to be optimal on MacOS, linux scales though! 
+ submodules: match submodules { + Submodules::All => core::repository::status::Submodules::All, + Submodules::RefChange => core::repository::status::Submodules::RefChange, + Submodules::Modifications => core::repository::status::Submodules::Modifications, + }, + }, + ) + }, + ), Subcommands::Submodule(platform) => match platform .cmds .unwrap_or(crate::plumbing::options::submodule::Subcommands::List) diff --git a/src/plumbing/options/mod.rs b/src/plumbing/options/mod.rs index 54a6d8db3c8..300f1810e61 100644 --- a/src/plumbing/options/mod.rs +++ b/src/plumbing/options/mod.rs @@ -127,6 +127,7 @@ pub enum Subcommands { Submodule(submodule::Platform), /// Show which git configuration values are used or planned. ConfigTree, + Status(status::Platform), Config(config::Platform), #[cfg(feature = "gitoxide-core-tools-corpus")] Corpus(corpus::Platform), @@ -183,6 +184,33 @@ pub mod archive { } } +pub mod status { + use gitoxide::shared::CheckPathSpec; + use gix::bstr::BString; + + #[derive(Default, Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, clap::ValueEnum)] + pub enum Submodules { + /// display all information about submodules, including ref changes, modifications and untracked files. + #[default] + All, + /// Compare only the configuration of the superprojects commit with the actually checked out `HEAD` commit. + RefChange, + /// See if there are worktree modifications compared to the index, but do not check for untracked files. + Modifications, + } + + #[derive(Debug, clap::Parser)] + #[command(about = "compute repository status similar to `git status`")] + pub struct Platform { + /// Define how to display submodule status. + #[clap(long, default_value = "all")] + pub submodules: Submodules, + /// The git path specifications to list attributes for, or unset to read from stdin one per line. + #[clap(value_parser = CheckPathSpec)] + pub pathspec: Vec, + } +} + #[cfg(feature = "gitoxide-core-tools-corpus")] pub mod corpus { use std::path::PathBuf;