Skip to content

Commit

Permalink
[wip] Always reference the target path of a walk.
Browse files Browse the repository at this point in the history
This change removes much of the awareness of prefixes and adjusted roots
in patterns out of the base walk implementation over paths. Root and
relative paths are now more consistent, referring to the walked `Path`
or, similarly, the path given to `Glob::walk` functions. There is only
one exception: a rooted pattern replaces the path given to `Glob::walk`.
This replacement is opaque in the API, but it is documented.

Depth too is now more consistently applied. Prefixes in patterns are no
longer exposed (and `GlobWalker` has been removed from public APIs). To
allow for more control over depth behaviors, `Glob` will instead expose
depth variance, which can then be used to construct a `DepthBehavior`.
This has not yet been implemented though.
  • Loading branch information
olson-sean-k committed Mar 15, 2024
1 parent e43d693 commit 03d82b9
Show file tree
Hide file tree
Showing 4 changed files with 192 additions and 289 deletions.
14 changes: 7 additions & 7 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -601,13 +601,6 @@ pub struct Glob<'t> {
}

impl<'t> Glob<'t> {
/// Compiles a concatenation tree into a `Regex`.
///
/// This is a thin wrapper that forwards `tree` to `encode::compile` and returns its result
/// unchanged, including any `CompileError`.
fn compile<T>(tree: impl Borrow<T>) -> Result<Regex, CompileError>
where
T: ConcatenationTree<'t>,
{
encode::compile(tree)
}

// TODO: Document pattern syntax in the crate documentation and refer to it here.
/// Constructs a [`Glob`] from a glob expression.
///
Expand Down Expand Up @@ -791,6 +784,13 @@ impl<'t> Glob<'t> {
pub fn is_empty(&self) -> bool {
self.tree.as_ref().as_token().is_empty()
}

/// Compiles a concatenation tree into a `Regex`.
///
/// This is a thin wrapper that forwards `tree` to `encode::compile` and returns its result
/// unchanged, including any `CompileError`.
fn compile<T>(tree: impl Borrow<T>) -> Result<Regex, CompileError>
where
T: ConcatenationTree<'t>,
{
encode::compile(tree)
}
}

impl Display for Glob<'_> {
Expand Down
2 changes: 1 addition & 1 deletion src/token/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use crate::token::walk::{BranchFold, Fold, FoldMap, Starting, TokenEntry};
use crate::{StrExt as _, PATHS_ARE_CASE_INSENSITIVE};

pub use crate::token::parse::{parse, ParseError, ROOT_SEPARATOR_EXPRESSION};
pub use crate::token::variance::bound::{NaturalRange, VariantRange};
pub use crate::token::variance::bound::NaturalRange;
pub use crate::token::variance::invariant::{Breadth, Depth, GlobVariance, Invariant, Size, Text};
pub use crate::token::variance::Variance;

Expand Down
217 changes: 87 additions & 130 deletions src/walk/glob.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,39 +147,31 @@ impl<'t> Glob<'t> {
directory: impl Into<PathBuf>,
behavior: impl Into<WalkBehavior>,
) -> impl 'static + FileIterator<Entry = GlobEntry> {
self.walker(directory).walk_with_behavior(behavior)
}

/// Gets an iterator builder over matching files in a directory tree.
///
/// This function gets an intermediate walker that describes iteration over matching files and
/// provides paths prior to iteration. In particular, `walker` can be used when the root
/// directory of the walk is needed. **The root directory may differ from the directory passed
/// to walking functions.**
///
/// See [`Glob::walk`].
///
/// # Examples
///
/// ```rust,no_run
/// use wax::walk::Entry;
/// use wax::Glob;
///
/// let glob = Glob::new("**/*.{log,txt}").unwrap();
/// let walker = glob.walker("/var/log");
/// let root = walker.root_prefix_paths().0.to_path_buf();
/// for entry in walker.walk() {
/// let entry = entry.unwrap();
/// println!("Log: {:?}", entry.path());
/// }
/// ```
///
/// [`Glob::walk`]: crate::Glob::walk
pub fn walker(&self, directory: impl Into<PathBuf>) -> GlobWalker {
GlobWalker {
anchor: self.anchor(directory),
program: WalkProgram::from_glob(self),
program: WalkProgram {
complete: self.program.clone(),
// Do not compile component programs for empty globs.
//
// An empty glob consists solely of an empty literal token and only matches empty text
// (""). A walk program compiled from such a glob has an empty component pattern and
// matches nothing. This means that walking an empty glob never yields any paths. At
// first blush, this seems consistent with an empty glob. However, walking conceptually
// matches a glob against the subtrees in a path and there is arguably an implicit
// empty tree. This is also more composable when partitioning and (re)building paths.
//
// The result is that matching an empty glob against the path `foo` yields `foo` and
// only `foo` (assuming that the path exists).
components: if self.is_empty() {
vec![]
}
else {
WalkProgram::compile::<Tokenized<_>>(self.tree.as_ref())
.expect("failed to compile walk program")
},
},
}
.walk_with_behavior(behavior)
}

fn anchor(&self, directory: impl Into<PathBuf>) -> Anchor {
Expand Down Expand Up @@ -215,24 +207,27 @@ impl<'t> Glob<'t> {
}

/// Root path and pivot of a `Glob` when walking a particular directory.
///
/// For unrooted globs, the pivot can be used to isolate the target directory given to walk
/// functions like `Glob::walk`. This is necessary to implement `Entry` and for interpreting depth
/// behavior, which is always relative to the target directory (and ignores any invariant prefix in
/// a glob).
#[derive(Clone, Debug)]
struct Anchor {
/// The root (starting) path of the walk.
/// The root path of the walk.
///
/// This root, unlike in `PathExt::walk`, may include an invariant prefix from a glob.
root: PathBuf,
/// The number of components from the end of `root` that are present in any invariant prefix of
/// the glob expression.
///
/// The pivot partitions the root path into a target directory and any invariant prefix in the
/// `Glob` (this prefix becomes a postfix in the root path or, when rooted, replaces any target
/// directory).
/// The pivot partitions the root path into the target directory and any invariant prefix in
/// the `Glob` (this prefix becomes a postfix in the root path or, when rooted, replaces any
/// target directory).
pivot: usize,
}

impl Anchor {
/// Splits the root path of the walk at the pivot.
///
/// Forwards `self.pivot` to `split_at_depth` on `self.root` and returns the resulting pair of
/// paths.
// NOTE(review): the pair is presumably (root, prefix), as the name suggests; confirm against
// `split_at_depth`, which is defined elsewhere.
pub fn root_prefix_paths(&self) -> (&Path, &Path) {
self.root.split_at_depth(self.pivot)
}

pub fn walk_with_behavior(self, behavior: impl Into<WalkBehavior>) -> WalkTree {
WalkTree::with_pivot_and_behavior(self.root, self.pivot, behavior)
}
Expand Down Expand Up @@ -260,84 +255,16 @@ impl WalkProgram {
}
Ok(regexes)
}

fn from_glob(glob: &Glob<'_>) -> Self {
WalkProgram {
complete: glob.program.clone(),
// Do not compile component programs for empty globs.
//
// An empty glob consists solely of an empty literal token and only matches empty text
// (""). A walk program compiled from such a glob has an empty component pattern and
// matches nothing. This means that walking an empty glob never yields any paths. At
// first blush, this seems consistent with an empty glob. However, walking conceptually
// matches a glob against the subtrees in a path and there is arguably an implicit
// empty tree. This is also more composable when partitioning and (re)building paths.
//
// The result is that matching an empty glob against the path `foo` yields `foo` and
// only `foo` (assuming that the path exists).
components: if glob.is_empty() {
vec![]
}
else {
WalkProgram::compile::<Tokenized<_>>(glob.tree.as_ref())
.expect("failed to compile glob component expressions")
},
}
}
}

/// Describes iteration over matching files in a directory tree.
///
/// A walker provides the paths walked by a [`Glob`] prior to iteration, most notably the [root
/// path][`GlobWalker::root_prefix_paths`], which may differ from the directory passed to walking
/// functions. When ready, it can be converted into an iterator over matching files.
///
/// See [`Glob::walker`].
///
/// [`Glob`]: crate::Glob
/// [`Glob::walker`]: crate::Glob::walker
/// [`GlobWalker::root_prefix_paths`]: crate::walk::GlobWalker::root_prefix_paths
#[derive(Clone, Debug)]
pub struct GlobWalker {
struct GlobWalker {
anchor: Anchor,
program: WalkProgram,
}

impl GlobWalker {
/// Gets the root and prefix paths.
///
/// The root path is the path to the walked directory tree. **This path may differ from the
/// directory passed to walking functions like [`Glob::walk`]**, because it may incorporate an
/// invariant path prefix from the glob expression.
///
/// The prefix path is the invariant path prefix of the glob expression. This path may be empty
/// and is always a suffix of the root path.
///
/// The following table describes some example paths when using [`Glob::walk`].
///
/// | Glob Expression | Directory | Root | Prefix |
/// |---------------------------|--------------|--------------|------------|
/// | `**/*.txt` | `/home/user` | `/home/user` | |
/// | `projects/**/src/**/*.rs` | `.` | `./projects` | `projects` |
/// | `/var/log/**/*.log` | `.` | `/var/log` | `/var/log` |
///
/// See also [`Entry::root_relative_paths`].
///
/// [`Entry::root_relative_paths`]: crate::walk::Entry::root_relative_paths
/// [`Glob::walk`]: crate::Glob::walk
pub fn root_prefix_paths(&self) -> (&Path, &Path) {
self.anchor.root_prefix_paths()
}

/// Converts a walker into an iterator over matching files in its directory tree.
///
/// See [`Glob::walk`].
///
/// [`Glob::walk`]: crate::Glob::walk
pub fn walk(self) -> impl 'static + FileIterator<Entry = GlobEntry> {
self.walk_with_behavior(WalkBehavior::default())
}

/// Converts a walker into an iterator over matching files in its directory tree.
///
/// See [`Glob::walk_with_behavior`].
Expand All @@ -347,6 +274,7 @@ impl GlobWalker {
self,
behavior: impl Into<WalkBehavior>,
) -> impl 'static + FileIterator<Entry = GlobEntry, Residue = TreeEntry> {
let pivot = self.anchor.pivot;
self.anchor
.walk_with_behavior(behavior)
.filter_map_tree(move |cancellation, separation| {
Expand All @@ -364,7 +292,7 @@ impl GlobWalker {
_ => unreachable!(),
};
let entry = filtrate.as_ref();
let (_, path) = entry.root_relative_paths();
let (_, path) = self::root_relative_paths(entry.path(), entry.depth(), pivot);
let depth = entry.depth().saturating_sub(1);
for (position, candidate) in path
.components()
Expand Down Expand Up @@ -395,7 +323,13 @@ impl GlobWalker {
.map(MatchedText::into_owned)
{
filtrate
.map(|entry| Ok(GlobEntry { entry, matched }))
.map(|entry| {
Ok(GlobEntry {
entry,
pivot,
matched,
})
})
.into()
}
else {
Expand All @@ -418,7 +352,13 @@ impl GlobWalker {
.map(MatchedText::into_owned)
{
filtrate
.map(|entry| Ok(GlobEntry { entry, matched }))
.map(|entry| {
Ok(GlobEntry {
entry,
pivot,
matched,
})
})
.into()
}
else {
Expand All @@ -441,7 +381,13 @@ impl GlobWalker {
.map(MatchedText::into_owned)
{
return filtrate
.map(|entry| Ok(GlobEntry { entry, matched }))
.map(|entry| {
Ok(GlobEntry {
entry,
pivot,
matched,
})
})
.into();
}
filtrate.filter_node().into()
Expand All @@ -465,22 +411,7 @@ enum FilterAnyProgram {
}

impl FilterAnyProgram {
/// Compiles a collection of patterns into a single optional program.
///
/// Returns `Ok(None)` when `tokens` yields no patterns. Otherwise the patterns are combined with
/// `crate::any` and the `program` of the resulting combinator is returned in `Some`. Any error
/// from `crate::any` is propagated unchanged.
fn compile<'t, I>(tokens: I) -> Result<Option<Regex>, BuildError>
where
    I: IntoIterator,
    I::Item: Pattern<'t>,
    I::IntoIter: ExactSizeIterator,
{
    let patterns = tokens.into_iter();
    // Nothing to combine: signal the absence of a program rather than building an empty one.
    if patterns.len() == 0 {
        return Ok(None);
    }
    crate::any(patterns).map(|any| Some(any.program))
}

fn try_from_partitions<'t, I>(exhaustive: I, nonexhaustive: I) -> Result<Self, BuildError>
pub fn try_from_partitions<'t, I>(exhaustive: I, nonexhaustive: I) -> Result<Self, BuildError>
where
I: IntoIterator,
I::Item: Pattern<'t>,
Expand Down Expand Up @@ -524,6 +455,21 @@ impl FilterAnyProgram {
_ => None,
}
}

/// Compiles a collection of patterns into a single optional program.
///
/// Returns `Ok(None)` when `tokens` yields no patterns. Otherwise the patterns are combined with
/// `crate::any` and the `program` of the resulting combinator is returned in `Some`. Any error
/// from `crate::any` is propagated unchanged.
fn compile<'t, I>(tokens: I) -> Result<Option<Regex>, BuildError>
where
    I: IntoIterator,
    I::Item: Pattern<'t>,
    I::IntoIter: ExactSizeIterator,
{
    let patterns = tokens.into_iter();
    // Nothing to combine: signal the absence of a program rather than building an empty one.
    if patterns.len() == 0 {
        return Ok(None);
    }
    crate::any(patterns).map(|any| Some(any.program))
}
}

/// Negated glob combinator that efficiently filters file entries against patterns.
Expand Down Expand Up @@ -588,6 +534,7 @@ impl FilterAny {
#[derive(Debug)]
pub struct GlobEntry {
// The underlying tree entry; this is what `From<GlobEntry> for TreeEntry` hands back.
entry: TreeEntry,
// Copied from the anchor's pivot when the entry is produced by the walker. It is added to the
// entry's depth when splitting the path into root and relative paths and when reporting depth.
pivot: usize,
// The text matched for this entry, converted to an owned (`'static`) form.
matched: MatchedText<'static>,
}

Expand Down Expand Up @@ -622,7 +569,7 @@ impl Entry for GlobEntry {
}

fn root_relative_paths(&self) -> (&Path, &Path) {
self.entry.root_relative_paths()
self::root_relative_paths(self.path(), self.entry.depth(), self.pivot)
}

fn file_type(&self) -> FileType {
Expand All @@ -633,9 +580,11 @@ impl Entry for GlobEntry {
self.entry.metadata().map_err(WalkError::from)
}

// TODO: This needs some work and requires some explanation when applied to globs.
fn depth(&self) -> usize {
self.entry.depth()
self.entry
.depth()
.checked_add(self.pivot)
.expect("overflow determining depth")
}
}

Expand All @@ -644,3 +593,11 @@ impl From<GlobEntry> for TreeEntry {
entry.entry
}
}

/// Splits `path` into a pair of paths at a depth derived from `depth` and `pivot`.
///
/// The split depth is `depth + pivot`, which is forwarded to `split_at_depth`.
// NOTE(review): `split_at_depth` is defined elsewhere; presumably it counts components from the
// end of the path and yields (root, relative) — confirm.
///
/// # Panics
///
/// Panics if `depth + pivot` overflows `usize`.
fn root_relative_paths(path: &Path, depth: usize, pivot: usize) -> (&Path, &Path) {
    let split = depth
        .checked_add(pivot)
        .expect("overflow determining root and relative paths");
    path.split_at_depth(split)
}
Loading

0 comments on commit 03d82b9

Please sign in to comment.