diff --git a/src/token/mod.rs b/src/token/mod.rs index fa241af..8dfcdd3 100644 --- a/src/token/mod.rs +++ b/src/token/mod.rs @@ -14,7 +14,9 @@ use std::str; use crate::diagnostics::{Span, Spanned}; use crate::query::When; -use crate::token::variance::invariant::{IntoNominalText, IntoStructuralText}; +use crate::token::variance::invariant::{ + BoundaryTerm, IntoNominalText, IntoStructuralText, One, Zero, +}; use crate::token::variance::ops; use crate::token::variance::{TreeExhaustiveness, TreeVariance, VarianceFold, VarianceTerm}; use crate::token::walk::{BranchFold, Fold, FoldMap, Starting, TokenEntry}; @@ -24,7 +26,7 @@ pub use crate::token::parse::{parse, ParseError, ROOT_SEPARATOR_EXPRESSION}; pub use crate::token::variance::bound::{ BoundedVariantRange, Boundedness, NaturalRange, VariantRange, }; -pub use crate::token::variance::invariant::{Breadth, Depth, Invariant, Size, Text}; +pub use crate::token::variance::invariant::{Finalize, Breadth, Depth, DepthTerm, Invariant, Size, Text}; pub use crate::token::variance::{TokenVariance, Variance}; // TODO: Tree representations of expressions are intrusive and only differ in their annotations. @@ -157,7 +159,7 @@ impl AsRef for Composition { } } -#[derive(Clone, Copy, Debug, Eq, PartialEq)] +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub enum Boundary { Component, Separator, @@ -447,12 +449,12 @@ impl<'t, A> Token<'t, A> { pub fn variance(&self) -> TokenVariance where - TreeVariance: Fold<'t, A, Term = TokenVariance>, + TreeVariance: Fold<'t, A, Term = T::Term>, T: Invariant, { T::finalize( self.fold(TreeVariance::default()) - .unwrap_or_else(Variance::zero), + .unwrap_or_else(Zero::zero), ) } @@ -581,6 +583,7 @@ impl<'t, A> Token<'t, A> { // discard directory trees in the `FileIterator::not` combinator. pub fn is_exhaustive(&self) -> bool { self.fold(TreeExhaustiveness) + .map(Finalize::finalize) .as_ref() .map_or(false, Variance::is_exhaustive) } @@ -831,7 +834,7 @@ where Repetition<'t, A>: VarianceFold, T: Invariant, { - fn fold(&self, terms: Vec>) -> Option> { + fn fold(&self, terms: Vec) -> Option { use BranchKind::{Alternation, Concatenation, Repetition}; match self { @@ -841,7 +844,7 @@ where } } - fn finalize(&self, term: TokenVariance) -> TokenVariance { + fn finalize(&self, term: T::Term) -> T::Term { use BranchKind::{Alternation, Concatenation, Repetition}; match self { @@ -943,7 +946,7 @@ where Wildcard: VarianceTerm, T: Invariant, { - fn term(&self) -> TokenVariance { + fn term(&self) -> T::Term { use LeafKind::{Class, Literal, Separator, Wildcard}; match self { @@ -984,7 +987,7 @@ impl<'t, A> From>> for Alternation<'t, A> { } impl<'t, A> VarianceFold for Alternation<'t, A> { - fn fold(&self, terms: Vec>) -> Option> { + fn fold(&self, terms: Vec<::Term>) -> Option<::Term> { terms.into_iter().reduce(ops::disjunction) } } @@ -1083,14 +1086,14 @@ impl Class { where Archetype: VarianceTerm, T: Invariant, - F: FnMut(TokenVariance, TokenVariance) -> TokenVariance, + F: FnMut(T::Term, T::Term) -> T::Term, { T::finalize( self.archetypes() .iter() .map(Archetype::term) .reduce(f) - .unwrap_or_else(Variance::zero), + .unwrap_or_else(Zero::zero), ) } } @@ -1102,8 +1105,8 @@ impl VarianceTerm for Class { } impl VarianceTerm for Class { - fn term(&self) -> TokenVariance { - Variance::zero() + fn term(&self) -> ::Term { + Zero::zero() } } @@ -1157,42 +1160,56 @@ impl<'t, A> From>> for Concatenation<'t, A> { } impl<'t, A> VarianceFold for Concatenation<'t, A> { - fn fold(&self, terms: Vec>) -> Option> { - terms.into_iter().reduce(ops::conjunction) - } - - fn finalize(&self, term: TokenVariance) -> TokenVariance { - // Depth must consider boundary component tokens (i.e., tree wildcards). Boundary component - // tokens do not have explict separators, so additional depth terms are needed for any - // adjacent non-boundary tokens. For example, consider the pattern `a/**/b`. The `a` and - // `b` components have depth terms of zero and the tree wildcard `**` has an unbounded - // depth term. Note that the forward slashes in the expression are part of the tree - // wildcard; there are no separator tokens. The conjunction of these terms is unbounded (no - // lower bound), but the depth of this expression should have a lower bound of two. - ops::conjunction( - term, - Variance::Invariant({ - let mut sum = Depth::ZERO; - let mut term = Depth::ZERO; - for token in self.tokens() { - match token.boundary() { - Some(Boundary::Component) => { - sum = ops::conjunction(sum, term); - term = Depth::ZERO; - }, - None => { - term = Depth::ONE; - }, - _ => {}, - } - } - if sum != Depth::ZERO { - sum = ops::conjunction(sum, term); - } - sum - }), - ) - } + fn fold(&self, terms: Vec) -> Option { + //terms.into_iter().with_position().map(|(position, term)| term.close_boundary_separation(position)).reduce(ops::conjunction) + eprintln!("==> Concatenation (Conjunction) Terms"); + let sum = terms + .into_iter() + .with_position() + .map(|(position, term)| { + eprintln!("\t{:?}", term); + term.close_boundary_separation(position) + }) + .inspect(|term| eprintln!("\t{:?}\n", term)) + .reduce(ops::conjunction); + eprintln!("\tConjuction: {:?}", sum); + sum + } + + // TODO: See term finalization. Tree wildcards remain `Open`, which should produce the correct + // sum. + //fn finalize(&self, term: DepthTerm) -> DepthTerm { + // // Depth must consider boundary component tokens (i.e., tree wildcards). Boundary component + // // tokens do not have explict separators, so additional depth terms are needed for any + // // adjacent non-boundary tokens. For example, consider the pattern `a/**/b`. The `a` and + // // `b` components have depth terms of zero and the tree wildcard `**` has an unbounded + // // depth term. Note that the forward slashes in the expression are part of the tree + // // wildcard; there are no separator tokens. The conjunction of these terms is unbounded (no + // // lower bound), but the depth of this expression should have a lower bound of two. + // ops::conjunction( + // term, + // Variance::Invariant({ + // let mut sum = Depth::ZERO; + // let mut term = Depth::ZERO; + // for token in self.tokens() { + // match token.boundary() { + // Some(Boundary::Component) => { + // sum = ops::conjunction(sum, term); + // term = Depth::ZERO; + // }, + // None => { + // term = Depth::ONE; + // }, + // _ => {}, + // } + // } + // if sum != Depth::ZERO { + // sum = ops::conjunction(sum, term); + // } + // sum + // }), + // ) + //} } impl<'t, A> VarianceFold for Concatenation<'t, A> { @@ -1269,8 +1286,8 @@ impl<'t> VarianceTerm for Literal<'t> { } impl<'t> VarianceTerm for Literal<'t> { - fn term(&self) -> TokenVariance { - Variance::zero() + fn term(&self) -> ::Term { + Zero::zero() } } @@ -1341,12 +1358,17 @@ impl<'t, A> BranchComposition<'t> for Repetition<'t, A> { } impl<'t, A> VarianceFold for Repetition<'t, A> { - fn fold(&self, terms: Vec>) -> Option> { + fn fold(&self, terms: Vec<::Term>) -> Option<::Term> { terms.into_iter().reduce(ops::conjunction) } - fn finalize(&self, term: TokenVariance) -> TokenVariance { - ops::product(term, self.variance()) + fn finalize(&self, term: ::Term) -> ::Term { + //ops::product(term, self.variance()) + eprintln!("==> Repetition (Product) Term"); + eprintln!("\t{:?}\n", term); + let sum = ops::product(term, self.variance()); + eprintln!("\tProduct: {:?}", sum); + sum } } @@ -1384,8 +1406,8 @@ impl VarianceTerm for Separator { } impl VarianceTerm for Separator { - fn term(&self) -> TokenVariance { - Variance::Invariant(1.into()) + fn term(&self) -> ::Term { + BoundaryTerm::separator(One::one()) } } @@ -1451,10 +1473,10 @@ impl VarianceTerm for Wildcard { } impl VarianceTerm for Wildcard { - fn term(&self) -> TokenVariance { + fn term(&self) -> ::Term { match self { - Wildcard::Tree { .. } => Variance::unbounded(), - _ => Variance::zero(), + Wildcard::Tree { .. } => BoundaryTerm::component(Variance::unbounded()), + _ => Zero::zero(), } } } diff --git a/src/token/variance/bound.rs b/src/token/variance/bound.rs index e14e8e9..95e9f3d 100644 --- a/src/token/variance/bound.rs +++ b/src/token/variance/bound.rs @@ -2,7 +2,7 @@ use std::cmp::{self, Ordering}; use std::marker::PhantomData; use std::num::NonZeroUsize; -use crate::token::variance::invariant::{Identity, UnitBound}; +use crate::token::variance::invariant::{self, UnitBound, Zero as _}; use crate::token::variance::ops::{self, Conjunction, Disjunction, Product}; use crate::token::variance::Variance; @@ -232,7 +232,7 @@ impl From for usize { } } -impl Identity for Zero { +impl invariant::Zero for Zero { fn zero() -> Self { Zero } diff --git a/src/token/variance/invariant/mod.rs b/src/token/variance/invariant/mod.rs index 46778c8..2827857 100644 --- a/src/token/variance/invariant/mod.rs +++ b/src/token/variance/invariant/mod.rs @@ -1,4 +1,5 @@ mod natural; +mod separation; mod text; use std::num::NonZeroUsize; @@ -7,7 +8,10 @@ use crate::token::variance::bound::{BoundedVariantRange, Boundedness, OpenedUppe use crate::token::variance::ops::{Conjunction, Disjunction, Product}; use crate::token::variance::TokenVariance; -pub use crate::token::variance::invariant::natural::{Depth, Size}; +pub use crate::token::variance::invariant::natural::{Depth, DepthTerm, Size}; +pub use crate::token::variance::invariant::separation::{ + BoundaryTerm, SeparatedTerm, Separation, Finalize, +}; pub use crate::token::variance::invariant::text::{IntoNominalText, IntoStructuralText, Text}; pub type InvariantBound = ::Bound; @@ -16,17 +20,23 @@ pub trait BoundedVariance { type Bound; } -pub trait Identity { +pub trait Zero { fn zero() -> Self; } -pub trait Invariant: BoundedVariance + Sized { - type Term: InvariantTerm; +pub trait One { + fn one() -> Self; +} + +// TODO: Recombine these traits? Or rename them? It's confusing that this has a bound on +// `InvariantTerm`, but its `Term` does not! +pub trait Invariant: InvariantTerm + Sized { + type Term: Zero; - fn finalize(term: TokenVariance) -> TokenVariance; + fn finalize(term: Self::Term) -> TokenVariance; } -pub trait InvariantTerm: BoundedVariance + Identity + Sized { +pub trait InvariantTerm: BoundedVariance + Zero + Sized { fn bound(lhs: Self, rhs: Self) -> Boundedness; fn into_lower_bound(self) -> Boundedness; @@ -61,16 +71,16 @@ impl BoundedVariance for Breadth { type Bound = (); } -impl Identity for Breadth { +impl Zero for Breadth { fn zero() -> Self { Breadth } } impl Invariant for Breadth { - type Term = Self; + type Term = TokenVariance; - fn finalize(term: TokenVariance) -> TokenVariance { + fn finalize(term: Self::Term) -> TokenVariance { term } } @@ -85,7 +95,7 @@ impl InvariantTerm for Breadth { } } -#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub struct UnitBound; impl Conjunction for UnitBound { diff --git a/src/token/variance/invariant/natural.rs b/src/token/variance/invariant/natural.rs index 944dcfe..95a4454 100644 --- a/src/token/variance/invariant/natural.rs +++ b/src/token/variance/invariant/natural.rs @@ -3,16 +3,17 @@ use std::num::NonZeroUsize; use crate::token::variance::bound::{ Bounded, BoundedVariantRange, Boundedness, Unbounded, VariantRange, }; -use crate::token::variance::invariant::{BoundedVariance, Identity, Invariant, InvariantTerm}; +use crate::token::variance::invariant::separation::{ + BoundaryTerm, Finalize, SeparatedTerm, Separation, +}; +use crate::token::variance::invariant::{BoundedVariance, Invariant, InvariantTerm, One, Zero}; use crate::token::variance::ops::{self, Conjunction, Disjunction, Product}; use crate::token::variance::{TokenVariance, Variance}; +use crate::token::Boundary; -macro_rules! impl_invariant_natural { +macro_rules! impl_natural_invariant_term { ($name:ident $(,)?) => { - impl_invariant_natural!($name, once => 0); - }; - ($name:ident, once => $once:expr $(,)?) => { - #[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)] + #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] #[repr(transparent)] pub struct $name(usize); @@ -69,21 +70,6 @@ macro_rules! impl_invariant_natural { } } - impl Identity for $name { - fn zero() -> Self { - $name(0) - } - } - - impl Invariant for $name { - type Term = Self; - - fn finalize(term: TokenVariance) -> TokenVariance { - // TODO: This is incorrect. - term.map_invariant(|term| ops::conjunction(term, $name($once))) - } - } - impl InvariantTerm for $name { fn bound(lhs: Self, rhs: Self) -> Boundedness { let [lower, upper] = crate::minmax(lhs, rhs); @@ -99,6 +85,12 @@ macro_rules! impl_invariant_natural { } } + impl One for $name { + fn one() -> Self { + $name(1) + } + } + impl PartialEq for $name { fn eq(&self, rhs: &usize) -> bool { self.0 == *rhs @@ -117,11 +109,15 @@ macro_rules! impl_invariant_natural { type Output = TokenVariance; fn product(self, rhs: VariantRange) -> Self::Output { - NonZeroUsize::new(self.0) - .map_or_else( - TokenVariance::::zero, - |lhs| Variance::Variant(rhs.map_bounded(|rhs| ops::product(rhs, lhs))), - ) + NonZeroUsize::new(self.0).map_or_else(TokenVariance::::zero, |lhs| { + Variance::Variant(rhs.map_bounded(|rhs| ops::product(rhs, lhs))) + }) + } + } + + impl Zero for $name { + fn zero() -> Self { + $name(0) } } @@ -142,11 +138,52 @@ macro_rules! impl_invariant_natural { } }; } -impl_invariant_natural!(Depth, once => 1); -impl_invariant_natural!(Size); +impl_natural_invariant_term!(Depth); +impl_natural_invariant_term!(Size); + +impl Invariant for Depth { + type Term = DepthTerm; + + fn finalize(term: Self::Term) -> TokenVariance { + term.finalize() + } +} + +impl Invariant for Size { + type Term = TokenVariance; + + fn finalize(term: Self::Term) -> TokenVariance { + term + } +} + +pub type DepthTerm = BoundaryTerm; impl TokenVariance { pub fn is_exhaustive(&self) -> bool { !self.has_upper_bound() } } + +impl Finalize for SeparatedTerm> { + type Output = TokenVariance; + + fn finalize(self) -> Self::Output { + use Boundary::Component; + use Separation::{Closed, First, Last, Open}; + use Variance::Invariant; + + let SeparatedTerm(separation, term) = self; + match separation { + Open(Some(Component)) | First | Last => term, + Open(_) => ops::conjunction(term, Invariant(Depth::ONE)), + Closed => term.map_invariant(|term| Depth(term.0.saturating_sub(1))), + } + } +} + +impl BoundaryTerm { + pub fn is_exhaustive(&self) -> bool { + self.clone().finalize().is_exhaustive() + } +} diff --git a/src/token/variance/invariant/separation.rs b/src/token/variance/invariant/separation.rs new file mode 100644 index 0000000..e63dd32 --- /dev/null +++ b/src/token/variance/invariant/separation.rs @@ -0,0 +1,432 @@ +use itertools::{Itertools, Position}; +use std::collections::HashSet; +use std::hash::Hash; + +use crate::token::variance::bound::Boundedness; +use crate::token::variance::invariant::{BoundedVariance, Invariant, One, Zero}; +use crate::token::variance::ops::{self, Conjunction, Disjunction, Product}; +use crate::token::variance::{TokenVariance, Variance}; +use crate::token::{Boundary, Composition}; + +pub trait Finalize: Sized { + type Output; + + fn finalize(self) -> Self::Output; +} + +pub type BoundaryTerm = TreeTerm>>; + +impl BoundaryTerm +where + TokenVariance: Eq + Hash, + T: BoundedVariance, +{ + pub fn separator(term: TokenVariance) -> Self { + BoundaryTerm::Conjunctive(SeparatedTerm::separator(term)) + } + + pub fn component(term: TokenVariance) -> Self { + BoundaryTerm::Conjunctive(SeparatedTerm::component(term)) + } + + pub fn close_boundary_separation(self, position: Position) -> Self { + match self { + BoundaryTerm::Conjunctive(term) => BoundaryTerm::Conjunctive(term.close_boundary_separation(position)), + // TODO: Is it necessary to close boundary separations in disjunctive terms? + BoundaryTerm::Disjunctive(term) => BoundaryTerm::Disjunctive(term.remap(|term| term.close_boundary_separation(position))), + } + } + + pub fn as_variance(&self) -> Variance<&T, Option<&Boundedness>> { + match self { + BoundaryTerm::Conjunctive(ref term) => term.1.as_ref().map_variant(Some), + BoundaryTerm::Disjunctive(_) => Variance::Variant(None), + } + } +} + +pub type TreeTerm = Composition>; + +impl Conjunction for TreeTerm +where + DisjunctiveTerm: Conjunction> + + Conjunction>, + T: Conjunction + + Conjunction, Output = DisjunctiveTerm> + + Eq + + Hash, +{ + type Output = Self; + + fn conjunction(self, rhs: Self) -> Self::Output { + use Composition::{Conjunctive, Disjunctive}; + + match (self, rhs) { + (Conjunctive(lhs), Conjunctive(rhs)) => Conjunctive(ops::conjunction(lhs, rhs)), + (Conjunctive(lhs), Disjunctive(rhs)) => Disjunctive(ops::conjunction(lhs, rhs)), + (Disjunctive(lhs), Conjunctive(rhs)) => Disjunctive(ops::conjunction(lhs, rhs)), + (Disjunctive(lhs), Disjunctive(rhs)) => Disjunctive(ops::conjunction(lhs, rhs)), + } + } +} + +impl Disjunction for TreeTerm +where + DisjunctiveTerm: Disjunction> + + Disjunction, Output = DisjunctiveTerm>, + T: Disjunction> + + Disjunction, Output = DisjunctiveTerm> + + Eq + + Hash, +{ + type Output = Self; + + fn disjunction(self, rhs: Self) -> Self::Output { + use Composition::{Conjunctive, Disjunctive}; + + Disjunctive(match (self, rhs) { + (Conjunctive(lhs), Conjunctive(rhs)) => ops::disjunction(lhs, rhs), + (Conjunctive(lhs), Disjunctive(rhs)) => ops::disjunction(lhs, rhs), + (Disjunctive(lhs), Conjunctive(rhs)) => ops::disjunction(lhs, rhs), + (Disjunctive(lhs), Disjunctive(rhs)) => ops::disjunction(lhs, rhs), + }) + } +} + +impl Finalize for TreeTerm +where + DisjunctiveTerm: Finalize, + T: Eq + Finalize + Hash, +{ + type Output = T::Output; + + fn finalize(self) -> Self::Output { + match self { + TreeTerm::Conjunctive(term) => term.finalize(), + TreeTerm::Disjunctive(term) => term.finalize(), + } + } +} + +impl One for TreeTerm +where + T: Eq + Hash + One, +{ + fn one() -> Self { + TreeTerm::Conjunctive(T::one()) + } +} + +impl Product for TreeTerm +where + T: Eq + Hash + Product, + T::Output: Eq + Hash, + U: Clone, +{ + type Output = TreeTerm; + + fn product(self, rhs: U) -> Self::Output { + match self { + TreeTerm::Conjunctive(lhs) => TreeTerm::Conjunctive(ops::product(lhs, rhs)), + TreeTerm::Disjunctive(lhs) => TreeTerm::Disjunctive(ops::product(lhs, rhs)), + } + } +} + +impl Zero for TreeTerm +where + T: Eq + Hash + Zero, +{ + fn zero() -> Self { + TreeTerm::Conjunctive(T::zero()) + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct DisjunctiveTerm(HashSet) +where + T: Eq + Hash; + +impl DisjunctiveTerm +where + T: Eq + Hash, +{ + fn remap(self, f: F) -> DisjunctiveTerm + where + U: Eq + Hash, + F: FnMut(T) -> U, + { + DisjunctiveTerm(self.0.into_iter().map(f).collect()) + } +} + +impl Conjunction for DisjunctiveTerm +where + T: Clone + Conjunction + Eq + Hash, +{ + type Output = Self; + + fn conjunction(self, rhs: Self) -> Self::Output { + DisjunctiveTerm( + self.0 + .into_iter() + .cartesian_product(rhs.0.iter()) + .map(|(lhs, rhs)| ops::conjunction(lhs, rhs.clone())) + .collect(), + ) + } +} + +impl Conjunction for DisjunctiveTerm +where + T: Clone + Conjunction + Eq + Hash, +{ + type Output = Self; + + fn conjunction(self, rhs: T) -> Self::Output { + self.remap(|lhs| ops::conjunction(lhs, rhs.clone())) + } +} + +// This is the same as the above `Conjunction` implementation, but with transposed operand order. +// However, it is limited to `SeparatedTerm`s (rather than any `T`), because otherwise this +// implementation conflicts with an implementation for `UnitBound`. +impl Conjunction>> for SeparatedTerm +where + SeparatedTerm: Clone + Conjunction> + Eq + Hash, +{ + type Output = DisjunctiveTerm>; + + fn conjunction(self, rhs: DisjunctiveTerm>) -> Self::Output { + rhs.remap(|rhs| ops::conjunction(self.clone(), rhs)) + } +} + +impl Disjunction for DisjunctiveTerm +where + T: Eq + Hash, +{ + type Output = Self; + + fn disjunction(self, rhs: Self) -> Self::Output { + DisjunctiveTerm(self.0.into_iter().chain(rhs.0).collect()) + } +} + +impl Disjunction for DisjunctiveTerm +where + T: Eq + Hash, +{ + type Output = Self; + + fn disjunction(mut self, rhs: T) -> Self::Output { + self.0.insert(rhs); + self + } +} + +impl Disjunction> for T +where + T: Eq + Hash, +{ + type Output = DisjunctiveTerm; + + fn disjunction(self, mut rhs: DisjunctiveTerm) -> Self::Output { + rhs.0.insert(self); + rhs + } +} + +impl Finalize for DisjunctiveTerm +where + T: Eq + Finalize + Hash, + T::Output: Disjunction + Zero, +{ + type Output = T::Output; + + fn finalize(self) -> Self::Output { + self.0 + .into_iter() + .map(Finalize::finalize) + .reduce(ops::disjunction) + .unwrap_or_else(Zero::zero) + } +} + +impl Product for DisjunctiveTerm +where + T: Eq + Hash + Product, + T::Output: Eq + Hash, + U: Clone, +{ + type Output = DisjunctiveTerm; + + fn product(self, rhs: U) -> Self::Output { + self.remap(|lhs| ops::product(lhs, rhs.clone())) + } +} + +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub enum Separation { + // TODO: While additional information is needed to close `Open` for separators, more general + // boundary information appears to be unnecessary. More importantly, this is insufficient + // to handle the "coalescence" of tree wildcards! This information likely needs to be + // pushed up. One algebra that seems to work is to immediately finalize tree wildcard + // terms in conjunctions (prior to performing the conjunction). The `Separation` is + // otherwise unaffected: tree wildcards are either `Last` or `Closed` and are always + // unbounded. They "just" have an implicit +1 in conjunctions for depth. This extra +1 + // ought to be specified per invariant rather than generalized for all `BoundaryTerm`s. + Open(Option), + First, + Last, + Closed, +} + +impl Conjunction for Separation { + type Output = Self; + + fn conjunction(self, rhs: Self) -> Self::Output { + use Separation::{Closed, First, Last, Open}; + + match (self, rhs) { + (Closed, Closed) | (First, Last | Closed) | (Closed, Last) => Closed, + (Last | Open(_), Closed) | (Last | Open(_), Last) => Last, + (Closed, First | Open(_)) | (First, First | Open(_)) => First, + (Open(_), First | Open(_)) | (Last, First | Open(_)) => Open(None), + } + } +} + +//impl Disjunction for Separation { +// type Output = Self; +// +// fn disjunction(self, rhs: Self) -> Self::Output { +// use Separation::{First, Last, Terminal, Unit}; +// +// match (self, rhs) { +// (Terminal, _) | (_, Terminal) | (First, Last) | (Last, First) => Terminal, +// (Last, _) | (_, Last) => Last, +// (First, _) | (_, First) => First, +// _ => Unit, +// } +// } +//} + +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct SeparatedTerm(pub Separation, pub T); + +impl SeparatedTerm { + pub fn close_boundary_separation(self, position: Position) -> Self { + use Boundary::{Component, Separator}; + use Position::{First, Middle, Last, Only}; + + let SeparatedTerm(separation, term) = self; + SeparatedTerm(match separation { + separation @ Separation::Open(Some(boundary)) => match (boundary, position) { + (Component, _) | (_, Only) => separation, + (Separator, First) => Separation::First, + (Separator, Middle) => Separation::Open(None), + (Separator, Last) => Separation::Last, + }, + separation => separation, + }, term) + } +} + +impl SeparatedTerm { + pub fn separator(term: T) -> Self { + SeparatedTerm(Separation::Open(Some(Boundary::Separator)), term) + } + + pub fn component(term: T) -> Self { + SeparatedTerm(Separation::Open(Some(Boundary::Component)), term) + } + + pub fn into_inner(self) -> T { + self.1 + } +} + +impl Conjunction> for SeparatedTerm +where + T: Conjunction, +{ + type Output = SeparatedTerm; + + fn conjunction(self, rhs: SeparatedTerm) -> Self::Output { + SeparatedTerm( + ops::conjunction(self.0, rhs.0), + ops::conjunction(self.1, rhs.1), + ) + } +} + +//impl Disjunction> for Separated +//where +// Self: Finalize, +// Separated: Finalize>, +// T: Disjunction, +//{ +// type Output = Separated; +// +// fn disjunction(self, rhs: Separated) -> Self::Output { +// let lhs = self.finalize(); +// let rhs = rhs.finalize(); +// Separated( +// ops::disjunction(lhs.0, rhs.0), +// ops::disjunction(lhs.1, rhs.1), +// ) +// } +//} + +impl Disjunction for SeparatedTerm +where + T: Eq + Hash, +{ + type Output = DisjunctiveTerm; + + fn disjunction(self, rhs: Self) -> Self::Output { + DisjunctiveTerm([self, rhs].into_iter().collect()) + } +} + +//impl Finalize for Separated> +//where +// T: BoundedVariance + Invariant, +//{ +// type Output = Self; +// +// fn finalize(self) -> Self { +// let separation = self.0; +// Separated(separation, T::finalize(self)) +// } +//} + +impl One for SeparatedTerm +where + T: One, +{ + fn one() -> Self { + SeparatedTerm(Separation::Open(None), T::one()) + } +} + +impl Product for SeparatedTerm +where + T: Product, +{ + type Output = SeparatedTerm; + + fn product(self, rhs: U) -> Self::Output { + SeparatedTerm(self.0, ops::product(self.1, rhs)) + } +} + +impl Zero for SeparatedTerm +where + T: Zero, +{ + fn zero() -> Self { + SeparatedTerm(Separation::Open(None), T::zero()) + } +} diff --git a/src/token/variance/invariant/text.rs b/src/token/variance/invariant/text.rs index edb028b..66e2291 100644 --- a/src/token/variance/invariant/text.rs +++ b/src/token/variance/invariant/text.rs @@ -5,7 +5,7 @@ use std::num::NonZeroUsize; use crate::encode; use crate::token::variance::bound::{Boundedness, VariantRange}; use crate::token::variance::invariant::{ - BoundedVariance, Identity, Invariant, InvariantTerm, UnitBound, + BoundedVariance, Invariant, InvariantTerm, UnitBound, Zero, }; use crate::token::variance::ops::{self, Conjunction, Disjunction, Product}; use crate::token::variance::{TokenVariance, Variance}; @@ -172,16 +172,16 @@ impl<'t> From> for Text<'t> { } } -impl<'t> Identity for Text<'t> { +impl<'t> Zero for Text<'t> { fn zero() -> Self { Text::new() } } impl<'t> Invariant for Text<'t> { - type Term = Self; + type Term = TokenVariance; - fn finalize(term: TokenVariance) -> TokenVariance { + fn finalize(term: Self::Term) -> TokenVariance { term } } diff --git a/src/token/variance/mod.rs b/src/token/variance/mod.rs index 60cfac3..33a7127 100644 --- a/src/token/variance/mod.rs +++ b/src/token/variance/mod.rs @@ -11,7 +11,8 @@ use crate::token::variance::bound::{ VariantRange, }; use crate::token::variance::invariant::{ - Breadth, Depth, Identity, Invariant, InvariantBound, InvariantTerm, Text, UnitBound, + BoundaryTerm, Breadth, Depth, DepthTerm, Invariant, InvariantBound, InvariantTerm, One, + SeparatedTerm, Separation, Text, UnitBound, Zero, }; use crate::token::variance::ops::{Conjunction, Disjunction, Product}; use crate::token::walk::{ChildToken, Fold, Forward, ParentToken, Sequencer}; @@ -23,34 +24,27 @@ pub trait VarianceTerm where T: Invariant, { - fn term(&self) -> TokenVariance; + fn term(&self) -> T::Term; } pub trait VarianceFold where T: Invariant, { - fn fold(&self, terms: Vec>) -> Option>; + fn fold(&self, terms: Vec) -> Option; - fn finalize(&self, term: TokenVariance) -> TokenVariance { + fn finalize(&self, term: T::Term) -> T::Term { term } } -#[derive(Clone, Copy, Debug, Eq)] +#[derive(Clone, Copy, Debug, Eq, Hash)] pub enum Variance> { Invariant(T), Variant(B), } impl Variance { - pub fn zero() -> Self - where - T: Identity, - { - Variance::Invariant(T::zero()) - } - pub fn map_invariant(self, f: F) -> Variance where F: FnOnce(T) -> U, @@ -214,6 +208,15 @@ where } } +impl One for Variance +where + T: One, +{ + fn one() -> Self { + Variance::Invariant(T::one()) + } +} + impl PartialEq for Variance where T: PartialEq, @@ -259,6 +262,15 @@ where } } +impl Zero for Variance +where + T: Zero, +{ + fn zero() -> Self { + Variance::Invariant(T::zero()) + } +} + pub struct TreeVariance(PhantomData T>); impl Default for TreeVariance { @@ -274,7 +286,7 @@ where T: Invariant, { type Sequencer = Forward; - type Term = TokenVariance; + type Term = T::Term; fn sequencer() -> Self::Sequencer { Forward @@ -318,7 +330,7 @@ impl Sequencer for TreeExhaustiveness { impl<'t, A> Fold<'t, A> for TreeExhaustiveness { type Sequencer = Self; - type Term = TokenVariance; + type Term = DepthTerm; fn sequencer() -> Self::Sequencer { Self @@ -356,8 +368,11 @@ impl<'t, A> Fold<'t, A> for TreeExhaustiveness { }) .into_inner(); if !all && any { - return Some(TokenVariance::::Variant(Bounded( - BoundedVariantRange::Upper(unsafe { NonZeroUsize::new_unchecked(1) }), + return Some(BoundaryTerm::Conjunctive(SeparatedTerm( + Separation::Open(None), + TokenVariance::::Variant(Bounded(BoundedVariantRange::Upper(unsafe { + NonZeroUsize::new_unchecked(1) + }))), ))); } } @@ -371,11 +386,11 @@ impl<'t, A> Fold<'t, A> for TreeExhaustiveness { else { // Terms were discarded, meaning some non-depth quantity was bounded. Yield any sum // only if the depth is exhaustive, otherwise zero. - if sum.as_ref().map_or(false, Variance::is_exhaustive) { + if sum.as_ref().map_or(false, DepthTerm::is_exhaustive) { sum } else { - Some(Variance::zero()) + Some(Zero::zero()) } } } @@ -384,7 +399,7 @@ impl<'t, A> Fold<'t, A> for TreeExhaustiveness { use Variance::{Invariant, Variant}; match branch { - branch @ BranchKind::Repetition(_) => match term { + branch @ BranchKind::Repetition(_) => match term.as_variance() { // When folding terms into a repetition, only finalize variant terms and the // multiplicative identity and annihilator (one and zero). This is necessary, // because natural bounds do not express the subset nor relationship of matched @@ -392,7 +407,7 @@ impl<'t, A> Fold<'t, A> for TreeExhaustiveness { // depth, but only matches paths with a depth that is a multiple of two and so is // nonexhaustive. However, the similar pattern `<*/>` is exhaustive and matches any // sub-tree of a match. - Invariant(Depth::ZERO) | Invariant(Depth::ONE) | Variant(_) => { + Invariant(&Depth::ZERO) | Invariant(&Depth::ONE) | Variant(_) => { self::finalize::(branch, term) }, _ => term, @@ -406,27 +421,21 @@ impl<'t, A> Fold<'t, A> for TreeExhaustiveness { } } -pub fn fold( - token: &impl VarianceFold, - terms: Vec>, -) -> Option> +pub fn fold(token: &impl VarianceFold, terms: Vec) -> Option where T: Invariant, { token.fold(terms) } -pub fn finalize( - token: &impl VarianceFold, - term: TokenVariance, -) -> TokenVariance +pub fn finalize(token: &impl VarianceFold, term: T::Term) -> T::Term where T: Invariant, { token.finalize(term) } -pub fn term(token: &impl VarianceTerm) -> TokenVariance +pub fn term(token: &impl VarianceTerm) -> T::Term where T: Invariant, { @@ -456,7 +465,7 @@ pub mod harness { Variance::Variant(UnitBound.into()) } - pub fn range(lower: usize, upper: Option) -> TokenVariance + pub fn range(lower: usize, upper: impl Into>) -> TokenVariance where T: From + Invariant, { @@ -468,7 +477,7 @@ pub mod harness { expected: TokenVariance, ) -> Tokenized<'t, A> where - TreeVariance: Fold<'t, A, Term = TokenVariance>, + TreeVariance: Fold<'t, A, Term = T::Term>, T: Debug + Eq + Invariant, T::Bound: Debug + Eq, { @@ -511,16 +520,35 @@ mod tests { #[case("a", harness::invariant(1))] #[case("a/", harness::invariant(1))] #[case("a/b", harness::invariant(2))] + #[case("(?i)a(?-i)b", harness::invariant(1))] + #[case("{a/b}", harness::invariant(2))] + #[case("{a,a/b}", harness::range(1, 2))] + #[case("x{a,a/b}", harness::range(1, 2))] + #[case("x/{a,a/b}", harness::range(2, 3))] + #[case("{a,a/b}x", harness::range(1, 2))] + #[case("{a,a/b}/x", harness::range(2, 3))] #[case("", harness::invariant(3))] + #[case("", harness::range(0, 3))] + #[case("x", harness::invariant(3))] + #[case("x", harness::invariant(4))] + #[case("*", harness::invariant(1))] + #[case("a/*", harness::invariant(2))] + #[case("a/*/b", harness::invariant(3))] + #[case("*/a", harness::invariant(2))] + #[case("{a,*}", harness::invariant(1))] + #[case("*", harness::range(2, None))] + #[case("<*/>*", harness::range(1, None))] + #[case("</>*", harness::range(1, None))] + #[case("*", harness::range(1, None))] + #[case("**", Variance::unbounded())] #[case("a/**", harness::range(1, None))] #[case("a/**/b", harness::range(2, None))] - #[case("a/**/b/**/c", harness::range(3, None))] - #[case("*", harness::range(1, None))] - #[case("{a/b/,c/**/}*.ext", harness::range(2, None))] - #[case("**", Variance::unbounded())] - #[case("<*/>*", Variance::unbounded())] - #[case("</>*", Variance::unbounded())] - #[case("*", Variance::unbounded())] + #[case("a/<*/>b", harness::range(2, None))] + //#[case("a/**/b/**/c", harness::range(3, None))] + #[case("*/**", harness::range(1, None))] + #[case("**/*", harness::range(1, None))] + #[case("**/*/**", harness::range(1, None))] + //#[case("{a/b/,c/**/}*.ext", harness::range(2, None))] fn parse_expression_depth_variance_eq( #[case] expression: &str, #[case] expected: TokenVariance,