From 828e7cf9c2300ca71f19c45426172db723855e50 Mon Sep 17 00:00:00 2001 From: Sean Olson Date: Mon, 25 Mar 2024 10:24:44 -0700 Subject: [PATCH] Introduce `BoundaryTerm`s. This (very WIP) change introduces general `BoundaryTerm`s, which associate boundary information with variance terms in folds. This allows for a more complete algebra that is aware of separation terminals as terms are summed. Depth bounds are now correct (AFAICT). However, this still does not properly support the type of "coalescence" needed for tree wildcards. This needs a lot of cleaning up and reorganizing! Names, module hierarchy, oh my! `BoundaryTerm` may be useful for computing the bounds of breadth as well. For example, products of such terms are aware of boundary terminals, which may remove the need to examine terminal tokens while also propagating terminals into the next level of the fold. --- src/token/mod.rs | 140 ++++--- src/token/variance/bound.rs | 4 +- src/token/variance/invariant/mod.rs | 30 +- src/token/variance/invariant/natural.rs | 93 +++-- src/token/variance/invariant/separation.rs | 432 +++++++++++++++++++++ src/token/variance/invariant/text.rs | 8 +- src/token/variance/mod.rs | 104 +++-- 7 files changed, 670 insertions(+), 141 deletions(-) create mode 100644 src/token/variance/invariant/separation.rs diff --git a/src/token/mod.rs b/src/token/mod.rs index fa241af..8dfcdd3 100644 --- a/src/token/mod.rs +++ b/src/token/mod.rs @@ -14,7 +14,9 @@ use std::str; use crate::diagnostics::{Span, Spanned}; use crate::query::When; -use crate::token::variance::invariant::{IntoNominalText, IntoStructuralText}; +use crate::token::variance::invariant::{ + BoundaryTerm, IntoNominalText, IntoStructuralText, One, Zero, +}; use crate::token::variance::ops; use crate::token::variance::{TreeExhaustiveness, TreeVariance, VarianceFold, VarianceTerm}; use crate::token::walk::{BranchFold, Fold, FoldMap, Starting, TokenEntry}; @@ -24,7 +26,7 @@ pub use crate::token::parse::{parse, ParseError, 
ROOT_SEPARATOR_EXPRESSION}; pub use crate::token::variance::bound::{ BoundedVariantRange, Boundedness, NaturalRange, VariantRange, }; -pub use crate::token::variance::invariant::{Breadth, Depth, Invariant, Size, Text}; +pub use crate::token::variance::invariant::{Finalize, Breadth, Depth, DepthTerm, Invariant, Size, Text}; pub use crate::token::variance::{TokenVariance, Variance}; // TODO: Tree representations of expressions are intrusive and only differ in their annotations. @@ -157,7 +159,7 @@ impl AsRef for Composition { } } -#[derive(Clone, Copy, Debug, Eq, PartialEq)] +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub enum Boundary { Component, Separator, @@ -447,12 +449,12 @@ impl<'t, A> Token<'t, A> { pub fn variance(&self) -> TokenVariance where - TreeVariance: Fold<'t, A, Term = TokenVariance>, + TreeVariance: Fold<'t, A, Term = T::Term>, T: Invariant, { T::finalize( self.fold(TreeVariance::default()) - .unwrap_or_else(Variance::zero), + .unwrap_or_else(Zero::zero), ) } @@ -581,6 +583,7 @@ impl<'t, A> Token<'t, A> { // discard directory trees in the `FileIterator::not` combinator. 
pub fn is_exhaustive(&self) -> bool { self.fold(TreeExhaustiveness) + .map(Finalize::finalize) .as_ref() .map_or(false, Variance::is_exhaustive) } @@ -831,7 +834,7 @@ where Repetition<'t, A>: VarianceFold, T: Invariant, { - fn fold(&self, terms: Vec>) -> Option> { + fn fold(&self, terms: Vec) -> Option { use BranchKind::{Alternation, Concatenation, Repetition}; match self { @@ -841,7 +844,7 @@ where } } - fn finalize(&self, term: TokenVariance) -> TokenVariance { + fn finalize(&self, term: T::Term) -> T::Term { use BranchKind::{Alternation, Concatenation, Repetition}; match self { @@ -943,7 +946,7 @@ where Wildcard: VarianceTerm, T: Invariant, { - fn term(&self) -> TokenVariance { + fn term(&self) -> T::Term { use LeafKind::{Class, Literal, Separator, Wildcard}; match self { @@ -984,7 +987,7 @@ impl<'t, A> From>> for Alternation<'t, A> { } impl<'t, A> VarianceFold for Alternation<'t, A> { - fn fold(&self, terms: Vec>) -> Option> { + fn fold(&self, terms: Vec<::Term>) -> Option<::Term> { terms.into_iter().reduce(ops::disjunction) } } @@ -1083,14 +1086,14 @@ impl Class { where Archetype: VarianceTerm, T: Invariant, - F: FnMut(TokenVariance, TokenVariance) -> TokenVariance, + F: FnMut(T::Term, T::Term) -> T::Term, { T::finalize( self.archetypes() .iter() .map(Archetype::term) .reduce(f) - .unwrap_or_else(Variance::zero), + .unwrap_or_else(Zero::zero), ) } } @@ -1102,8 +1105,8 @@ impl VarianceTerm for Class { } impl VarianceTerm for Class { - fn term(&self) -> TokenVariance { - Variance::zero() + fn term(&self) -> ::Term { + Zero::zero() } } @@ -1157,42 +1160,56 @@ impl<'t, A> From>> for Concatenation<'t, A> { } impl<'t, A> VarianceFold for Concatenation<'t, A> { - fn fold(&self, terms: Vec>) -> Option> { - terms.into_iter().reduce(ops::conjunction) - } - - fn finalize(&self, term: TokenVariance) -> TokenVariance { - // Depth must consider boundary component tokens (i.e., tree wildcards). 
Boundary component - // tokens do not have explict separators, so additional depth terms are needed for any - // adjacent non-boundary tokens. For example, consider the pattern `a/**/b`. The `a` and - // `b` components have depth terms of zero and the tree wildcard `**` has an unbounded - // depth term. Note that the forward slashes in the expression are part of the tree - // wildcard; there are no separator tokens. The conjunction of these terms is unbounded (no - // lower bound), but the depth of this expression should have a lower bound of two. - ops::conjunction( - term, - Variance::Invariant({ - let mut sum = Depth::ZERO; - let mut term = Depth::ZERO; - for token in self.tokens() { - match token.boundary() { - Some(Boundary::Component) => { - sum = ops::conjunction(sum, term); - term = Depth::ZERO; - }, - None => { - term = Depth::ONE; - }, - _ => {}, - } - } - if sum != Depth::ZERO { - sum = ops::conjunction(sum, term); - } - sum - }), - ) - } + fn fold(&self, terms: Vec) -> Option { + //terms.into_iter().with_position().map(|(position, term)| term.close_boundary_separation(position)).reduce(ops::conjunction) + eprintln!("==> Concatenation (Conjunction) Terms"); + let sum = terms + .into_iter() + .with_position() + .map(|(position, term)| { + eprintln!("\t{:?}", term); + term.close_boundary_separation(position) + }) + .inspect(|term| eprintln!("\t{:?}\n", term)) + .reduce(ops::conjunction); + eprintln!("\tConjuction: {:?}", sum); + sum + } + + // TODO: See term finalization. Tree wildcards remain `Open`, which should produce the correct + // sum. + //fn finalize(&self, term: DepthTerm) -> DepthTerm { + // // Depth must consider boundary component tokens (i.e., tree wildcards). Boundary component + // // tokens do not have explict separators, so additional depth terms are needed for any + // // adjacent non-boundary tokens. For example, consider the pattern `a/**/b`. 
The `a` and + // // `b` components have depth terms of zero and the tree wildcard `**` has an unbounded + // // depth term. Note that the forward slashes in the expression are part of the tree + // // wildcard; there are no separator tokens. The conjunction of these terms is unbounded (no + // // lower bound), but the depth of this expression should have a lower bound of two. + // ops::conjunction( + // term, + // Variance::Invariant({ + // let mut sum = Depth::ZERO; + // let mut term = Depth::ZERO; + // for token in self.tokens() { + // match token.boundary() { + // Some(Boundary::Component) => { + // sum = ops::conjunction(sum, term); + // term = Depth::ZERO; + // }, + // None => { + // term = Depth::ONE; + // }, + // _ => {}, + // } + // } + // if sum != Depth::ZERO { + // sum = ops::conjunction(sum, term); + // } + // sum + // }), + // ) + //} } impl<'t, A> VarianceFold for Concatenation<'t, A> { @@ -1269,8 +1286,8 @@ impl<'t> VarianceTerm for Literal<'t> { } impl<'t> VarianceTerm for Literal<'t> { - fn term(&self) -> TokenVariance { - Variance::zero() + fn term(&self) -> ::Term { + Zero::zero() } } @@ -1341,12 +1358,17 @@ impl<'t, A> BranchComposition<'t> for Repetition<'t, A> { } impl<'t, A> VarianceFold for Repetition<'t, A> { - fn fold(&self, terms: Vec>) -> Option> { + fn fold(&self, terms: Vec<::Term>) -> Option<::Term> { terms.into_iter().reduce(ops::conjunction) } - fn finalize(&self, term: TokenVariance) -> TokenVariance { - ops::product(term, self.variance()) + fn finalize(&self, term: ::Term) -> ::Term { + //ops::product(term, self.variance()) + eprintln!("==> Repetition (Product) Term"); + eprintln!("\t{:?}\n", term); + let sum = ops::product(term, self.variance()); + eprintln!("\tProduct: {:?}", sum); + sum } } @@ -1384,8 +1406,8 @@ impl VarianceTerm for Separator { } impl VarianceTerm for Separator { - fn term(&self) -> TokenVariance { - Variance::Invariant(1.into()) + fn term(&self) -> ::Term { + BoundaryTerm::separator(One::one()) } } @@ 
-1451,10 +1473,10 @@ impl VarianceTerm for Wildcard { } impl VarianceTerm for Wildcard { - fn term(&self) -> TokenVariance { + fn term(&self) -> ::Term { match self { - Wildcard::Tree { .. } => Variance::unbounded(), - _ => Variance::zero(), + Wildcard::Tree { .. } => BoundaryTerm::component(Variance::unbounded()), + _ => Zero::zero(), } } } diff --git a/src/token/variance/bound.rs b/src/token/variance/bound.rs index e14e8e9..95e9f3d 100644 --- a/src/token/variance/bound.rs +++ b/src/token/variance/bound.rs @@ -2,7 +2,7 @@ use std::cmp::{self, Ordering}; use std::marker::PhantomData; use std::num::NonZeroUsize; -use crate::token::variance::invariant::{Identity, UnitBound}; +use crate::token::variance::invariant::{self, UnitBound, Zero as _}; use crate::token::variance::ops::{self, Conjunction, Disjunction, Product}; use crate::token::variance::Variance; @@ -232,7 +232,7 @@ impl From for usize { } } -impl Identity for Zero { +impl invariant::Zero for Zero { fn zero() -> Self { Zero } diff --git a/src/token/variance/invariant/mod.rs b/src/token/variance/invariant/mod.rs index 46778c8..2827857 100644 --- a/src/token/variance/invariant/mod.rs +++ b/src/token/variance/invariant/mod.rs @@ -1,4 +1,5 @@ mod natural; +mod separation; mod text; use std::num::NonZeroUsize; @@ -7,7 +8,10 @@ use crate::token::variance::bound::{BoundedVariantRange, Boundedness, OpenedUppe use crate::token::variance::ops::{Conjunction, Disjunction, Product}; use crate::token::variance::TokenVariance; -pub use crate::token::variance::invariant::natural::{Depth, Size}; +pub use crate::token::variance::invariant::natural::{Depth, DepthTerm, Size}; +pub use crate::token::variance::invariant::separation::{ + BoundaryTerm, SeparatedTerm, Separation, Finalize, +}; pub use crate::token::variance::invariant::text::{IntoNominalText, IntoStructuralText, Text}; pub type InvariantBound = ::Bound; @@ -16,17 +20,23 @@ pub trait BoundedVariance { type Bound; } -pub trait Identity { +pub trait Zero { fn zero() -> 
Self; } -pub trait Invariant: BoundedVariance + Sized { - type Term: InvariantTerm; +pub trait One { + fn one() -> Self; +} + +// TODO: Recombine these traits? Or rename them? It's confusing that this has a bound on +// `InvariantTerm`, but its `Term` does not! +pub trait Invariant: InvariantTerm + Sized { + type Term: Zero; - fn finalize(term: TokenVariance) -> TokenVariance; + fn finalize(term: Self::Term) -> TokenVariance; } -pub trait InvariantTerm: BoundedVariance + Identity + Sized { +pub trait InvariantTerm: BoundedVariance + Zero + Sized { fn bound(lhs: Self, rhs: Self) -> Boundedness; fn into_lower_bound(self) -> Boundedness; @@ -61,16 +71,16 @@ impl BoundedVariance for Breadth { type Bound = (); } -impl Identity for Breadth { +impl Zero for Breadth { fn zero() -> Self { Breadth } } impl Invariant for Breadth { - type Term = Self; + type Term = TokenVariance; - fn finalize(term: TokenVariance) -> TokenVariance { + fn finalize(term: Self::Term) -> TokenVariance { term } } @@ -85,7 +95,7 @@ impl InvariantTerm for Breadth { } } -#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] pub struct UnitBound; impl Conjunction for UnitBound { diff --git a/src/token/variance/invariant/natural.rs b/src/token/variance/invariant/natural.rs index 944dcfe..95a4454 100644 --- a/src/token/variance/invariant/natural.rs +++ b/src/token/variance/invariant/natural.rs @@ -3,16 +3,17 @@ use std::num::NonZeroUsize; use crate::token::variance::bound::{ Bounded, BoundedVariantRange, Boundedness, Unbounded, VariantRange, }; -use crate::token::variance::invariant::{BoundedVariance, Identity, Invariant, InvariantTerm}; +use crate::token::variance::invariant::separation::{ + BoundaryTerm, Finalize, SeparatedTerm, Separation, +}; +use crate::token::variance::invariant::{BoundedVariance, Invariant, InvariantTerm, One, Zero}; use crate::token::variance::ops::{self, Conjunction, Disjunction, Product}; use 
crate::token::variance::{TokenVariance, Variance}; +use crate::token::Boundary; -macro_rules! impl_invariant_natural { +macro_rules! impl_natural_invariant_term { ($name:ident $(,)?) => { - impl_invariant_natural!($name, once => 0); - }; - ($name:ident, once => $once:expr $(,)?) => { - #[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)] + #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] #[repr(transparent)] pub struct $name(usize); @@ -69,21 +70,6 @@ macro_rules! impl_invariant_natural { } } - impl Identity for $name { - fn zero() -> Self { - $name(0) - } - } - - impl Invariant for $name { - type Term = Self; - - fn finalize(term: TokenVariance) -> TokenVariance { - // TODO: This is incorrect. - term.map_invariant(|term| ops::conjunction(term, $name($once))) - } - } - impl InvariantTerm for $name { fn bound(lhs: Self, rhs: Self) -> Boundedness { let [lower, upper] = crate::minmax(lhs, rhs); @@ -99,6 +85,12 @@ macro_rules! impl_invariant_natural { } } + impl One for $name { + fn one() -> Self { + $name(1) + } + } + impl PartialEq for $name { fn eq(&self, rhs: &usize) -> bool { self.0 == *rhs @@ -117,11 +109,15 @@ macro_rules! impl_invariant_natural { type Output = TokenVariance; fn product(self, rhs: VariantRange) -> Self::Output { - NonZeroUsize::new(self.0) - .map_or_else( - TokenVariance::::zero, - |lhs| Variance::Variant(rhs.map_bounded(|rhs| ops::product(rhs, lhs))), - ) + NonZeroUsize::new(self.0).map_or_else(TokenVariance::::zero, |lhs| { + Variance::Variant(rhs.map_bounded(|rhs| ops::product(rhs, lhs))) + }) + } + } + + impl Zero for $name { + fn zero() -> Self { + $name(0) } } @@ -142,11 +138,52 @@ macro_rules! 
impl_invariant_natural { } }; } -impl_invariant_natural!(Depth, once => 1); -impl_invariant_natural!(Size); +impl_natural_invariant_term!(Depth); +impl_natural_invariant_term!(Size); + +impl Invariant for Depth { + type Term = DepthTerm; + + fn finalize(term: Self::Term) -> TokenVariance { + term.finalize() + } +} + +impl Invariant for Size { + type Term = TokenVariance; + + fn finalize(term: Self::Term) -> TokenVariance { + term + } +} + +pub type DepthTerm = BoundaryTerm; impl TokenVariance { pub fn is_exhaustive(&self) -> bool { !self.has_upper_bound() } } + +impl Finalize for SeparatedTerm> { + type Output = TokenVariance; + + fn finalize(self) -> Self::Output { + use Boundary::Component; + use Separation::{Closed, First, Last, Open}; + use Variance::Invariant; + + let SeparatedTerm(separation, term) = self; + match separation { + Open(Some(Component)) | First | Last => term, + Open(_) => ops::conjunction(term, Invariant(Depth::ONE)), + Closed => term.map_invariant(|term| Depth(term.0.saturating_sub(1))), + } + } +} + +impl BoundaryTerm { + pub fn is_exhaustive(&self) -> bool { + self.clone().finalize().is_exhaustive() + } +} diff --git a/src/token/variance/invariant/separation.rs b/src/token/variance/invariant/separation.rs new file mode 100644 index 0000000..e63dd32 --- /dev/null +++ b/src/token/variance/invariant/separation.rs @@ -0,0 +1,432 @@ +use itertools::{Itertools, Position}; +use std::collections::HashSet; +use std::hash::Hash; + +use crate::token::variance::bound::Boundedness; +use crate::token::variance::invariant::{BoundedVariance, Invariant, One, Zero}; +use crate::token::variance::ops::{self, Conjunction, Disjunction, Product}; +use crate::token::variance::{TokenVariance, Variance}; +use crate::token::{Boundary, Composition}; + +pub trait Finalize: Sized { + type Output; + + fn finalize(self) -> Self::Output; +} + +pub type BoundaryTerm = TreeTerm>>; + +impl BoundaryTerm +where + TokenVariance: Eq + Hash, + T: BoundedVariance, +{ + pub fn 
separator(term: TokenVariance) -> Self { + BoundaryTerm::Conjunctive(SeparatedTerm::separator(term)) + } + + pub fn component(term: TokenVariance) -> Self { + BoundaryTerm::Conjunctive(SeparatedTerm::component(term)) + } + + pub fn close_boundary_separation(self, position: Position) -> Self { + match self { + BoundaryTerm::Conjunctive(term) => BoundaryTerm::Conjunctive(term.close_boundary_separation(position)), + // TODO: Is it necessary to close boundary separations in disjunctive terms? + BoundaryTerm::Disjunctive(term) => BoundaryTerm::Disjunctive(term.remap(|term| term.close_boundary_separation(position))), + } + } + + pub fn as_variance(&self) -> Variance<&T, Option<&Boundedness>> { + match self { + BoundaryTerm::Conjunctive(ref term) => term.1.as_ref().map_variant(Some), + BoundaryTerm::Disjunctive(_) => Variance::Variant(None), + } + } +} + +pub type TreeTerm = Composition>; + +impl Conjunction for TreeTerm +where + DisjunctiveTerm: Conjunction> + + Conjunction>, + T: Conjunction + + Conjunction, Output = DisjunctiveTerm> + + Eq + + Hash, +{ + type Output = Self; + + fn conjunction(self, rhs: Self) -> Self::Output { + use Composition::{Conjunctive, Disjunctive}; + + match (self, rhs) { + (Conjunctive(lhs), Conjunctive(rhs)) => Conjunctive(ops::conjunction(lhs, rhs)), + (Conjunctive(lhs), Disjunctive(rhs)) => Disjunctive(ops::conjunction(lhs, rhs)), + (Disjunctive(lhs), Conjunctive(rhs)) => Disjunctive(ops::conjunction(lhs, rhs)), + (Disjunctive(lhs), Disjunctive(rhs)) => Disjunctive(ops::conjunction(lhs, rhs)), + } + } +} + +impl Disjunction for TreeTerm +where + DisjunctiveTerm: Disjunction> + + Disjunction, Output = DisjunctiveTerm>, + T: Disjunction> + + Disjunction, Output = DisjunctiveTerm> + + Eq + + Hash, +{ + type Output = Self; + + fn disjunction(self, rhs: Self) -> Self::Output { + use Composition::{Conjunctive, Disjunctive}; + + Disjunctive(match (self, rhs) { + (Conjunctive(lhs), Conjunctive(rhs)) => ops::disjunction(lhs, rhs), + 
(Conjunctive(lhs), Disjunctive(rhs)) => ops::disjunction(lhs, rhs), + (Disjunctive(lhs), Conjunctive(rhs)) => ops::disjunction(lhs, rhs), + (Disjunctive(lhs), Disjunctive(rhs)) => ops::disjunction(lhs, rhs), + }) + } +} + +impl Finalize for TreeTerm +where + DisjunctiveTerm: Finalize, + T: Eq + Finalize + Hash, +{ + type Output = T::Output; + + fn finalize(self) -> Self::Output { + match self { + TreeTerm::Conjunctive(term) => term.finalize(), + TreeTerm::Disjunctive(term) => term.finalize(), + } + } +} + +impl One for TreeTerm +where + T: Eq + Hash + One, +{ + fn one() -> Self { + TreeTerm::Conjunctive(T::one()) + } +} + +impl Product for TreeTerm +where + T: Eq + Hash + Product, + T::Output: Eq + Hash, + U: Clone, +{ + type Output = TreeTerm; + + fn product(self, rhs: U) -> Self::Output { + match self { + TreeTerm::Conjunctive(lhs) => TreeTerm::Conjunctive(ops::product(lhs, rhs)), + TreeTerm::Disjunctive(lhs) => TreeTerm::Disjunctive(ops::product(lhs, rhs)), + } + } +} + +impl Zero for TreeTerm +where + T: Eq + Hash + Zero, +{ + fn zero() -> Self { + TreeTerm::Conjunctive(T::zero()) + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct DisjunctiveTerm(HashSet) +where + T: Eq + Hash; + +impl DisjunctiveTerm +where + T: Eq + Hash, +{ + fn remap(self, f: F) -> DisjunctiveTerm + where + U: Eq + Hash, + F: FnMut(T) -> U, + { + DisjunctiveTerm(self.0.into_iter().map(f).collect()) + } +} + +impl Conjunction for DisjunctiveTerm +where + T: Clone + Conjunction + Eq + Hash, +{ + type Output = Self; + + fn conjunction(self, rhs: Self) -> Self::Output { + DisjunctiveTerm( + self.0 + .into_iter() + .cartesian_product(rhs.0.iter()) + .map(|(lhs, rhs)| ops::conjunction(lhs, rhs.clone())) + .collect(), + ) + } +} + +impl Conjunction for DisjunctiveTerm +where + T: Clone + Conjunction + Eq + Hash, +{ + type Output = Self; + + fn conjunction(self, rhs: T) -> Self::Output { + self.remap(|lhs| ops::conjunction(lhs, rhs.clone())) + } +} + +// This is the same as the above 
`Conjunction` implementation, but with transposed operand order. +// However, it is limited to `SeparatedTerm`s (rather than any `T`), because otherwise this +// implementation conflicts with an implementation for `UnitBound`. +impl Conjunction>> for SeparatedTerm +where + SeparatedTerm: Clone + Conjunction> + Eq + Hash, +{ + type Output = DisjunctiveTerm>; + + fn conjunction(self, rhs: DisjunctiveTerm>) -> Self::Output { + rhs.remap(|rhs| ops::conjunction(self.clone(), rhs)) + } +} + +impl Disjunction for DisjunctiveTerm +where + T: Eq + Hash, +{ + type Output = Self; + + fn disjunction(self, rhs: Self) -> Self::Output { + DisjunctiveTerm(self.0.into_iter().chain(rhs.0).collect()) + } +} + +impl Disjunction for DisjunctiveTerm +where + T: Eq + Hash, +{ + type Output = Self; + + fn disjunction(mut self, rhs: T) -> Self::Output { + self.0.insert(rhs); + self + } +} + +impl Disjunction> for T +where + T: Eq + Hash, +{ + type Output = DisjunctiveTerm; + + fn disjunction(self, mut rhs: DisjunctiveTerm) -> Self::Output { + rhs.0.insert(self); + rhs + } +} + +impl Finalize for DisjunctiveTerm +where + T: Eq + Finalize + Hash, + T::Output: Disjunction + Zero, +{ + type Output = T::Output; + + fn finalize(self) -> Self::Output { + self.0 + .into_iter() + .map(Finalize::finalize) + .reduce(ops::disjunction) + .unwrap_or_else(Zero::zero) + } +} + +impl Product for DisjunctiveTerm +where + T: Eq + Hash + Product, + T::Output: Eq + Hash, + U: Clone, +{ + type Output = DisjunctiveTerm; + + fn product(self, rhs: U) -> Self::Output { + self.remap(|lhs| ops::product(lhs, rhs.clone())) + } +} + +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub enum Separation { + // TODO: While additional information is needed to close `Open` for separators, more general + // boundary information appears to be unnecessary. More importantly, this is insufficient + // to handle the "coalescence" of tree wildcards! This information likely needs to be + // pushed up. 
One algebra that seems to work is to immediately finalize tree wildcard + // terms in conjunctions (prior to performing the conjunction). The `Separation` is + // otherwise unaffected: tree wildcards are either `Last` or `Closed` and are always + // unbounded. They "just" have an implicit +1 in conjunctions for depth. This extra +1 + // ought to be specified per invariant rather than generalized for all `BoundaryTerm`s. + Open(Option), + First, + Last, + Closed, +} + +impl Conjunction for Separation { + type Output = Self; + + fn conjunction(self, rhs: Self) -> Self::Output { + use Separation::{Closed, First, Last, Open}; + + match (self, rhs) { + (Closed, Closed) | (First, Last | Closed) | (Closed, Last) => Closed, + (Last | Open(_), Closed) | (Last | Open(_), Last) => Last, + (Closed, First | Open(_)) | (First, First | Open(_)) => First, + (Open(_), First | Open(_)) | (Last, First | Open(_)) => Open(None), + } + } +} + +//impl Disjunction for Separation { +// type Output = Self; +// +// fn disjunction(self, rhs: Self) -> Self::Output { +// use Separation::{First, Last, Terminal, Unit}; +// +// match (self, rhs) { +// (Terminal, _) | (_, Terminal) | (First, Last) | (Last, First) => Terminal, +// (Last, _) | (_, Last) => Last, +// (First, _) | (_, First) => First, +// _ => Unit, +// } +// } +//} + +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct SeparatedTerm(pub Separation, pub T); + +impl SeparatedTerm { + pub fn close_boundary_separation(self, position: Position) -> Self { + use Boundary::{Component, Separator}; + use Position::{First, Middle, Last, Only}; + + let SeparatedTerm(separation, term) = self; + SeparatedTerm(match separation { + separation @ Separation::Open(Some(boundary)) => match (boundary, position) { + (Component, _) | (_, Only) => separation, + (Separator, First) => Separation::First, + (Separator, Middle) => Separation::Open(None), + (Separator, Last) => Separation::Last, + }, + separation => separation, + }, term) + } +} + +impl 
SeparatedTerm { + pub fn separator(term: T) -> Self { + SeparatedTerm(Separation::Open(Some(Boundary::Separator)), term) + } + + pub fn component(term: T) -> Self { + SeparatedTerm(Separation::Open(Some(Boundary::Component)), term) + } + + pub fn into_inner(self) -> T { + self.1 + } +} + +impl Conjunction> for SeparatedTerm +where + T: Conjunction, +{ + type Output = SeparatedTerm; + + fn conjunction(self, rhs: SeparatedTerm) -> Self::Output { + SeparatedTerm( + ops::conjunction(self.0, rhs.0), + ops::conjunction(self.1, rhs.1), + ) + } +} + +//impl Disjunction> for Separated +//where +// Self: Finalize, +// Separated: Finalize>, +// T: Disjunction, +//{ +// type Output = Separated; +// +// fn disjunction(self, rhs: Separated) -> Self::Output { +// let lhs = self.finalize(); +// let rhs = rhs.finalize(); +// Separated( +// ops::disjunction(lhs.0, rhs.0), +// ops::disjunction(lhs.1, rhs.1), +// ) +// } +//} + +impl Disjunction for SeparatedTerm +where + T: Eq + Hash, +{ + type Output = DisjunctiveTerm; + + fn disjunction(self, rhs: Self) -> Self::Output { + DisjunctiveTerm([self, rhs].into_iter().collect()) + } +} + +//impl Finalize for Separated> +//where +// T: BoundedVariance + Invariant, +//{ +// type Output = Self; +// +// fn finalize(self) -> Self { +// let separation = self.0; +// Separated(separation, T::finalize(self)) +// } +//} + +impl One for SeparatedTerm +where + T: One, +{ + fn one() -> Self { + SeparatedTerm(Separation::Open(None), T::one()) + } +} + +impl Product for SeparatedTerm +where + T: Product, +{ + type Output = SeparatedTerm; + + fn product(self, rhs: U) -> Self::Output { + SeparatedTerm(self.0, ops::product(self.1, rhs)) + } +} + +impl Zero for SeparatedTerm +where + T: Zero, +{ + fn zero() -> Self { + SeparatedTerm(Separation::Open(None), T::zero()) + } +} diff --git a/src/token/variance/invariant/text.rs b/src/token/variance/invariant/text.rs index edb028b..66e2291 100644 --- a/src/token/variance/invariant/text.rs +++ 
b/src/token/variance/invariant/text.rs @@ -5,7 +5,7 @@ use std::num::NonZeroUsize; use crate::encode; use crate::token::variance::bound::{Boundedness, VariantRange}; use crate::token::variance::invariant::{ - BoundedVariance, Identity, Invariant, InvariantTerm, UnitBound, + BoundedVariance, Invariant, InvariantTerm, UnitBound, Zero, }; use crate::token::variance::ops::{self, Conjunction, Disjunction, Product}; use crate::token::variance::{TokenVariance, Variance}; @@ -172,16 +172,16 @@ impl<'t> From> for Text<'t> { } } -impl<'t> Identity for Text<'t> { +impl<'t> Zero for Text<'t> { fn zero() -> Self { Text::new() } } impl<'t> Invariant for Text<'t> { - type Term = Self; + type Term = TokenVariance; - fn finalize(term: TokenVariance) -> TokenVariance { + fn finalize(term: Self::Term) -> TokenVariance { term } } diff --git a/src/token/variance/mod.rs b/src/token/variance/mod.rs index 60cfac3..33a7127 100644 --- a/src/token/variance/mod.rs +++ b/src/token/variance/mod.rs @@ -11,7 +11,8 @@ use crate::token::variance::bound::{ VariantRange, }; use crate::token::variance::invariant::{ - Breadth, Depth, Identity, Invariant, InvariantBound, InvariantTerm, Text, UnitBound, + BoundaryTerm, Breadth, Depth, DepthTerm, Invariant, InvariantBound, InvariantTerm, One, + SeparatedTerm, Separation, Text, UnitBound, Zero, }; use crate::token::variance::ops::{Conjunction, Disjunction, Product}; use crate::token::walk::{ChildToken, Fold, Forward, ParentToken, Sequencer}; @@ -23,34 +24,27 @@ pub trait VarianceTerm where T: Invariant, { - fn term(&self) -> TokenVariance; + fn term(&self) -> T::Term; } pub trait VarianceFold where T: Invariant, { - fn fold(&self, terms: Vec>) -> Option>; + fn fold(&self, terms: Vec) -> Option; - fn finalize(&self, term: TokenVariance) -> TokenVariance { + fn finalize(&self, term: T::Term) -> T::Term { term } } -#[derive(Clone, Copy, Debug, Eq)] +#[derive(Clone, Copy, Debug, Eq, Hash)] pub enum Variance> { Invariant(T), Variant(B), } impl Variance { - pub 
fn zero() -> Self - where - T: Identity, - { - Variance::Invariant(T::zero()) - } - pub fn map_invariant(self, f: F) -> Variance where F: FnOnce(T) -> U, @@ -214,6 +208,15 @@ where } } +impl One for Variance +where + T: One, +{ + fn one() -> Self { + Variance::Invariant(T::one()) + } +} + impl PartialEq for Variance where T: PartialEq, @@ -259,6 +262,15 @@ where } } +impl Zero for Variance +where + T: Zero, +{ + fn zero() -> Self { + Variance::Invariant(T::zero()) + } +} + pub struct TreeVariance(PhantomData T>); impl Default for TreeVariance { @@ -274,7 +286,7 @@ where T: Invariant, { type Sequencer = Forward; - type Term = TokenVariance; + type Term = T::Term; fn sequencer() -> Self::Sequencer { Forward @@ -318,7 +330,7 @@ impl Sequencer for TreeExhaustiveness { impl<'t, A> Fold<'t, A> for TreeExhaustiveness { type Sequencer = Self; - type Term = TokenVariance; + type Term = DepthTerm; fn sequencer() -> Self::Sequencer { Self @@ -356,8 +368,11 @@ impl<'t, A> Fold<'t, A> for TreeExhaustiveness { }) .into_inner(); if !all && any { - return Some(TokenVariance::::Variant(Bounded( - BoundedVariantRange::Upper(unsafe { NonZeroUsize::new_unchecked(1) }), + return Some(BoundaryTerm::Conjunctive(SeparatedTerm( + Separation::Open(None), + TokenVariance::::Variant(Bounded(BoundedVariantRange::Upper(unsafe { + NonZeroUsize::new_unchecked(1) + }))), ))); } } @@ -371,11 +386,11 @@ impl<'t, A> Fold<'t, A> for TreeExhaustiveness { else { // Terms were discarded, meaning some non-depth quantity was bounded. Yield any sum // only if the depth is exhaustive, otherwise zero. 
- if sum.as_ref().map_or(false, Variance::is_exhaustive) { + if sum.as_ref().map_or(false, DepthTerm::is_exhaustive) { sum } else { - Some(Variance::zero()) + Some(Zero::zero()) } } } @@ -384,7 +399,7 @@ impl<'t, A> Fold<'t, A> for TreeExhaustiveness { use Variance::{Invariant, Variant}; match branch { - branch @ BranchKind::Repetition(_) => match term { + branch @ BranchKind::Repetition(_) => match term.as_variance() { // When folding terms into a repetition, only finalize variant terms and the // multiplicative identity and annihilator (one and zero). This is necessary, // because natural bounds do not express the subset nor relationship of matched @@ -392,7 +407,7 @@ impl<'t, A> Fold<'t, A> for TreeExhaustiveness { // depth, but only matches paths with a depth that is a multiple of two and so is // nonexhaustive. However, the similar pattern `<*/>` is exhaustive and matches any // sub-tree of a match. - Invariant(Depth::ZERO) | Invariant(Depth::ONE) | Variant(_) => { + Invariant(&Depth::ZERO) | Invariant(&Depth::ONE) | Variant(_) => { self::finalize::(branch, term) }, _ => term, @@ -406,27 +421,21 @@ impl<'t, A> Fold<'t, A> for TreeExhaustiveness { } } -pub fn fold( - token: &impl VarianceFold, - terms: Vec>, -) -> Option> +pub fn fold(token: &impl VarianceFold, terms: Vec) -> Option where T: Invariant, { token.fold(terms) } -pub fn finalize( - token: &impl VarianceFold, - term: TokenVariance, -) -> TokenVariance +pub fn finalize(token: &impl VarianceFold, term: T::Term) -> T::Term where T: Invariant, { token.finalize(term) } -pub fn term(token: &impl VarianceTerm) -> TokenVariance +pub fn term(token: &impl VarianceTerm) -> T::Term where T: Invariant, { @@ -456,7 +465,7 @@ pub mod harness { Variance::Variant(UnitBound.into()) } - pub fn range(lower: usize, upper: Option) -> TokenVariance + pub fn range(lower: usize, upper: impl Into>) -> TokenVariance where T: From + Invariant, { @@ -468,7 +477,7 @@ pub mod harness { expected: TokenVariance, ) -> Tokenized<'t, 
A> where - TreeVariance: Fold<'t, A, Term = TokenVariance>, + TreeVariance: Fold<'t, A, Term = T::Term>, T: Debug + Eq + Invariant, T::Bound: Debug + Eq, { @@ -511,16 +520,35 @@ mod tests { #[case("a", harness::invariant(1))] #[case("a/", harness::invariant(1))] #[case("a/b", harness::invariant(2))] + #[case("(?i)a(?-i)b", harness::invariant(1))] + #[case("{a/b}", harness::invariant(2))] + #[case("{a,a/b}", harness::range(1, 2))] + #[case("x{a,a/b}", harness::range(1, 2))] + #[case("x/{a,a/b}", harness::range(2, 3))] + #[case("{a,a/b}x", harness::range(1, 2))] + #[case("{a,a/b}/x", harness::range(2, 3))] #[case("", harness::invariant(3))] + #[case("", harness::range(0, 3))] + #[case("x", harness::invariant(3))] + #[case("x", harness::invariant(4))] + #[case("*", harness::invariant(1))] + #[case("a/*", harness::invariant(2))] + #[case("a/*/b", harness::invariant(3))] + #[case("*/a", harness::invariant(2))] + #[case("{a,*}", harness::invariant(1))] + #[case("*", harness::range(2, None))] + #[case("<*/>*", harness::range(1, None))] + #[case("</>*", harness::range(1, None))] + #[case("*", harness::range(1, None))] + #[case("**", Variance::unbounded())] #[case("a/**", harness::range(1, None))] #[case("a/**/b", harness::range(2, None))] - #[case("a/**/b/**/c", harness::range(3, None))] - #[case("*", harness::range(1, None))] - #[case("{a/b/,c/**/}*.ext", harness::range(2, None))] - #[case("**", Variance::unbounded())] - #[case("<*/>*", Variance::unbounded())] - #[case("</>*", Variance::unbounded())] - #[case("*", Variance::unbounded())] + #[case("a/<*/>b", harness::range(2, None))] + //#[case("a/**/b/**/c", harness::range(3, None))] + #[case("*/**", harness::range(1, None))] + #[case("**/*", harness::range(1, None))] + #[case("**/*/**", harness::range(1, None))] + //#[case("{a/b/,c/**/}*.ext", harness::range(2, None))] fn parse_expression_depth_variance_eq( #[case] expression: &str, #[case] expected: TokenVariance,