Skip to content

Commit

Permalink
Introduce BoundaryTerms.
Browse files Browse the repository at this point in the history
This (very WIP) change introduces general `BoundaryTerm`s, which
associate boundary information with variance terms in folds. This allows
for a more complete algebra that is aware of separation terminals as
terms are summed. Depth bounds are now correct (AFAICT). However, this
still does not properly support the type of "coalescence" needed for
tree wildcards.

This needs a lot of cleaning up and reorganizing! Names, module
hierarchy, oh my!

`BoundaryTerm` may be useful for computing the bounds of breadth as
well. For example, products of such terms are aware of boundary
terminals, which may remove the need to examine terminal tokens while
also propagating terminals into the next level of the fold.
  • Loading branch information
olson-sean-k committed Mar 25, 2024
1 parent 8860fe8 commit 828e7cf
Show file tree
Hide file tree
Showing 7 changed files with 670 additions and 141 deletions.
140 changes: 81 additions & 59 deletions src/token/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ use std::str;

use crate::diagnostics::{Span, Spanned};
use crate::query::When;
use crate::token::variance::invariant::{IntoNominalText, IntoStructuralText};
use crate::token::variance::invariant::{
BoundaryTerm, IntoNominalText, IntoStructuralText, One, Zero,
};
use crate::token::variance::ops;
use crate::token::variance::{TreeExhaustiveness, TreeVariance, VarianceFold, VarianceTerm};
use crate::token::walk::{BranchFold, Fold, FoldMap, Starting, TokenEntry};
Expand All @@ -24,7 +26,7 @@ pub use crate::token::parse::{parse, ParseError, ROOT_SEPARATOR_EXPRESSION};
pub use crate::token::variance::bound::{
BoundedVariantRange, Boundedness, NaturalRange, VariantRange,
};
pub use crate::token::variance::invariant::{Breadth, Depth, Invariant, Size, Text};
pub use crate::token::variance::invariant::{Finalize, Breadth, Depth, DepthTerm, Invariant, Size, Text};
pub use crate::token::variance::{TokenVariance, Variance};

// TODO: Tree representations of expressions are intrusive and only differ in their annotations.
Expand Down Expand Up @@ -157,7 +159,7 @@ impl<T> AsRef<T> for Composition<T, T> {
}
}

#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum Boundary {
Component,
Separator,
Expand Down Expand Up @@ -447,12 +449,12 @@ impl<'t, A> Token<'t, A> {

pub fn variance<T>(&self) -> TokenVariance<T>
where
TreeVariance<T::Term>: Fold<'t, A, Term = TokenVariance<T::Term>>,
TreeVariance<T>: Fold<'t, A, Term = T::Term>,
T: Invariant,
{
T::finalize(
self.fold(TreeVariance::default())
.unwrap_or_else(Variance::zero),
.unwrap_or_else(Zero::zero),
)
}

Expand Down Expand Up @@ -581,6 +583,7 @@ impl<'t, A> Token<'t, A> {
// discard directory trees in the `FileIterator::not` combinator.
pub fn is_exhaustive(&self) -> bool {
self.fold(TreeExhaustiveness)
.map(Finalize::finalize)
.as_ref()
.map_or(false, Variance::is_exhaustive)
}
Expand Down Expand Up @@ -831,7 +834,7 @@ where
Repetition<'t, A>: VarianceFold<T>,
T: Invariant,
{
fn fold(&self, terms: Vec<TokenVariance<T::Term>>) -> Option<TokenVariance<T::Term>> {
fn fold(&self, terms: Vec<T::Term>) -> Option<T::Term> {
use BranchKind::{Alternation, Concatenation, Repetition};

match self {
Expand All @@ -841,7 +844,7 @@ where
}
}

fn finalize(&self, term: TokenVariance<T::Term>) -> TokenVariance<T::Term> {
fn finalize(&self, term: T::Term) -> T::Term {
use BranchKind::{Alternation, Concatenation, Repetition};

match self {
Expand Down Expand Up @@ -943,7 +946,7 @@ where
Wildcard: VarianceTerm<T>,
T: Invariant,
{
fn term(&self) -> TokenVariance<T::Term> {
fn term(&self) -> T::Term {
use LeafKind::{Class, Literal, Separator, Wildcard};

match self {
Expand Down Expand Up @@ -984,7 +987,7 @@ impl<'t, A> From<Vec<Token<'t, A>>> for Alternation<'t, A> {
}

impl<'t, A> VarianceFold<Depth> for Alternation<'t, A> {
fn fold(&self, terms: Vec<TokenVariance<Depth>>) -> Option<TokenVariance<Depth>> {
fn fold(&self, terms: Vec<<Depth as Invariant>::Term>) -> Option<<Depth as Invariant>::Term> {
terms.into_iter().reduce(ops::disjunction)
}
}
Expand Down Expand Up @@ -1083,14 +1086,14 @@ impl Class {
where
Archetype: VarianceTerm<T>,
T: Invariant,
F: FnMut(TokenVariance<T::Term>, TokenVariance<T::Term>) -> TokenVariance<T::Term>,
F: FnMut(T::Term, T::Term) -> T::Term,
{
T::finalize(
self.archetypes()
.iter()
.map(Archetype::term)
.reduce(f)
.unwrap_or_else(Variance::zero),
.unwrap_or_else(Zero::zero),
)
}
}
Expand All @@ -1102,8 +1105,8 @@ impl VarianceTerm<Breadth> for Class {
}

impl VarianceTerm<Depth> for Class {
fn term(&self) -> TokenVariance<Depth> {
Variance::zero()
fn term(&self) -> <Depth as Invariant>::Term {
Zero::zero()
}
}

Expand Down Expand Up @@ -1157,42 +1160,56 @@ impl<'t, A> From<Vec<Token<'t, A>>> for Concatenation<'t, A> {
}

impl<'t, A> VarianceFold<Depth> for Concatenation<'t, A> {
fn fold(&self, terms: Vec<TokenVariance<Depth>>) -> Option<TokenVariance<Depth>> {
terms.into_iter().reduce(ops::conjunction)
}

fn finalize(&self, term: TokenVariance<Depth>) -> TokenVariance<Depth> {
// Depth must consider boundary component tokens (i.e., tree wildcards). Boundary component
// tokens do not have explicit separators, so additional depth terms are needed for any
// adjacent non-boundary tokens. For example, consider the pattern `a/**/b`. The `a` and
// `b` components have depth terms of zero and the tree wildcard `**` has an unbounded
// depth term. Note that the forward slashes in the expression are part of the tree
// wildcard; there are no separator tokens. The conjunction of these terms is unbounded (no
// lower bound), but the depth of this expression should have a lower bound of two.
ops::conjunction(
term,
Variance::Invariant({
let mut sum = Depth::ZERO;
let mut term = Depth::ZERO;
for token in self.tokens() {
match token.boundary() {
Some(Boundary::Component) => {
sum = ops::conjunction(sum, term);
term = Depth::ZERO;
},
None => {
term = Depth::ONE;
},
_ => {},
}
}
if sum != Depth::ZERO {
sum = ops::conjunction(sum, term);
}
sum
}),
)
}
fn fold(&self, terms: Vec<DepthTerm>) -> Option<DepthTerm> {
//terms.into_iter().with_position().map(|(position, term)| term.close_boundary_separation(position)).reduce(ops::conjunction)
eprintln!("==> Concatenation (Conjunction) Terms");
let sum = terms
.into_iter()
.with_position()
.map(|(position, term)| {
eprintln!("\t{:?}", term);
term.close_boundary_separation(position)
})
.inspect(|term| eprintln!("\t{:?}\n", term))
.reduce(ops::conjunction);
eprintln!("\tConjuction: {:?}", sum);
sum
}

// TODO: See term finalization. Tree wildcards remain `Open`, which should produce the correct
// sum.
//fn finalize(&self, term: DepthTerm) -> DepthTerm {
// // Depth must consider boundary component tokens (i.e., tree wildcards). Boundary component
// // tokens do not have explicit separators, so additional depth terms are needed for any
// // adjacent non-boundary tokens. For example, consider the pattern `a/**/b`. The `a` and
// // `b` components have depth terms of zero and the tree wildcard `**` has an unbounded
// // depth term. Note that the forward slashes in the expression are part of the tree
// // wildcard; there are no separator tokens. The conjunction of these terms is unbounded (no
// // lower bound), but the depth of this expression should have a lower bound of two.
// ops::conjunction(
// term,
// Variance::Invariant({
// let mut sum = Depth::ZERO;
// let mut term = Depth::ZERO;
// for token in self.tokens() {
// match token.boundary() {
// Some(Boundary::Component) => {
// sum = ops::conjunction(sum, term);
// term = Depth::ZERO;
// },
// None => {
// term = Depth::ONE;
// },
// _ => {},
// }
// }
// if sum != Depth::ZERO {
// sum = ops::conjunction(sum, term);
// }
// sum
// }),
// )
//}
}

impl<'t, A> VarianceFold<Size> for Concatenation<'t, A> {
Expand Down Expand Up @@ -1269,8 +1286,8 @@ impl<'t> VarianceTerm<Breadth> for Literal<'t> {
}

impl<'t> VarianceTerm<Depth> for Literal<'t> {
fn term(&self) -> TokenVariance<Depth> {
Variance::zero()
fn term(&self) -> <Depth as Invariant>::Term {
Zero::zero()
}
}

Expand Down Expand Up @@ -1341,12 +1358,17 @@ impl<'t, A> BranchComposition<'t> for Repetition<'t, A> {
}

impl<'t, A> VarianceFold<Depth> for Repetition<'t, A> {
fn fold(&self, terms: Vec<TokenVariance<Depth>>) -> Option<TokenVariance<Depth>> {
fn fold(&self, terms: Vec<<Depth as Invariant>::Term>) -> Option<<Depth as Invariant>::Term> {
terms.into_iter().reduce(ops::conjunction)
}

fn finalize(&self, term: TokenVariance<Depth>) -> TokenVariance<Depth> {
ops::product(term, self.variance())
fn finalize(&self, term: <Depth as Invariant>::Term) -> <Depth as Invariant>::Term {
//ops::product(term, self.variance())
eprintln!("==> Repetition (Product) Term");
eprintln!("\t{:?}\n", term);
let sum = ops::product(term, self.variance());
eprintln!("\tProduct: {:?}", sum);
sum
}
}

Expand Down Expand Up @@ -1384,8 +1406,8 @@ impl VarianceTerm<Breadth> for Separator {
}

impl VarianceTerm<Depth> for Separator {
fn term(&self) -> TokenVariance<Depth> {
Variance::Invariant(1.into())
fn term(&self) -> <Depth as Invariant>::Term {
BoundaryTerm::separator(One::one())
}
}

Expand Down Expand Up @@ -1451,10 +1473,10 @@ impl VarianceTerm<Breadth> for Wildcard {
}

impl VarianceTerm<Depth> for Wildcard {
fn term(&self) -> TokenVariance<Depth> {
fn term(&self) -> <Depth as Invariant>::Term {
match self {
Wildcard::Tree { .. } => Variance::unbounded(),
_ => Variance::zero(),
Wildcard::Tree { .. } => BoundaryTerm::component(Variance::unbounded()),
_ => Zero::zero(),
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/token/variance/bound.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::cmp::{self, Ordering};
use std::marker::PhantomData;
use std::num::NonZeroUsize;

use crate::token::variance::invariant::{Identity, UnitBound};
use crate::token::variance::invariant::{self, UnitBound, Zero as _};
use crate::token::variance::ops::{self, Conjunction, Disjunction, Product};
use crate::token::variance::Variance;

Expand Down Expand Up @@ -232,7 +232,7 @@ impl From<Zero> for usize {
}
}

impl Identity for Zero {
impl invariant::Zero for Zero {
fn zero() -> Self {
Zero
}
Expand Down
30 changes: 20 additions & 10 deletions src/token/variance/invariant/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
mod natural;
mod separation;
mod text;

use std::num::NonZeroUsize;
Expand All @@ -7,7 +8,10 @@ use crate::token::variance::bound::{BoundedVariantRange, Boundedness, OpenedUppe
use crate::token::variance::ops::{Conjunction, Disjunction, Product};
use crate::token::variance::TokenVariance;

pub use crate::token::variance::invariant::natural::{Depth, Size};
pub use crate::token::variance::invariant::natural::{Depth, DepthTerm, Size};
pub use crate::token::variance::invariant::separation::{
BoundaryTerm, SeparatedTerm, Separation, Finalize,
};
pub use crate::token::variance::invariant::text::{IntoNominalText, IntoStructuralText, Text};

pub type InvariantBound<T> = <T as BoundedVariance>::Bound;
Expand All @@ -16,17 +20,23 @@ pub trait BoundedVariance {
type Bound;
}

pub trait Identity {
pub trait Zero {
fn zero() -> Self;
}

pub trait Invariant: BoundedVariance + Sized {
type Term: InvariantTerm<Bound = Self::Bound>;
pub trait One {
fn one() -> Self;
}

// TODO: Recombine these traits? Or rename them? It's confusing that this has a bound on
// `InvariantTerm`, but its `Term` does not!
pub trait Invariant: InvariantTerm + Sized {
type Term: Zero;

fn finalize(term: TokenVariance<Self::Term>) -> TokenVariance<Self>;
fn finalize(term: Self::Term) -> TokenVariance<Self>;
}

pub trait InvariantTerm: BoundedVariance + Identity + Sized {
pub trait InvariantTerm: BoundedVariance + Zero + Sized {
fn bound(lhs: Self, rhs: Self) -> Boundedness<Self::Bound>;

fn into_lower_bound(self) -> Boundedness<Self::Bound>;
Expand Down Expand Up @@ -61,16 +71,16 @@ impl BoundedVariance for Breadth {
type Bound = ();
}

impl Identity for Breadth {
impl Zero for Breadth {
fn zero() -> Self {
Breadth
}
}

impl Invariant for Breadth {
type Term = Self;
type Term = TokenVariance<Self>;

fn finalize(term: TokenVariance<Self::Term>) -> TokenVariance<Self> {
fn finalize(term: Self::Term) -> TokenVariance<Self> {
term
}
}
Expand All @@ -85,7 +95,7 @@ impl InvariantTerm for Breadth {
}
}

#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
pub struct UnitBound;

impl<T> Conjunction<T> for UnitBound {
Expand Down
Loading

0 comments on commit 828e7cf

Please sign in to comment.