Skip to content

Commit

Permalink
#18 Case-insensitive operator for the Token macro
Browse files: browse the repository at this point in the history
  • Loading branch information
Eliah-Lakhin committed Jul 5, 2024
1 parent bdb70e2 commit d0407df
Show file tree
Hide file tree
Showing 2 changed files with 186 additions and 1 deletion.
4 changes: 3 additions & 1 deletion work/crates/derive/src/token/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ use crate::{
token::{
automata::{AutomataImpl, Scope, Terminal, TokenAutomata},
opt::Opt,
regex::{Regex, RegexImpl},
regex::{Regex, RegexImpl, TransformConfig},
variant::{TokenVariant, EOI, MISMATCH},
},
utils::{
Expand Down Expand Up @@ -205,6 +205,7 @@ impl TryFrom<DeriveInput> for TokenInput {
));
}

regex.transform(&TransformConfig::default());
regex.inline(&inline_map, &variant_map)?;

let _ = inline_map.insert(name, regex);
Expand Down Expand Up @@ -277,6 +278,7 @@ impl TryFrom<DeriveInput> for TokenInput {

if let Some((_, rule)) = &mut variant.rule {
parsable += 1;
rule.transform(&TransformConfig::default());
rule.inline(&inline_map, &variant_map)?;
alphabet.append(rule.alphabet());
}
Expand Down
183 changes: 183 additions & 0 deletions work/crates/derive/src/token/regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
// All rights reserved. //
////////////////////////////////////////////////////////////////////////////////

use std::mem::take;

use proc_macro2::{Ident, Span};
use syn::{
parse::{Lookahead1, ParseStream},
Expand Down Expand Up @@ -75,6 +77,12 @@ impl RegexImpl for Regex {

Self::Operand(Operand::Dump(_, inner)) => inner.name(),

Self::Operand(Operand::Transform(_, inner)) => {
let inner = expect_some!(inner, "Empty transformation.",);

inner.name()
}

Self::Operand(_) => None,

Self::Binary(left, Operator::Concat, right) => {
Expand All @@ -92,12 +100,51 @@ impl RegexImpl for Regex {
}
}

fn transform(&mut self, config: &TransformConfig) {
match self {
Self::Operand(Operand::Unresolved(_)) => system_panic!("Unresolved operand."),

Self::Operand(Operand::Dump(_, inner)) => inner.transform(config),

Self::Operand(Operand::Transform(feature, inner)) => {
let mut inner = expect_some!(take(inner), "Empty transformation.",);

let config = config.add(feature);

inner.transform(&config);

*self = *inner;
}

Self::Operand(Operand::Class(span, class)) => {
if let Some(new_regex) = config.transform_class(span, class) {
*self = new_regex;
}
}

Self::Operand(Operand::Exclusion(set)) => {
config.transform_char_set(set);
}

Self::Binary(left, _, right) => {
left.transform(config);
right.transform(config);
}

Self::Unary(_, inner) => inner.transform(config),
}
}

fn alphabet(&self) -> Alphabet {
match self {
Self::Operand(Operand::Unresolved(_)) => system_panic!("Unresolved operand."),

Self::Operand(Operand::Dump(_, inner)) => inner.alphabet(),

Self::Operand(Operand::Transform(_, _)) => {
system_panic!("Unresolved transformation.");
}

Self::Operand(Operand::Class(_, Class::Char(ch))) => Set::new([*ch]),

Self::Operand(Operand::Class(_, _)) => Set::empty(),
Expand All @@ -124,6 +171,10 @@ impl RegexImpl for Regex {

Self::Operand(Operand::Dump(_, inner)) => inner.expand(alphabet),

Self::Operand(Operand::Transform(_, _)) => {
system_panic!("Unresolved transformation.");
}

Self::Operand(Operand::Exclusion(set)) => {
let mut alphabet = alphabet.clone();

Expand Down Expand Up @@ -288,6 +339,12 @@ impl RegexImpl for Regex {
inner.inline(inline_map, variant_map)?;
}

Self::Operand(Operand::Transform(_, inner)) => {
let inner = expect_some!(inner, "Empty transformation.",);

inner.inline(inline_map, variant_map)?;
}

Self::Operand(Operand::Class(_, _)) => (),

Self::Operand(Operand::Exclusion(_)) => (),
Expand Down Expand Up @@ -315,6 +372,12 @@ impl RegexImpl for Regex {
inner.set_span(span);
}

Self::Operand(Operand::Transform(_, inner)) => {
let inner = expect_some!(inner, "Empty transformation.",);

inner.set_span(span);
}

Self::Operand(Operand::Class(op_span, _)) => {
*op_span = span;
}
Expand Down Expand Up @@ -355,6 +418,10 @@ impl RegexImpl for Regex {
))
}

Self::Operand(Operand::Transform(_, _)) => {
system_panic!("Unresolved transformation.");
}

Self::Binary(left, op, right) => {
let left = left.encode(scope)?;
let right = right.encode(scope)?;
Expand Down Expand Up @@ -383,6 +450,8 @@ impl RegexImpl for Regex {
pub(super) trait RegexImpl {
fn name(&self) -> Option<String>;

fn transform(&mut self, config: &TransformConfig);

fn alphabet(&self) -> Alphabet;

fn expand(&mut self, alphabet: &Alphabet);
Expand All @@ -394,6 +463,93 @@ pub(super) trait RegexImpl {
fn encode(&self, scope: &mut Scope) -> Result<TokenAutomata>;
}

/// A transformation that an `Operand::Transform` node requests for the
/// expression it wraps.
#[derive(Clone)]
pub(super) enum TransformFeature {
/// Case-insensitive matching; this is the feature attached to expressions
/// parsed from the `i(...)` function.
CaseInsensitive,
}

/// The set of transformation features that are in effect while
/// `RegexImpl::transform` walks an expression.
///
/// The `Default` value has every feature switched off.
#[derive(Default, Clone, Copy)]
pub(super) struct TransformConfig {
// When `true`, single-character classes and character sets are extended
// with the other-case variants of their characters.
case_insensitive: bool,
}

impl TransformConfig {
    /// Returns a copy of this configuration with `feature` switched on.
    ///
    /// The receiver is taken by value (the type is `Copy`), so the
    /// configuration the caller holds is not modified.
    #[inline(always)]
    pub(super) fn add(mut self, feature: &TransformFeature) -> Self {
        match feature {
            TransformFeature::CaseInsensitive => {
                self.case_insensitive = true;
            }
        }

        self
    }

    /// Builds the replacement expression for a character class.
    ///
    /// Returns `None` when the class must stay as it is: the class is not a
    /// single character (`Class::Char`), or `transform_char` reports no
    /// variants for that character. Otherwise returns a union of
    /// `Class::Char` operands, one per variant, each carrying `span`.
    #[inline(always)]
    pub(super) fn transform_class(&self, span: &Span, class: &Class) -> Option<Regex> {
        let Class::Char(ch) = class else {
            return None;
        };

        let mut variants = self.transform_char(*ch).into_iter();

        let first = variants.next()?;

        let union = variants.fold(
            Regex::Operand(Operand::Class(*span, Class::Char(first))),
            |accumulator, next| {
                Regex::Binary(
                    Box::new(accumulator),
                    Operator::Union,
                    Box::new(Regex::Operand(Operand::Class(*span, Class::Char(next)))),
                )
            },
        );

        Some(union)
    }

    /// Extends `char_set` in place with the variants of each single-character
    /// class (`Class::Char`) it contains.
    ///
    /// Does nothing when case-insensitivity is switched off. Classes other
    /// than `Class::Char` are left untouched.
    #[inline(always)]
    pub(super) fn transform_char_set(&self, char_set: &mut CharSet) {
        if !self.case_insensitive {
            return;
        }

        // The variants are collected first because the set cannot be extended
        // while it is being iterated.
        let mut transformed = Set::empty();

        for class in char_set.classes.iter() {
            let Class::Char(ch) = class else {
                continue;
            };

            transformed.append(self.transform_char(*ch));
        }

        for ch in transformed {
            let _ = char_set.classes.insert(Class::Char(ch));
        }
    }

    /// Returns the characters that `ch` is replaced with under the current
    /// configuration: `ch` itself plus its upper- and lowercase counterparts.
    ///
    /// Returns an empty set when case-insensitivity is switched off or when
    /// `ch` has no single-character counterpart in the other case.
    ///
    /// Only one-to-one case mappings are taken into account. Some characters
    /// map to a sequence of several characters (for example, the uppercase
    /// form of `'ß'` is `"SS"`); the members of such a sequence are not
    /// standalone equivalents of `ch`, so they are not added as alternatives.
    #[inline(always)]
    fn transform_char(&self, ch: char) -> Set<char> {
        if !self.case_insensitive {
            return Set::empty();
        }

        let uppercase = Self::single_char(ch.to_uppercase());
        let lowercase = Self::single_char(ch.to_lowercase());

        let has_other_case = uppercase
            .into_iter()
            .chain(lowercase)
            .any(|variant| variant != ch);

        if !has_other_case {
            return Set::empty();
        }

        let mut variants = uppercase
            .into_iter()
            .chain(lowercase)
            .collect::<Set<char>>();

        let _ = variants.insert(ch);

        variants
    }

    /// Returns the only character produced by `mapping`, or `None` if the
    /// mapping produces no characters or more than one character.
    #[inline(always)]
    fn single_char(mut mapping: impl Iterator<Item = char>) -> Option<char> {
        let first = mapping.next()?;

        match mapping.next() {
            None => Some(first),
            Some(_) => None,
        }
    }
}

#[derive(Clone, Copy)]
pub(super) enum Operator {
Union = 10,
Expand Down Expand Up @@ -475,6 +631,7 @@ impl ExpressionOperator for Operator {
/// An operand of a `Regex` expression.
pub(super) enum Operand {
/// An identifier that has not been replaced with a concrete expression yet.
/// `transform` and `alphabet` panic when they encounter it.
Unresolved(Ident),
/// A wrapper around an inner expression; `transform`, `alphabet` and
/// `expand` delegate to that inner expression.
Dump(Span, Box<Regex>),
/// A pending transformation of the inner expression, parsed from `i(...)`.
/// The inner expression is optional so that `transform` can move it out;
/// `transform` replaces this operand with the transformed inner expression,
/// and `alphabet`, `expand` and `encode` panic if one is still present.
Transform(TransformFeature, Option<Box<Regex>>),
/// A character class and its span. A `Class::Char` contributes its
/// character to the alphabet.
Class(Span, Class),
/// A character set operand.
/// NOTE(review): presumably matches characters outside of the set — confirm.
Exclusion(CharSet),
}
Expand Down Expand Up @@ -548,6 +705,24 @@ impl ExpressionOperand<Operator> for Operand {
return Ok(Regex::Operand(Operand::Dump(span, Box::new(inner))));
}

if lookahead.peek(functions_kw::i) {
let _ = input.parse::<functions_kw::i>()?;

let content;
parenthesized!(content in input);

let inner = content.parse::<Regex>()?;

if !content.is_empty() {
return Err(content.error("Unexpected expression end."));
}

return Ok(Regex::Operand(Operand::Transform(
TransformFeature::CaseInsensitive,
Some(Box::new(inner)),
)));
}

if lookahead.peek(syn::Ident) {
let ident = input.parse::<Ident>()?;

Expand Down Expand Up @@ -593,6 +768,10 @@ impl ExpressionOperand<Operator> for Operand {
return true;
}

if input.peek(functions_kw::i) {
return true;
}

if input.peek(syn::Ident) {
return true;
}
Expand All @@ -604,3 +783,7 @@ impl ExpressionOperand<Operator> for Operand {
false
}
}

// Custom `syn` keywords for the function-like operators of the regex syntax.
mod functions_kw {
// `i` introduces the case-insensitive operator, written as `i(<regex>)`.
syn::custom_keyword!(i);
}

0 comments on commit d0407df

Please sign in to comment.