From 2da42efb6f23645388ecfaa9cfbd734fee477beb Mon Sep 17 00:00:00 2001 From: leaysgur <6259812+leaysgur@users.noreply.github.com> Date: Tue, 10 Sep 2024 07:32:28 +0000 Subject: [PATCH] refactor(regular_expression): Improve AST docs with refactoring may_contain_strings (#5665) Follow up #5661 --- .../oxc_ast/src/generated/assert_layouts.rs | 8 +-- crates/oxc_regular_expression/src/ast.rs | 12 +++- .../src/body_parser/parser.rs | 64 +++++++++---------- .../src/generated/derive_clone_in.rs | 2 +- .../src/generated/derive_content_eq.rs | 2 +- .../src/generated/derive_content_hash.rs | 2 +- 6 files changed, 46 insertions(+), 44 deletions(-) diff --git a/crates/oxc_ast/src/generated/assert_layouts.rs b/crates/oxc_ast/src/generated/assert_layouts.rs index 851fc08cc497a..636399610cafc 100644 --- a/crates/oxc_ast/src/generated/assert_layouts.rs +++ b/crates/oxc_ast/src/generated/assert_layouts.rs @@ -1506,8 +1506,8 @@ const _: () = { assert!(align_of::<CharacterClass>() == 8usize); assert!(offset_of!(CharacterClass, span) == 0usize); assert!(offset_of!(CharacterClass, negative) == 8usize); - assert!(offset_of!(CharacterClass, kind) == 9usize); - assert!(offset_of!(CharacterClass, strings) == 10usize); + assert!(offset_of!(CharacterClass, strings) == 9usize); + assert!(offset_of!(CharacterClass, kind) == 10usize); assert!(offset_of!(CharacterClass, body) == 16usize); assert!(size_of::<CharacterClassContentsKind>() == 1usize); @@ -3061,8 +3061,8 @@ const _: () = { assert!(align_of::<CharacterClass>() == 4usize); assert!(offset_of!(CharacterClass, span) == 0usize); assert!(offset_of!(CharacterClass, negative) == 8usize); - assert!(offset_of!(CharacterClass, kind) == 9usize); - assert!(offset_of!(CharacterClass, strings) == 10usize); + assert!(offset_of!(CharacterClass, strings) == 9usize); + assert!(offset_of!(CharacterClass, kind) == 10usize); assert!(offset_of!(CharacterClass, body) == 12usize); assert!(size_of::<CharacterClassContentsKind>() == 1usize); diff --git a/crates/oxc_regular_expression/src/ast.rs b/crates/oxc_regular_expression/src/ast.rs index
e592d14b5a4e8..c7740441e8c69 100644 --- a/crates/oxc_regular_expression/src/ast.rs +++ b/crates/oxc_regular_expression/src/ast.rs @@ -213,7 +213,7 @@ pub enum CharacterClassEscapeKind { pub struct UnicodePropertyEscape<'a> { pub span: Span, pub negative: bool, - /// `true` if `UnicodeSetsMode` and `name` matched unicode property of strings. + /// `true` if `UnicodeSetsMode` and `name` matches unicode property of strings. pub strings: bool, pub name: Atom<'a>, pub value: Option<Atom<'a>>, @@ -237,8 +237,11 @@ pub struct Dot { pub struct CharacterClass<'a> { pub span: Span, pub negative: bool, - pub kind: CharacterClassContentsKind, + /// `true` if: + /// - `body` contains [`UnicodePropertyEscape`], nested [`CharacterClass`] or [`ClassStringDisjunction`] which `strings` is `true` + /// - and matches each logic depends on `kind` pub strings: bool, + pub kind: CharacterClassContentsKind, pub body: Vec<'a, CharacterClassContents<'a>>, } @@ -288,7 +291,7 @@ pub struct CharacterClassRange { #[cfg_attr(feature = "serialize", derive(Serialize, Tsify))] pub struct ClassStringDisjunction<'a> { pub span: Span, - /// `true` if body is empty or contain [`ClassString`] which `strings` is `true` + /// `true` if body is empty or contains [`ClassString`] which `strings` is `true`. pub strings: bool, pub body: Vec<'a, ClassString<'a>>, } @@ -313,6 +316,7 @@ pub struct ClassString<'a> { #[cfg_attr(feature = "serialize", derive(Serialize, Tsify))] pub struct CapturingGroup<'a> { pub span: Span, + /// Group name to be referenced by [`NamedReference`]. pub name: Option<Atom<'a>>, pub body: Disjunction<'a>, } @@ -330,6 +334,8 @@ pub struct IgnoreGroup<'a> { pub body: Disjunction<'a>, } +/// Pattern modifiers in [`IgnoreGroup`]. +/// e.g.
`(?i:...)`, `(?-s:...)` #[ast] #[derive(Debug)] #[generate_derive(CloneIn, ContentEq, ContentHash)] diff --git a/crates/oxc_regular_expression/src/body_parser/parser.rs b/crates/oxc_regular_expression/src/body_parser/parser.rs index 116cb7e9cff6c..2fff63b5a42a9 100644 --- a/crates/oxc_regular_expression/src/body_parser/parser.rs +++ b/crates/oxc_regular_expression/src/body_parser/parser.rs @@ -727,7 +727,7 @@ impl<'a> PatternParser<'a> { let (kind, body) = self.parse_class_contents()?; if self.reader.eat(']') { - let strings = body.iter().any(PatternParser::may_contain_strings_in_class_contents); + let strings = PatternParser::may_contain_strings_in_class_contents(&kind, &body); // [SS:EE] CharacterClass :: [^ ClassContents ] // It is a Syntax Error if MayContainStrings of the ClassContents is true. @@ -1259,30 +1259,7 @@ impl<'a> PatternParser<'a> { let (kind, body) = self.parse_class_contents()?; if self.reader.eat(']') { - let strings = match kind { - // MayContainStrings is true - // - if ClassContents is ClassUnion - // - && ClassUnion has ClassOperands - // - && at least 1 ClassOperand has MayContainStrings: true - ast::CharacterClassContentsKind::Union => { - body.iter().any(PatternParser::may_contain_strings_in_class_contents) - } - // MayContainStrings is true - // - if ClassContents is ClassIntersection - // - && ClassIntersection has ClassOperands - // - && all ClassOperands have MayContainStrings: true - ast::CharacterClassContentsKind::Intersection => { - body.iter().all(PatternParser::may_contain_strings_in_class_contents) - } - // MayContainStrings is true - // - if ClassContents is ClassSubtraction - // - && ClassSubtraction has ClassOperands - // - && the first ClassOperand has MayContainStrings: true - ast::CharacterClassContentsKind::Subtraction => body - .iter() - .next() - .map_or(false, PatternParser::may_contain_strings_in_class_contents), - }; + let strings = PatternParser::may_contain_strings_in_class_contents(&kind, &body); // [SS:EE] 
NestedClass :: [^ ClassContents ] // It is a Syntax Error if MayContainStrings of the ClassContents is true. @@ -2163,27 +2140,46 @@ impl<'a> PatternParser<'a> { // --- - fn may_contain_strings_in_class_contents(item: &ast::CharacterClassContents) -> bool { - match item { + fn may_contain_strings_in_class_contents( + kind: &ast::CharacterClassContentsKind, + body: &Vec<'a, ast::CharacterClassContents<'a>>, + ) -> bool { + let may_contain_strings = |item: &ast::CharacterClassContents<'a>| match item { // MayContainStrings is true // - if ClassContents contains UnicodePropertyValueExpression // - && UnicodePropertyValueExpression is LoneUnicodePropertyNameOrValue // - && it is binary property of strings(can be true only with `UnicodeSetsMode`) - ast::CharacterClassContents::UnicodePropertyEscape(unicode_property_escape) => { - unicode_property_escape.strings - } + ast::CharacterClassContents::UnicodePropertyEscape(item) => item.strings, // MayContainStrings is true // - if ClassStringDisjunction is [empty] // - || if ClassStringDisjunction contains ClassString // - && ClassString is [empty] // - || ClassString contains 2 more ClassSetCharacters - ast::CharacterClassContents::ClassStringDisjunction(class_string_disjunction) => { - class_string_disjunction.strings - } + ast::CharacterClassContents::ClassStringDisjunction(item) => item.strings, // MayContainStrings is true // - if NestedClass has MayContainStrings: true - ast::CharacterClassContents::NestedCharacterClass(nested_class) => nested_class.strings, + ast::CharacterClassContents::NestedCharacterClass(item) => item.strings, _ => false, + }; + + match kind { + // MayContainStrings is true + // - if ClassContents is ClassUnion + // - && ClassUnion has ClassOperands + // - && at least 1 ClassOperand has MayContainStrings: true + ast::CharacterClassContentsKind::Union => body.iter().any(may_contain_strings), + // MayContainStrings is true + // - if ClassContents is ClassIntersection + // - && ClassIntersection has 
ClassOperands + // - && all ClassOperands have MayContainStrings: true + ast::CharacterClassContentsKind::Intersection => body.iter().all(may_contain_strings), + // MayContainStrings is true + // - if ClassContents is ClassSubtraction + // - && ClassSubtraction has ClassOperands + // - && the first ClassOperand has MayContainStrings: true + ast::CharacterClassContentsKind::Subtraction => { + body.iter().next().map_or(false, may_contain_strings) + } } } } diff --git a/crates/oxc_regular_expression/src/generated/derive_clone_in.rs b/crates/oxc_regular_expression/src/generated/derive_clone_in.rs index 7ae6fc2676767..76890af6bdf89 100644 --- a/crates/oxc_regular_expression/src/generated/derive_clone_in.rs +++ b/crates/oxc_regular_expression/src/generated/derive_clone_in.rs @@ -229,8 +229,8 @@ impl<'old_alloc, 'new_alloc> CloneIn<'new_alloc> for CharacterClass<'old_alloc> CharacterClass { span: CloneIn::clone_in(&self.span, allocator), negative: CloneIn::clone_in(&self.negative, allocator), - kind: CloneIn::clone_in(&self.kind, allocator), strings: CloneIn::clone_in(&self.strings, allocator), + kind: CloneIn::clone_in(&self.kind, allocator), body: CloneIn::clone_in(&self.body, allocator), } } diff --git a/crates/oxc_regular_expression/src/generated/derive_content_eq.rs b/crates/oxc_regular_expression/src/generated/derive_content_eq.rs index df63972e9e894..e21234c42117e 100644 --- a/crates/oxc_regular_expression/src/generated/derive_content_eq.rs +++ b/crates/oxc_regular_expression/src/generated/derive_content_eq.rs @@ -178,8 +178,8 @@ impl ContentEq for Dot { impl<'a> ContentEq for CharacterClass<'a> { fn content_eq(&self, other: &Self) -> bool { ContentEq::content_eq(&self.negative, &other.negative) - && ContentEq::content_eq(&self.kind, &other.kind) && ContentEq::content_eq(&self.strings, &other.strings) + && ContentEq::content_eq(&self.kind, &other.kind) && ContentEq::content_eq(&self.body, &other.body) } } diff --git 
a/crates/oxc_regular_expression/src/generated/derive_content_hash.rs b/crates/oxc_regular_expression/src/generated/derive_content_hash.rs index 58bd592818d76..1d0fecfb1a298 100644 --- a/crates/oxc_regular_expression/src/generated/derive_content_hash.rs +++ b/crates/oxc_regular_expression/src/generated/derive_content_hash.rs @@ -143,8 +143,8 @@ impl ContentHash for Dot { impl<'a> ContentHash for CharacterClass<'a> { fn content_hash<H: Hasher>(&self, state: &mut H) { ContentHash::content_hash(&self.negative, state); - ContentHash::content_hash(&self.kind, state); ContentHash::content_hash(&self.strings, state); + ContentHash::content_hash(&self.kind, state); ContentHash::content_hash(&self.body, state); } }