-
-
Notifications
You must be signed in to change notification settings - Fork 501
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(transformer): support all /regex/ to
new RegExp
transforms (#5387
) related: #4754 The implementation is ported from [esbuild](https://github.com/evanw/esbuild/blob/332727499e62315cff4ecaff9fa8b86336555e46/internal/js_parser/js_parser.go#L12820-L12840), and it covers all of Babel's regexp plugins. --- ## The following description was generated by `Graphite` 😋 ### TL;DR Added support for transforming various RegExp features to ensure compatibility with older JavaScript environments. ### What changed? - Implemented a new `RegExp` transformer to handle unsupported RegExp literal features - Added options to control different RegExp transformations (e.g., sticky flag, unicode flag, dot-all flag, etc.) - Updated the transformer to convert unsupported RegExp literals into `new RegExp()` constructor calls - Added test cases for different RegExp transformations - Integrated the new RegExp transformer into the existing transformation pipeline ### How to test? 1. Run the existing test suite to ensure no regressions 2. Execute the new RegExp-specific tests in the `tasks/transform_conformance/tests/esbuild-tests/test/fixtures/regexp/` directory 3. Try transforming code with various RegExp features using different target environments to verify correct transformations
- Loading branch information
Showing
35 changed files
with
476 additions
and
43 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
{ | ||
"esbuild-regexp-lookbehind-assertions": { | ||
"chrome": "62", | ||
"deno": "1.0", | ||
"edge": "79", | ||
"firefox": "78", | ||
"hermes": "0.7", | ||
"ios": "16.4", | ||
"node": "8.10", | ||
"opera": "49", | ||
"safari": "16.4" | ||
}, | ||
"esbuild-regexp-match-indices": { | ||
"chrome": "90", | ||
"deno": "1.8", | ||
"edge": "90", | ||
"firefox": "88", | ||
"ios": "15.0", | ||
"node": "16.0", | ||
"opera": "76", | ||
"safari": "15.0" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,218 @@ | ||
//! RegExp Transformer | ||
//! | ||
//! This module supports various RegExp plugins to handle unsupported RegExp literal features. | ||
//! When an unsupported feature is detected, these plugins convert the RegExp literal into | ||
//! a `new RegExp()` constructor call to avoid syntax errors. | ||
//! | ||
//! Note: You will need to include a polyfill for the `RegExp` constructor in your code to have the correct runtime behavior. | ||
//! | ||
//! ### ES2015 | ||
//! | ||
//! #### Sticky flag (`y`) | ||
//! - @babel/plugin-transform-sticky-regex: <https://babeljs.io/docs/en/babel-plugin-transform-sticky-regex> | ||
//! | ||
//! #### Unicode flag (`u`) | ||
//! - @babel/plugin-transform-unicode-regex: <https://babeljs.io/docs/en/babel-plugin-transform-unicode-regex> | ||
//! | ||
//! ### ES2018 | ||
//! | ||
//! #### DotAll flag (`s`) | ||
//! - @babel/plugin-transform-dotall-regex: <https://babeljs.io/docs/en/babel-plugin-transform-dotall-regex> | ||
//! - Spec: ECMAScript 2018: <https://262.ecma-international.org/9.0/#sec-get-regexp.prototype.dotAll> | ||
//! | ||
//! #### Lookbehind assertions (`/(?<=x)/` and `/(?<!x)/`) | ||
//! - Implementation: Same as esbuild's handling | ||
//! | ||
//! #### Named capture groups (`(?<name>x)`) | ||
//! - @babel/plugin-transform-named-capturing-groups-regex: <https://babeljs.io/docs/en/babel-plugin-transform-named-capturing-groups-regex> | ||
//! | ||
//! #### Unicode property escapes (`\p{...}` and `\P{...}`) | ||
//! - @babel/plugin-transform-unicode-property-regex: <https://babeljs.io/docs/en/babel-plugin-proposal-unicode-property-regex> | ||
//! | ||
//! ### ES2022 | ||
//! | ||
//! #### Match indices flag (`d`) | ||
//! - Implementation: Same as esbuild's handling | ||
//! | ||
//! ### ES2024 | ||
//! | ||
//! #### Set notation + properties of strings (`v`) | ||
//! - @babel/plugin-transform-unicode-sets-regex: <https://babeljs.io/docs/en/babel-plugin-proposal-unicode-sets-regex> | ||
//! - TC39 Proposal: <https://github.com/tc39/proposal-regexp-set-notation> | ||
mod options; | ||
|
||
use std::borrow::Cow; | ||
use std::mem; | ||
|
||
pub use options::RegExpOptions; | ||
use oxc_allocator::Box; | ||
use oxc_allocator::Vec; | ||
use oxc_ast::ast::*; | ||
use oxc_regular_expression::ast::{ | ||
CharacterClass, CharacterClassContents, LookAroundAssertionKind, Pattern, Term, | ||
}; | ||
use oxc_semantic::ReferenceFlags; | ||
use oxc_span::Atom; | ||
use oxc_traverse::{Traverse, TraverseCtx}; | ||
|
||
use crate::context::Ctx; | ||
|
||
pub struct RegExp<'a> { | ||
_ctx: Ctx<'a>, | ||
options: RegExpOptions, | ||
} | ||
|
||
impl<'a> RegExp<'a> { | ||
pub fn new(options: RegExpOptions, ctx: Ctx<'a>) -> Self { | ||
Self { _ctx: ctx, options } | ||
} | ||
} | ||
|
||
impl<'a> Traverse<'a> for RegExp<'a> { | ||
fn enter_expression( | ||
&mut self, | ||
expr: &mut Expression<'a>, | ||
ctx: &mut oxc_traverse::TraverseCtx<'a>, | ||
) { | ||
let Expression::RegExpLiteral(ref mut regexp) = expr else { | ||
return; | ||
}; | ||
|
||
if !self.has_unsupported_regular_expression_flags(regexp.regex.flags) | ||
&& self.requires_pattern_analysis() | ||
{ | ||
match try_parse_pattern(regexp, ctx) { | ||
Ok(pattern) => { | ||
let is_unsupported = self.has_unsupported_regular_expression_pattern(&pattern); | ||
regexp.regex.pattern = RegExpPattern::Pattern(pattern); | ||
if !is_unsupported { | ||
return; | ||
} | ||
} | ||
Err(err) => { | ||
regexp.regex.pattern = RegExpPattern::Invalid(err); | ||
return; | ||
} | ||
} | ||
}; | ||
|
||
let pattern_source: Cow<'_, str> = match ®exp.regex.pattern { | ||
RegExpPattern::Raw(raw) | RegExpPattern::Invalid(raw) => Cow::Borrowed(raw), | ||
RegExpPattern::Pattern(p) => Cow::Owned(p.to_string()), | ||
}; | ||
|
||
let callee = { | ||
let symbol_id = ctx.scopes().find_binding(ctx.current_scope_id(), "RegExp"); | ||
let ident = ctx.create_reference_id( | ||
regexp.span, | ||
Atom::from("RegExp"), | ||
symbol_id, | ||
ReferenceFlags::read(), | ||
); | ||
ctx.ast.expression_from_identifier_reference(ident) | ||
}; | ||
|
||
let mut arguments = ctx.ast.vec_with_capacity(2); | ||
arguments.push( | ||
ctx.ast.argument_expression( | ||
ctx.ast.expression_string_literal(regexp.span, pattern_source), | ||
), | ||
); | ||
|
||
let flags = regexp.regex.flags.to_string(); | ||
let flags = | ||
ctx.ast.argument_expression(ctx.ast.expression_string_literal(regexp.span, flags)); | ||
arguments.push(flags); | ||
|
||
*expr = ctx.ast.expression_new( | ||
regexp.span, | ||
callee, | ||
arguments, | ||
None::<TSTypeParameterInstantiation>, | ||
); | ||
} | ||
} | ||
|
||
impl<'a> RegExp<'a> { | ||
fn requires_pattern_analysis(&self) -> bool { | ||
self.options.named_capture_groups | ||
|| self.options.unicode_property_escapes | ||
|| self.options.look_behind_assertions | ||
} | ||
|
||
/// Check if the regular expression contains any unsupported flags. | ||
fn has_unsupported_regular_expression_flags(&self, flags: RegExpFlags) -> bool { | ||
flags.iter().any(|f| match f { | ||
RegExpFlags::S if self.options.dot_all_flag => true, | ||
RegExpFlags::Y if self.options.sticky_flag => true, | ||
RegExpFlags::U if self.options.unicode_flag => true, | ||
RegExpFlags::D if self.options.match_indices => true, | ||
RegExpFlags::V if self.options.set_notation => true, | ||
_ => false, | ||
}) | ||
} | ||
|
||
/// Check if the regular expression contains any unsupported syntax. | ||
/// | ||
/// Based on parsed regular expression pattern. | ||
fn has_unsupported_regular_expression_pattern(&self, pattern: &Pattern<'a>) -> bool { | ||
let check_terms = |terms: &Vec<'a, Term>| { | ||
terms.iter().any(|element| match element { | ||
Term::CapturingGroup(_) if self.options.named_capture_groups => true, | ||
Term::UnicodePropertyEscape(_) if self.options.unicode_property_escapes => true, | ||
Term::CharacterClass(character_class) if self.options.unicode_property_escapes => { | ||
has_unicode_property_escape_character_class(character_class) | ||
} | ||
Term::LookAroundAssertion(assertion) | ||
if self.options.look_behind_assertions | ||
&& matches!( | ||
assertion.kind, | ||
LookAroundAssertionKind::Lookbehind | ||
| LookAroundAssertionKind::NegativeLookbehind | ||
) => | ||
{ | ||
true | ||
} | ||
_ => false, | ||
}) | ||
}; | ||
|
||
pattern.body.body.iter().any(|alternative| check_terms(&alternative.body)) | ||
} | ||
} | ||
|
||
fn has_unicode_property_escape_character_class(character_class: &CharacterClass) -> bool { | ||
character_class.body.iter().any(|element| match element { | ||
CharacterClassContents::UnicodePropertyEscape(_) => true, | ||
CharacterClassContents::NestedCharacterClass(character_class) => { | ||
has_unicode_property_escape_character_class(character_class) | ||
} | ||
_ => false, | ||
}) | ||
} | ||
|
||
fn try_parse_pattern<'a>( | ||
literal: &mut RegExpLiteral<'a>, | ||
ctx: &mut TraverseCtx<'a>, | ||
) -> Result<Box<'a, Pattern<'a>>, &'a str> { | ||
// Take the ownership of the pattern | ||
let regexp_pattern = mem::replace(&mut literal.regex.pattern, RegExpPattern::Raw("")); | ||
|
||
match regexp_pattern { | ||
RegExpPattern::Raw(raw) => { | ||
use oxc_regular_expression::{ParserOptions, PatternParser}; | ||
let options = ParserOptions { | ||
span_offset: literal.span.start + 1, // exclude `/` | ||
unicode_mode: literal.regex.flags.contains(RegExpFlags::U) | ||
|| literal.regex.flags.contains(RegExpFlags::V), | ||
unicode_sets_mode: literal.regex.flags.contains(RegExpFlags::V), | ||
}; | ||
PatternParser::new(ctx.ast.allocator, raw, options) | ||
.parse() | ||
.map_or_else(|_| Err(raw), |p| Ok(ctx.alloc(p))) | ||
} | ||
RegExpPattern::Pattern(pattern) => Ok(pattern), | ||
RegExpPattern::Invalid(raw) => Err(raw), | ||
} | ||
} |
Oops, something went wrong.