Skip to content

Commit

Permalink
feat(transformer): support all /regex/ to new RegExp transforms (#5387
Browse files Browse the repository at this point in the history
)

related: #4754

The implementation is ported from [esbuild](https://github.com/evanw/esbuild/blob/332727499e62315cff4ecaff9fa8b86336555e46/internal/js_parser/js_parser.go#L12820-L12840) and covers all of Babel's RegExp plugins.

---

## The following description was generated by `Graphite` 😋

### TL;DR

Added support for transforming various RegExp features to ensure compatibility with older JavaScript environments.

### What changed?

- Implemented a new `RegExp` transformer to handle unsupported RegExp literal features
- Added options to control different RegExp transformations (e.g., sticky flag, unicode flag, dot-all flag, etc.)
- Updated the transformer to convert unsupported RegExp literals into `new RegExp()` constructor calls
- Added test cases for different RegExp transformations
- Integrated the new RegExp transformer into the existing transformation pipeline

### How to test?

1. Run the existing test suite to ensure no regressions
2. Execute the new RegExp-specific tests in the `tasks/transform_conformance/tests/esbuild-tests/test/fixtures/regexp/` directory
3. Try transforming code with various RegExp features using different target environments to verify correct transformations
  • Loading branch information
Dunqing committed Sep 5, 2024
1 parent d9d29f8 commit c59d8b3
Show file tree
Hide file tree
Showing 35 changed files with 476 additions and 43 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 8 additions & 7 deletions crates/oxc_transformer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,14 @@ test = false
doctest = false

[dependencies]
oxc_ast = { workspace = true }
oxc_span = { workspace = true }
oxc_allocator = { workspace = true }
oxc_diagnostics = { workspace = true }
oxc_syntax = { workspace = true, features = ["to_js_string"] }
oxc_traverse = { workspace = true }
oxc_semantic = { workspace = true }
oxc_ast = { workspace = true }
oxc_span = { workspace = true }
oxc_allocator = { workspace = true }
oxc_diagnostics = { workspace = true }
oxc_syntax = { workspace = true, features = ["to_js_string"] }
oxc_traverse = { workspace = true }
oxc_semantic = { workspace = true }
oxc_regular_expression = { workspace = true }

dashmap = { workspace = true }
indexmap = { workspace = true }
Expand Down
9 changes: 8 additions & 1 deletion crates/oxc_transformer/src/env/data/babel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,17 @@ use crate::env::{targets::version::Version, Versions};
fn features() -> &'static FxHashMap<String, Versions> {
static FEATURES: OnceLock<FxHashMap<String, Versions>> = OnceLock::new();
FEATURES.get_or_init(|| {
let map: FxHashMap<String, FxHashMap<String, String>> =
let mut map: FxHashMap<String, FxHashMap<String, String>> =
serde_json::from_str(include_str!("./@babel/compat_data/data/plugins.json"))
.expect("failed to parse json");

map.extend(
serde_json::from_str::<FxHashMap<String, FxHashMap<String, String>>>(include_str!(
"./esbuild/features.json"
))
.expect("failed to parse json"),
);

map.into_iter()
.map(|(feature, mut versions)| {
(feature, {
Expand Down
23 changes: 23 additions & 0 deletions crates/oxc_transformer/src/env/data/esbuild/features.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"esbuild-regexp-lookbehind-assertions": {
"chrome": "62",
"deno": "1.0",
"edge": "79",
"firefox": "78",
"hermes": "0.7",
"ios": "16.4",
"node": "8.10",
"opera": "49",
"safari": "16.4"
},
"esbuild-regexp-match-indices": {
"chrome": "90",
"deno": "1.8",
"edge": "90",
"firefox": "88",
"ios": "15.0",
"node": "16.0",
"opera": "76",
"safari": "15.0"
}
}
7 changes: 6 additions & 1 deletion crates/oxc_transformer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ mod es2019;
mod es2020;
mod es2021;
mod react;
mod regexp;
mod typescript;

mod helpers {
Expand All @@ -41,6 +42,7 @@ use oxc_diagnostics::OxcDiagnostic;
use oxc_semantic::{ScopeTree, SymbolTable};
use oxc_span::{SourceType, SPAN};
use oxc_traverse::{traverse_mut, Traverse, TraverseCtx};
use regexp::RegExp;

pub use crate::{
compiler_assumptions::CompilerAssumptions,
Expand Down Expand Up @@ -74,6 +76,7 @@ pub struct Transformer<'a> {
x2_es2018: ES2018<'a>,
x2_es2016: ES2016<'a>,
x3_es2015: ES2015<'a>,
x4_regexp: RegExp<'a>,
}

impl<'a> Transformer<'a> {
Expand Down Expand Up @@ -102,7 +105,8 @@ impl<'a> Transformer<'a> {
x2_es2019: ES2019::new(options.es2019, Rc::clone(&ctx)),
x2_es2018: ES2018::new(options.es2018, Rc::clone(&ctx)),
x2_es2016: ES2016::new(options.es2016, Rc::clone(&ctx)),
x3_es2015: ES2015::new(options.es2015, ctx),
x3_es2015: ES2015::new(options.es2015, Rc::clone(&ctx)),
x4_regexp: RegExp::new(options.regexp, ctx),
}
}

Expand Down Expand Up @@ -177,6 +181,7 @@ impl<'a> Traverse<'a> for Transformer<'a> {
self.x2_es2018.enter_expression(expr, ctx);
self.x2_es2016.enter_expression(expr, ctx);
self.x3_es2015.enter_expression(expr, ctx);
self.x4_regexp.enter_expression(expr, ctx);
}

fn exit_expression(&mut self, expr: &mut Expression<'a>, ctx: &mut TraverseCtx<'a>) {
Expand Down
27 changes: 27 additions & 0 deletions crates/oxc_transformer/src/options/transformer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use crate::{
es2021::ES2021Options,
options::babel::BabelOptions,
react::ReactOptions,
regexp::RegExpOptions,
typescript::TypeScriptOptions,
};

Expand All @@ -38,6 +39,8 @@ pub struct TransformOptions {
/// [preset-react](https://babeljs.io/docs/babel-preset-react)
pub react: ReactOptions,

pub regexp: RegExpOptions,

pub es2015: ES2015Options,

pub es2016: ES2016Options,
Expand All @@ -60,6 +63,7 @@ impl TransformOptions {
es2019: ES2019Options::from_targets_and_bugfixes(targets, bugfixes),
es2020: ES2020Options::from_targets_and_bugfixes(targets, bugfixes),
es2021: ES2021Options::from_targets_and_bugfixes(targets, bugfixes),
regexp: RegExpOptions::from_targets_and_bugfixes(targets, bugfixes),
..Default::default()
}
}
Expand Down Expand Up @@ -215,6 +219,29 @@ impl TransformOptions {
}
};

let regexp = transformer_options.regexp;
if !regexp.sticky_flag {
transformer_options.regexp.sticky_flag = options.has_plugin("transform-sticky-regex");
}
if !regexp.unicode_flag {
transformer_options.regexp.unicode_flag = options.has_plugin("transform-unicode-regex");
}
if !regexp.dot_all_flag {
transformer_options.regexp.dot_all_flag = options.has_plugin("transform-dotall-regex");
}
if !regexp.named_capture_groups {
transformer_options.regexp.named_capture_groups =
options.has_plugin("transform-named-capturing-groups-regex");
}
if !regexp.unicode_property_escapes {
transformer_options.regexp.unicode_property_escapes =
options.has_plugin("transform-unicode-property-regex");
}
if !regexp.set_notation {
transformer_options.regexp.set_notation =
options.has_plugin("transform-unicode-sets-regex");
}

transformer_options.assumptions = if options.assumptions.is_null() {
CompilerAssumptions::default()
} else {
Expand Down
218 changes: 218 additions & 0 deletions crates/oxc_transformer/src/regexp/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
//! RegExp Transformer
//!
//! This module supports various RegExp plugins to handle unsupported RegExp literal features.
//! When an unsupported feature is detected, these plugins convert the RegExp literal into
//! a `new RegExp()` constructor call to avoid syntax errors.
//!
//! Note: You will need to include a polyfill for the `RegExp` constructor in your code to have the correct runtime behavior.
//!
//! ### ES2015
//!
//! #### Sticky flag (`y`)
//! - @babel/plugin-transform-sticky-regex: <https://babeljs.io/docs/en/babel-plugin-transform-sticky-regex>
//!
//! #### Unicode flag (`u`)
//! - @babel/plugin-transform-unicode-regex: <https://babeljs.io/docs/en/babel-plugin-transform-unicode-regex>
//!
//! ### ES2018
//!
//! #### DotAll flag (`s`)
//! - @babel/plugin-transform-dotall-regex: <https://babeljs.io/docs/en/babel-plugin-transform-dotall-regex>
//! - Spec: ECMAScript 2018: <https://262.ecma-international.org/9.0/#sec-get-regexp.prototype.dotAll>
//!
//! #### Lookbehind assertions (`/(?<=x)/` and `/(?<!x)/`)
//! - Implementation: Same as esbuild's handling
//!
//! #### Named capture groups (`(?<name>x)`)
//! - @babel/plugin-transform-named-capturing-groups-regex: <https://babeljs.io/docs/en/babel-plugin-transform-named-capturing-groups-regex>
//!
//! #### Unicode property escapes (`\p{...}` and `\P{...}`)
//! - @babel/plugin-transform-unicode-property-regex: <https://babeljs.io/docs/en/babel-plugin-proposal-unicode-property-regex>
//!
//! ### ES2022
//!
//! #### Match indices flag (`d`)
//! - Implementation: Same as esbuild's handling
//!
//! ### ES2024
//!
//! #### Set notation + properties of strings (`v`)
//! - @babel/plugin-transform-unicode-sets-regex: <https://babeljs.io/docs/en/babel-plugin-proposal-unicode-sets-regex>
//! - TC39 Proposal: <https://github.com/tc39/proposal-regexp-set-notation>
mod options;

use std::borrow::Cow;
use std::mem;

pub use options::RegExpOptions;
use oxc_allocator::Box;
use oxc_allocator::Vec;
use oxc_ast::ast::*;
use oxc_regular_expression::ast::{
CharacterClass, CharacterClassContents, LookAroundAssertionKind, Pattern, Term,
};
use oxc_semantic::ReferenceFlags;
use oxc_span::Atom;
use oxc_traverse::{Traverse, TraverseCtx};

use crate::context::Ctx;

/// Transformer that rewrites RegExp literals using features enabled in
/// [`RegExpOptions`] into `new RegExp(pattern, flags)` constructor calls.
pub struct RegExp<'a> {
// Shared transform context. It is stored by `new` but never read in this
// module, hence the leading underscore.
_ctx: Ctx<'a>,
// Selects which RegExp flags / pattern features are treated as unsupported.
options: RegExpOptions,
}

impl<'a> RegExp<'a> {
    /// Builds the RegExp transformer from its options and the shared transform context.
    ///
    /// The context is only stored; nothing in this module reads it back.
    pub fn new(options: RegExpOptions, ctx: Ctx<'a>) -> Self {
        let _ctx = ctx;
        Self { options, _ctx }
    }
}

impl<'a> Traverse<'a> for RegExp<'a> {
    /// Replaces a RegExp literal with `new RegExp("<pattern>", "<flags>")` when it uses
    /// a flag or pattern feature that the options mark as unsupported.
    ///
    /// Decision order:
    /// 1. Anything that is not a RegExp literal is ignored.
    /// 2. If one of the literal's flags is unsupported, the literal is rewritten
    ///    without looking at the pattern.
    /// 3. Otherwise, if no pattern-level option is enabled, the literal is left untouched.
    /// 4. Otherwise the pattern is parsed (the parse result is cached on the literal);
    ///    the literal is rewritten only if the parsed pattern contains an unsupported
    ///    feature. A pattern that fails to parse is recorded as `Invalid` and the
    ///    literal is left as is.
    fn enter_expression(
        &mut self,
        expr: &mut Expression<'a>,
        ctx: &mut oxc_traverse::TraverseCtx<'a>,
    ) {
        let Expression::RegExpLiteral(ref mut regexp) = expr else {
            return;
        };

        if !self.has_unsupported_regular_expression_flags(regexp.regex.flags) {
            // Neither the flags nor any enabled pattern-level option can require a
            // rewrite, so keep the literal. Without this early return every RegExp
            // literal would fall through to the rewrite below, even with all
            // options disabled.
            if !self.requires_pattern_analysis() {
                return;
            }

            match try_parse_pattern(regexp, ctx) {
                Ok(pattern) => {
                    let is_unsupported = self.has_unsupported_regular_expression_pattern(&pattern);
                    // `try_parse_pattern` took the pattern out of the literal; put the
                    // parsed form back so later consumers do not have to re-parse it.
                    regexp.regex.pattern = RegExpPattern::Pattern(pattern);
                    if !is_unsupported {
                        return;
                    }
                }
                Err(err) => {
                    // Restore the raw source, flagged as invalid, and leave the literal alone.
                    regexp.regex.pattern = RegExpPattern::Invalid(err);
                    return;
                }
            }
        }

        // Source text of the pattern: borrowed when still raw, re-printed when parsed.
        let pattern_source: Cow<'_, str> = match &regexp.regex.pattern {
            RegExpPattern::Raw(raw) | RegExpPattern::Invalid(raw) => Cow::Borrowed(raw),
            RegExpPattern::Pattern(p) => Cow::Owned(p.to_string()),
        };

        // `RegExp` identifier, bound to whatever `RegExp` resolves to from the current
        // scope (no symbol if there is no such binding).
        let callee = {
            let symbol_id = ctx.scopes().find_binding(ctx.current_scope_id(), "RegExp");
            let ident = ctx.create_reference_id(
                regexp.span,
                Atom::from("RegExp"),
                symbol_id,
                ReferenceFlags::read(),
            );
            ctx.ast.expression_from_identifier_reference(ident)
        };

        let mut arguments = ctx.ast.vec_with_capacity(2);
        arguments.push(
            ctx.ast.argument_expression(
                ctx.ast.expression_string_literal(regexp.span, pattern_source),
            ),
        );

        let flags = regexp.regex.flags.to_string();
        let flags =
            ctx.ast.argument_expression(ctx.ast.expression_string_literal(regexp.span, flags));
        arguments.push(flags);

        *expr = ctx.ast.expression_new(
            regexp.span,
            callee,
            arguments,
            None::<TSTypeParameterInstantiation>,
        );
    }
}

impl<'a> RegExp<'a> {
fn requires_pattern_analysis(&self) -> bool {
self.options.named_capture_groups
|| self.options.unicode_property_escapes
|| self.options.look_behind_assertions
}

/// Check if the regular expression contains any unsupported flags.
fn has_unsupported_regular_expression_flags(&self, flags: RegExpFlags) -> bool {
flags.iter().any(|f| match f {
RegExpFlags::S if self.options.dot_all_flag => true,
RegExpFlags::Y if self.options.sticky_flag => true,
RegExpFlags::U if self.options.unicode_flag => true,
RegExpFlags::D if self.options.match_indices => true,
RegExpFlags::V if self.options.set_notation => true,
_ => false,
})
}

/// Check if the regular expression contains any unsupported syntax.
///
/// Based on parsed regular expression pattern.
fn has_unsupported_regular_expression_pattern(&self, pattern: &Pattern<'a>) -> bool {
let check_terms = |terms: &Vec<'a, Term>| {
terms.iter().any(|element| match element {
Term::CapturingGroup(_) if self.options.named_capture_groups => true,
Term::UnicodePropertyEscape(_) if self.options.unicode_property_escapes => true,
Term::CharacterClass(character_class) if self.options.unicode_property_escapes => {
has_unicode_property_escape_character_class(character_class)
}
Term::LookAroundAssertion(assertion)
if self.options.look_behind_assertions
&& matches!(
assertion.kind,
LookAroundAssertionKind::Lookbehind
| LookAroundAssertionKind::NegativeLookbehind
) =>
{
true
}
_ => false,
})
};

pattern.body.body.iter().any(|alternative| check_terms(&alternative.body))
}
}

/// Returns `true` if the character class, or any character class nested inside it,
/// contains a unicode property escape.
fn has_unicode_property_escape_character_class(character_class: &CharacterClass) -> bool {
    character_class.body.iter().any(|item| {
        if let CharacterClassContents::NestedCharacterClass(nested) = item {
            // Descend into the nested class.
            has_unicode_property_escape_character_class(nested)
        } else {
            matches!(item, CharacterClassContents::UnicodePropertyEscape(_))
        }
    })
}

/// Takes the pattern out of `literal` and returns it in parsed form.
///
/// The literal's pattern is left as an empty `Raw("")` placeholder; the caller is
/// expected to store the result back.
///
/// * An already parsed pattern is returned unchanged.
/// * A raw pattern is parsed with unicode / unicode-sets mode derived from the
///   literal's `u` and `v` flags.
///
/// # Errors
///
/// Returns the raw pattern source when the pattern was already marked invalid or
/// when parsing fails.
fn try_parse_pattern<'a>(
    literal: &mut RegExpLiteral<'a>,
    ctx: &mut TraverseCtx<'a>,
) -> Result<Box<'a, Pattern<'a>>, &'a str> {
    use oxc_regular_expression::{ParserOptions, PatternParser};

    // Move the pattern out, leaving a cheap placeholder behind.
    let source = match mem::replace(&mut literal.regex.pattern, RegExpPattern::Raw("")) {
        RegExpPattern::Pattern(parsed) => return Ok(parsed),
        RegExpPattern::Invalid(raw) => return Err(raw),
        RegExpPattern::Raw(raw) => raw,
    };

    let has_u_flag = literal.regex.flags.contains(RegExpFlags::U);
    let has_v_flag = literal.regex.flags.contains(RegExpFlags::V);
    let options = ParserOptions {
        // The literal's span starts at the opening `/`; the pattern starts one past it.
        span_offset: literal.span.start + 1,
        unicode_mode: has_u_flag || has_v_flag,
        unicode_sets_mode: has_v_flag,
    };

    let parsed = PatternParser::new(ctx.ast.allocator, source, options).parse();
    match parsed {
        Ok(pattern) => Ok(ctx.alloc(pattern)),
        Err(_) => Err(source),
    }
}
Loading

0 comments on commit c59d8b3

Please sign in to comment.