From d75ddbd2b53a3a6b89f296133b77209f3043a635 Mon Sep 17 00:00:00 2001 From: Cameron McHenry Date: Sun, 22 Sep 2024 19:24:57 -0400 Subject: [PATCH] refactor(linter): use parsed patterns in `no-empty-character-class` rule --- .../rules/eslint/no_empty_character_class.rs | 97 +++++++++++++++---- .../snapshots/no_empty_character_class.snap | 92 ++++++++++++------ 2 files changed, 136 insertions(+), 53 deletions(-) diff --git a/crates/oxc_linter/src/rules/eslint/no_empty_character_class.rs b/crates/oxc_linter/src/rules/eslint/no_empty_character_class.rs index 69e71e51e27b63..7db6cfdb201735 100644 --- a/crates/oxc_linter/src/rules/eslint/no_empty_character_class.rs +++ b/crates/oxc_linter/src/rules/eslint/no_empty_character_class.rs @@ -1,16 +1,18 @@ +use memchr::memchr2; // Ported from https://github.com/eslint/eslint/blob/main/lib/rules/no-empty-character-class.js -use lazy_static::lazy_static; use oxc_ast::AstKind; use oxc_diagnostics::OxcDiagnostic; use oxc_macros::declare_oxc_lint; +use oxc_regular_expression::ast::{ + Alternative, CharacterClass, CharacterClassContents, Disjunction, Pattern, Term, +}; use oxc_span::Span; -use regex::Regex; use crate::{context::LintContext, rule::Rule, AstNode}; fn no_empty_character_class_diagnostic(span: Span) -> OxcDiagnostic { - OxcDiagnostic::warn("Empty character class") - .with_help("Try to remove empty character class `[]` in regexp literal") + OxcDiagnostic::warn("Empty character class will not match anything") + .with_help("Remove the empty character class: `[]`") .with_label(span) } @@ -34,26 +36,77 @@ declare_oxc_lint!( impl Rule for NoEmptyCharacterClass { fn run<'a>(&self, node: &AstNode<'a>, ctx: &LintContext<'a>) { - lazy_static! { - /* - * plain-English description of the following regexp: - * 0. `^` fix the match at the beginning of the string - * 1. `([^\\[]|\\.|\[([^\\\]]|\\.)+\])*`: regexp contents; 0 or more of the following - * 1.0. `[^\\[]`: any character that's not a `\` or a `[` (anything but escape sequences and character classes) - * 1.1. `\\.`: an escape sequence - * 1.2. `\[([^\\\]]|\\.)+\]`: a character class that isn't empty - * 2. `$`: fix the match at the end of the string - */ - static ref NO_EMPTY_CLASS_REGEX_PATTERN: Regex = - Regex::new(r"^([^\\\[]|\\.|\[([^\\\]]|\\.)+\])*$").unwrap(); - } - if let AstKind::RegExpLiteral(lit) = node.kind() { - if !NO_EMPTY_CLASS_REGEX_PATTERN - .is_match(lit.regex.pattern.source_text(ctx.source_text()).as_ref()) + let Some(pattern) = lit.regex.pattern.as_pattern() else { + return; + }; + + // Skip if the pattern doesn't contain a `[` or `]` character + if memchr2(b'[', b']', lit.regex.pattern.source_text(ctx.source_text()).as_bytes()) + .is_none() { - ctx.diagnostic(no_empty_character_class_diagnostic(lit.span)); + return; + } + + visit_terms(pattern, &mut |term| { + if let Term::CharacterClass(class) = term { + check_character_class(ctx, class); + } + }); + } + } +} + +fn check_character_class(ctx: &LintContext, class: &CharacterClass) { + // Class has nothing in it, example: `/[]/` + if !class.negative && class.body.is_empty() { + ctx.diagnostic(no_empty_character_class_diagnostic(class.span)); + return; + } + + // Class has something in it, but might contain empty nested character classes, + // example: `/[[]]/` + for term in &class.body { + if let CharacterClassContents::NestedCharacterClass(class) = term { + check_character_class(ctx, class); + } + } +} + +// TODO: Replace with proper regex AST visitor when available +/// Calls the given closure on every [`Term`] in the [`Pattern`]. +fn visit_terms<'a, F: FnMut(&'a Term<'a>)>(pattern: &'a Pattern, f: &mut F) { + visit_terms_disjunction(&pattern.body, f); +} + +/// Calls the given closure on every [`Term`] in the [`Disjunction`]. +fn visit_terms_disjunction<'a, F: FnMut(&'a Term<'a>)>(disjunction: &'a Disjunction, f: &mut F) { + for alternative in &disjunction.body { + visit_terms_alternative(alternative, f); + } +} + +/// Calls the given closure on every [`Term`] in the [`Alternative`]. +fn visit_terms_alternative<'a, F: FnMut(&'a Term<'a>)>(alternative: &'a Alternative, f: &mut F) { + for term in &alternative.body { + match term { + Term::LookAroundAssertion(lookaround) => { + f(term); + visit_terms_disjunction(&lookaround.body, f); + } + Term::Quantifier(quant) => { + f(term); + f(&quant.body); + } + Term::CapturingGroup(group) => { + f(term); + visit_terms_disjunction(&group.body, f); + } + Term::IgnoreGroup(group) => { + f(term); + visit_terms_disjunction(&group.body, f); } + _ => f(term), } } } @@ -88,6 +141,8 @@ fn test() { ("var foo = /\\[[]/;", None), ("var foo = /\\[\\[\\]a-z[]/;", None), ("var foo = /[]]/d;", None), + ("var foo = /[[][]]/v;", None), + ("var foo = /[[]]|[]/v;", None), ]; Tester::new(NoEmptyCharacterClass::NAME, pass, fail).test_and_snapshot(); diff --git a/crates/oxc_linter/src/snapshots/no_empty_character_class.snap b/crates/oxc_linter/src/snapshots/no_empty_character_class.snap index 35f2a9cb7e446a..2d0095eb7a998f 100644 --- a/crates/oxc_linter/src/snapshots/no_empty_character_class.snap +++ b/crates/oxc_linter/src/snapshots/no_empty_character_class.snap @@ -1,58 +1,86 @@ --- source: crates/oxc_linter/src/tester.rs --- - ⚠ eslint(no-empty-character-class): Empty character class - ╭─[no_empty_character_class.tsx:1:11] + ⚠ eslint(no-empty-character-class): Empty character class will not match anything + ╭─[no_empty_character_class.tsx:1:16] 1 │ var foo = /^abc[]/; - · ──────── + · ── ╰──── - help: Try to remove empty character class `[]` in regexp literal + help: Remove the empty character class: `[]` - ⚠ eslint(no-empty-character-class): Empty character class - ╭─[no_empty_character_class.tsx:1:11] + ⚠ eslint(no-empty-character-class): Empty character class will not match anything + ╭─[no_empty_character_class.tsx:1:15] 1 │ var foo = /foo[]bar/; - · ────────── + · ── ╰──── - help: Try to remove empty character class `[]` in regexp literal + help: Remove the empty character class: `[]` - ⚠ eslint(no-empty-character-class): Empty character class - ╭─[no_empty_character_class.tsx:1:15] + ⚠ eslint(no-empty-character-class): Empty character class will not match anything + ╭─[no_empty_character_class.tsx:1:20] 1 │ if (foo.match(/^abc[]/)) {} - · ──────── + · ── ╰──── - help: Try to remove empty character class `[]` in regexp literal + help: Remove the empty character class: `[]` - ⚠ eslint(no-empty-character-class): Empty character class - ╭─[no_empty_character_class.tsx:1:5] + ⚠ eslint(no-empty-character-class): Empty character class will not match anything + ╭─[no_empty_character_class.tsx:1:10] 1 │ if (/^abc[]/.test(foo)) {} - · ──────── + · ── ╰──── - help: Try to remove empty character class `[]` in regexp literal + help: Remove the empty character class: `[]` - ⚠ eslint(no-empty-character-class): Empty character class - ╭─[no_empty_character_class.tsx:1:11] + ⚠ eslint(no-empty-character-class): Empty character class will not match anything + ╭─[no_empty_character_class.tsx:1:12] 1 │ var foo = /[]]/; - · ───── + · ── ╰──── - help: Try to remove empty character class `[]` in regexp literal + help: Remove the empty character class: `[]` - ⚠ eslint(no-empty-character-class): Empty character class - ╭─[no_empty_character_class.tsx:1:11] + ⚠ eslint(no-empty-character-class): Empty character class will not match anything + ╭─[no_empty_character_class.tsx:1:14] 1 │ var foo = /\[[]/; - · ────── + · ── ╰──── - help: Try to remove empty character class `[]` in regexp literal + help: Remove the empty character class: `[]` - ⚠ eslint(no-empty-character-class): Empty character class - ╭─[no_empty_character_class.tsx:1:11] + ⚠ eslint(no-empty-character-class): Empty character class will not match anything + ╭─[no_empty_character_class.tsx:1:21] 1 │ var foo = /\[\[\]a-z[]/; - · ───────────── + · ── ╰──── - help: Try to remove empty character class `[]` in regexp literal + help: Remove the empty character class: `[]` - ⚠ eslint(no-empty-character-class): Empty character class - ╭─[no_empty_character_class.tsx:1:11] + ⚠ eslint(no-empty-character-class): Empty character class will not match anything + ╭─[no_empty_character_class.tsx:1:12] 1 │ var foo = /[]]/d; - · ────── + · ── + ╰──── + help: Remove the empty character class: `[]` + + ⚠ eslint(no-empty-character-class): Empty character class will not match anything + ╭─[no_empty_character_class.tsx:1:13] + 1 │ var foo = /[[][]]/v; + · ── + ╰──── + help: Remove the empty character class: `[]` + + ⚠ eslint(no-empty-character-class): Empty character class will not match anything + ╭─[no_empty_character_class.tsx:1:15] + 1 │ var foo = /[[][]]/v; + · ── + ╰──── + help: Remove the empty character class: `[]` + + ⚠ eslint(no-empty-character-class): Empty character class will not match anything + ╭─[no_empty_character_class.tsx:1:13] + 1 │ var foo = /[[]]|[]/v; + · ── + ╰──── + help: Remove the empty character class: `[]` + + ⚠ eslint(no-empty-character-class): Empty character class will not match anything + ╭─[no_empty_character_class.tsx:1:17] + 1 │ var foo = /[[]]|[]/v; + · ── ╰──── - help: Try to remove empty character class `[]` in regexp literal + help: Remove the empty character class: `[]`