Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ruff] re and regex calls with unraw string as first argument (RUF039) #14446

Merged
merged 11 commits into from
Nov 19, 2024
Merged
55 changes: 55 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/ruff/RUF051.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import re
import regex

# Errors: the first argument is a single non-raw string or bytes literal
re.compile('single free-spacing', flags=re.X)
re.findall('si\ngle')
re.finditer("dou\ble")
re.fullmatch('''t\riple single''')
re.match("""\triple double""")
re.search('two', 'args')
re.split("raw", r'second')
re.sub(u'''nicode''', u"f(?i)rst")
re.subn(b"""ytes are""", f"\u006e")

regex.compile('single free-spacing', flags=regex.X)
regex.findall('si\ngle')
regex.finditer("dou\ble")
regex.fullmatch('''t\riple single''')
regex.match("""\triple double""")
regex.search('two', 'args')
regex.split("raw", r'second')
regex.sub(u'''nicode''', u"f(?i)rst")
regex.subn(b"""ytes are""", f"\u006e")

regex.template("""(?m)
(?:ulti)?
(?=(?<!(?<=(?!l)))
l(?i:ne)
""", flags = regex.X)


# No errors: the first argument is raw, implicitly concatenated, an f-string, missing, or not a string/bytes literal
re.compile(R'uppercase')
re.findall(not_literal)
re.finditer(0, literal_but_not_string)
re.fullmatch() # no first argument
re.match('string' f'''concatenation''')
re.search(R"raw" r'concatenation')
re.split(rf"multiple", f"""lags""")
re.sub(FR'ee', '''as in free speech''')
re.subn(br"""eak your machine with rm -""", rf"""/""")

regex.compile(R'uppercase')
regex.findall(not_literal)
regex.finditer(0, literal_but_not_string)
regex.fullmatch() # no first argument
regex.match('string' f'''concatenation''')
regex.search(R"raw" r'concatenation')
regex.split(rf"multiple", f"""lags""")
regex.sub(FR'ee', '''as in free speech''')
regex.subn(br"""eak your machine with rm -""", rf"""/""")

regex.splititer(both, non_literal)
regex.subf(f, lambda _: r'means', '"format"')
regex.subfn(fn, f'''a$1n't''', lambda: "'function'")
3 changes: 3 additions & 0 deletions crates/ruff_linter/src/checkers/ast/analyze/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1058,6 +1058,9 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
if checker.enabled(Rule::MapIntVersionParsing) {
ruff::rules::map_int_version_parsing(checker, call);
}
if checker.enabled(Rule::UnrawRePattern) {
ruff::rules::unraw_re_pattern(checker, call);
}
}
Expr::Dict(dict) => {
if checker.any_enabled(&[
Expand Down
1 change: 1 addition & 0 deletions crates/ruff_linter/src/codes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -972,6 +972,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
(Ruff, "036") => (RuleGroup::Preview, rules::ruff::rules::NoneNotAtEndOfUnion),
(Ruff, "038") => (RuleGroup::Preview, rules::ruff::rules::RedundantBoolLiteral),
(Ruff, "048") => (RuleGroup::Preview, rules::ruff::rules::MapIntVersionParsing),
(Ruff, "051") => (RuleGroup::Preview, rules::ruff::rules::UnrawRePattern),
InSyncWithFoo marked this conversation as resolved.
Show resolved Hide resolved
(Ruff, "100") => (RuleGroup::Stable, rules::ruff::rules::UnusedNOQA),
(Ruff, "101") => (RuleGroup::Stable, rules::ruff::rules::RedirectedNOQA),

Expand Down
1 change: 1 addition & 0 deletions crates/ruff_linter/src/rules/ruff/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,7 @@ mod tests {
#[test_case(Rule::MutableDataclassDefault, Path::new("RUF008_attrs.py"))]
#[test_case(Rule::MapIntVersionParsing, Path::new("RUF048.py"))]
#[test_case(Rule::MapIntVersionParsing, Path::new("RUF048_1.py"))]
#[test_case(Rule::UnrawRePattern, Path::new("RUF051.py"))]
fn preview_rules(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!(
"preview__{}_{}",
Expand Down
2 changes: 2 additions & 0 deletions crates/ruff_linter/src/rules/ruff/rules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ pub(crate) use static_key_dict_comprehension::*;
pub(crate) use test_rules::*;
pub(crate) use unnecessary_iterable_allocation_for_first_element::*;
pub(crate) use unnecessary_key_check::*;
pub(crate) use unraw_re_pattern::*;
InSyncWithFoo marked this conversation as resolved.
Show resolved Hide resolved
pub(crate) use unsafe_markup_use::*;
pub(crate) use unused_async::*;
pub(crate) use unused_noqa::*;
Expand Down Expand Up @@ -74,6 +75,7 @@ mod suppression_comment_visitor;
pub(crate) mod test_rules;
mod unnecessary_iterable_allocation_for_first_element;
mod unnecessary_key_check;
mod unraw_re_pattern;
mod unsafe_markup_use;
mod unused_async;
mod unused_noqa;
Expand Down
176 changes: 176 additions & 0 deletions crates/ruff_linter/src/rules/ruff/rules/unraw_re_pattern.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::{Expr, ExprBytesLiteral, ExprCall, ExprStringLiteral};
use ruff_python_semantic::{Modules, SemanticModel};
use ruff_text_size::{Ranged, TextRange};
use std::fmt::{Display, Formatter};

use crate::checkers::ast::Checker;

/// ## What it does
/// Reports the following `re` and `regex` calls when
/// their first arguments are not raw strings:
///
/// - Both modules: `compile`, `findall`, `finditer`,
/// `fullmatch`, `match`, `search`, `split`, `sub`, `subn`.
InSyncWithFoo marked this conversation as resolved.
Show resolved Hide resolved
/// - `regex`-specific: `splititer`, `subf`, `subfn`, `template`.
///
/// ## Why is this bad?
/// Regular expressions should be written
/// using raw strings to avoid double escaping.
///
/// ## Example
///
/// ```python
/// re.compile("foo\\bar")
/// ```
///
/// Use instead:
///
/// ```python
/// re.compile(r"foo\bar")
/// ```
#[violation]
pub struct UnrawRePattern {
module: RegexModule,
func: String,
kind: PatternKind,
}

// Diagnostic text for `UnrawRePattern`: the message names the offending call
// and states whether a raw string or a raw bytes literal was expected.
impl Violation for UnrawRePattern {
// NOTE(review): `#[derive_message_formats]` presumably inspects the `format!`
// expressions in this body — confirm before restructuring it.
#[derive_message_formats]
fn message(&self) -> String {
let Self { module, func, kind } = &self;
// Render the call the way it appears in source, e.g. `re.compile()`.
let call = format!("`{module}.{func}()`");

// The wording differs only in the literal kind being named.
match kind {
PatternKind::String => format!("First argument to {call} is not raw string"),
PatternKind::Bytes => format!("First argument to {call} is not raw bytes literal"),
}
}

// Title of the suggested change; always `Some`, one wording per literal kind.
fn fix_title(&self) -> Option<String> {
match self.kind {
PatternKind::String => Some("Replace with raw string".to_string()),
PatternKind::Bytes => Some("Replace with raw bytes literal".to_string()),
}
}
}

/// The regular-expression module a flagged call belongs to.
///
/// The [`Display`] output is the module's Python name (`re` or `regex`), which is
/// what gets interpolated into the diagnostic message.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
enum RegexModule {
    /// The standard-library `re` module.
    Re,
    /// The `regex` module.
    Regex,
}

impl RegexModule {
    /// Returns `true` for [`RegexModule::Regex`], `false` for [`RegexModule::Re`].
    fn is_regex(self) -> bool {
        matches!(self, RegexModule::Regex)
    }
}

impl Display for RegexModule {
    /// Writes the module's Python name.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // The name is a plain static string, so write it directly rather than
        // going through the `write!` formatting machinery.
        f.write_str(match self {
            RegexModule::Re => "re",
            RegexModule::Regex => "regex",
        })
    }
}

/// The kind of literal passed as the pattern argument; selects the wording of
/// the diagnostic message and of the fix title.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
enum PatternKind {
    /// The pattern is a string literal.
    String,
    /// The pattern is a bytes literal.
    Bytes,
}

/// RUF051
///
/// Pushes an [`UnrawRePattern`] diagnostic for `call` when it targets one of the
/// checked `re` / `regex` functions and its first positional argument is a
/// non-raw string or bytes literal. The diagnostic range is that argument.
pub(crate) fn unraw_re_pattern(checker: &mut Checker, call: &ExprCall) {
    let semantic = checker.semantic();

    // Bail out early when the semantic model has seen neither module.
    let seen_regex_module =
        semantic.seen_module(Modules::RE) || semantic.seen_module(Modules::REGEX);
    if !seen_regex_module {
        return;
    }

    // First identify the callee, then (only if that succeeds) classify the pattern.
    let violation_and_range =
        regex_module_and_func(semantic, call.func.as_ref()).and_then(|(module, func)| {
            let (kind, range) = pattern_kind_and_range(call.arguments.args.as_ref())?;
            Some((UnrawRePattern { module, func, kind }, range))
        });

    if let Some((violation, range)) = violation_and_range {
        checker.diagnostics.push(Diagnostic::new(violation, range));
    }
}

/// Identifies which regex module and function `expr` refers to.
///
/// Returns `None` unless `expr` resolves to a two-segment qualified name of the
/// form `re.<func>` or `regex.<func>` and `<func>` is one of the checked
/// functions: those accepted by `is_shared` for either module, plus those
/// accepted by `is_regex_specific` for `regex` only.
fn regex_module_and_func(semantic: &SemanticModel, expr: &Expr) -> Option<(RegexModule, String)> {
    let qualified_name = semantic.resolve_qualified_name(expr)?;

    let (module, func) = match qualified_name.segments() {
        ["re", func] => (RegexModule::Re, *func),
        ["regex", func] => (RegexModule::Regex, *func),
        _ => return None,
    };

    let is_checked = is_shared(func) || (module.is_regex() && is_regex_specific(func));

    is_checked.then(|| (module, func.to_string()))
}

/// Classifies the first positional argument of a call.
///
/// Returns the literal kind together with the argument's range when the first
/// argument is a string or bytes literal that is neither raw nor implicitly
/// concatenated. Returns `None` when there is no first argument, when it is
/// some other kind of expression, or when the literal is raw or concatenated.
fn pattern_kind_and_range(arguments: &[Expr]) -> Option<(PatternKind, TextRange)> {
    let pattern = arguments.first()?;

    let kind = match pattern {
        Expr::StringLiteral(ExprStringLiteral { value, .. })
            if !(value.is_implicit_concatenated() || value.is_raw()) =>
        {
            PatternKind::String
        }
        Expr::BytesLiteral(ExprBytesLiteral { value, .. })
            if !(value.is_implicit_concatenated() || value.is_raw()) =>
        {
            PatternKind::Bytes
        }
        // Raw or concatenated literals, and every non-literal expression.
        _ => return None,
    };

    Some((kind, pattern.range()))
}

/// Returns `true` when `func` is one of the checked function names that are
/// available under both `re` and `regex`.
fn is_shared(func: &str) -> bool {
    const SHARED_FUNCTIONS: [&str; 9] = [
        "compile",
        "findall",
        "finditer",
        "fullmatch",
        "match",
        "search",
        "split",
        "sub",
        "subn",
    ];

    SHARED_FUNCTIONS.contains(&func)
}

/// Returns `true` when `func` is one of the checked function names that only
/// `regex` provides.
fn is_regex_specific(func: &str) -> bool {
    const REGEX_ONLY_FUNCTIONS: [&str; 4] = ["splititer", "subf", "subfn", "template"];

    REGEX_ONLY_FUNCTIONS.contains(&func)
}
Loading
Loading