From d25fa26fbed878fd4de18aa9742b84ae1fb7ef20 Mon Sep 17 00:00:00 2001 From: Garrett-R Date: Sat, 25 Jan 2025 22:17:49 -0600 Subject: [PATCH] [`ruff`] Add support for more patterns (`RUF055`) --- .../resources/test/fixtures/ruff/RUF055_0.py | 10 +- .../resources/test/fixtures/ruff/RUF055_2.py | 27 +++ crates/ruff_linter/src/rules/ruff/mod.rs | 1 + .../rules/unnecessary_regular_expression.rs | 179 ++++++++++++++---- ...f__tests__preview__RUF055_RUF055_0.py.snap | 20 +- ...f__tests__preview__RUF055_RUF055_2.py.snap | 108 +++++++++++ 6 files changed, 288 insertions(+), 57 deletions(-) create mode 100644 crates/ruff_linter/resources/test/fixtures/ruff/RUF055_2.py create mode 100644 crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__preview__RUF055_RUF055_2.py.snap diff --git a/crates/ruff_linter/resources/test/fixtures/ruff/RUF055_0.py b/crates/ruff_linter/resources/test/fixtures/ruff/RUF055_0.py index 14c70f190fab0c..81835c506d61d7 100644 --- a/crates/ruff_linter/resources/test/fixtures/ruff/RUF055_0.py +++ b/crates/ruff_linter/resources/test/fixtures/ruff/RUF055_0.py @@ -2,7 +2,7 @@ s = "str" -# this should be replaced with s.replace("abc", "") +# this should be replaced with `s.replace("abc", "")` re.sub("abc", "", s) @@ -17,7 +17,7 @@ def dashrepl(matchobj): re.sub("-", dashrepl, "pro----gram-files") -# this one should be replaced with s.startswith("abc") because the Match is +# this one should be replaced with `s.startswith("abc")` because the Match is # used in an if context for its truth value if re.match("abc", s): pass @@ -25,17 +25,17 @@ def dashrepl(matchobj): pass re.match("abc", s) # this should not be replaced because match returns a Match -# this should be replaced with "abc" in s +# this should be replaced with `"abc" in s` if re.search("abc", s): pass re.search("abc", s) # this should not be replaced -# this should be replaced with "abc" == s +# this should be replaced with `"abc" == s` if re.fullmatch("abc", s): pass re.fullmatch("abc", s) # this should not be replaced -# this should be replaced with s.split("abc") +# this should be replaced with `s.split("abc")`` re.split("abc", s) # these currently should not be modified because the patterns contain regex diff --git a/crates/ruff_linter/resources/test/fixtures/ruff/RUF055_2.py b/crates/ruff_linter/resources/test/fixtures/ruff/RUF055_2.py new file mode 100644 index 00000000000000..7e3752baa9c430 --- /dev/null +++ b/crates/ruff_linter/resources/test/fixtures/ruff/RUF055_2.py @@ -0,0 +1,27 @@ +"""Patterns that don't just involve the call, but rather the parent expression""" +import re + +s = "str" + +# this should be replaced with `"abc" not in s` +re.search("abc", s) is None + + +# this shuold be replaced with `"abc" in s` +re.search("abc", s) is not None + + +# this should be replaced with `not s.startswith("abc")` +re.match("abc", s) is None + + +# this should be replaced with `s.startswith("abc")` +re.match("abc", s) is not None + + +# this should be replaced with `s != "abc"` +re.fullmatch("abc", s) is None + + +# this should be replaced with `s == "abc"` +re.fullmatch("abc", s) is not None diff --git a/crates/ruff_linter/src/rules/ruff/mod.rs b/crates/ruff_linter/src/rules/ruff/mod.rs index 3c413d571af9ba..1c9d718ae4fd17 100644 --- a/crates/ruff_linter/src/rules/ruff/mod.rs +++ b/crates/ruff_linter/src/rules/ruff/mod.rs @@ -422,6 +422,7 @@ mod tests { #[test_case(Rule::UnrawRePattern, Path::new("RUF039_concat.py"))] #[test_case(Rule::UnnecessaryRegularExpression, Path::new("RUF055_0.py"))] #[test_case(Rule::UnnecessaryRegularExpression, Path::new("RUF055_1.py"))] + #[test_case(Rule::UnnecessaryRegularExpression, Path::new("RUF055_2.py"))] #[test_case(Rule::UnnecessaryCastToInt, Path::new("RUF046.py"))] #[test_case(Rule::PytestRaisesAmbiguousPattern, Path::new("RUF043.py"))] #[test_case(Rule::UnnecessaryRound, Path::new("RUF057.py"))] diff --git a/crates/ruff_linter/src/rules/ruff/rules/unnecessary_regular_expression.rs b/crates/ruff_linter/src/rules/ruff/rules/unnecessary_regular_expression.rs index fe263f1926bb23..90896a6664856a 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/unnecessary_regular_expression.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/unnecessary_regular_expression.rs @@ -3,11 +3,11 @@ use ruff_diagnostics::{Applicability, Diagnostic, Edit, Fix, FixAvailability, Vi use ruff_macros::{derive_message_formats, ViolationMetadata}; use ruff_python_ast::{ Arguments, CmpOp, Expr, ExprAttribute, ExprCall, ExprCompare, ExprContext, ExprStringLiteral, - Identifier, + ExprUnaryOp, Identifier, UnaryOp, }; use ruff_python_semantic::analyze::typing::find_binding_value; use ruff_python_semantic::{Modules, SemanticModel}; -use ruff_text_size::TextRange; +use ruff_text_size::{Ranged, TextRange}; use crate::checkers::ast::Checker; @@ -111,21 +111,32 @@ pub(crate) fn unnecessary_regular_expression(checker: &mut Checker, call: &ExprC return; } - // Here we know the pattern is a string literal with no metacharacters, so - // we can proceed with the str method replacement - let new_expr = re_func.replacement(); + // Now we know the pattern is a string literal with no metacharacters, so + // we can proceed with the str method replacement. + // + // We first check the complex case, where more than just the call need replacing + // Example: `re.search("abc", s) is not None` => `"abc" in s` + let (mut new_expr, mut call_range) = match re_func.get_parent_replacement(semantic) { + Some((expr, range)) => (Some(expr), range), + None => (None, TextRange::default()), + }; + // Second, we check the simple case, where only the call needs replacing + if new_expr.is_none() { + new_expr = re_func.get_call_replacement(); + call_range = call.range; + } let repl = new_expr.map(|expr| checker.generator().expr(&expr)); let mut diagnostic = Diagnostic::new( UnnecessaryRegularExpression { replacement: repl.clone(), }, - call.range, + call_range, ); if let Some(repl) = repl { diagnostic.set_fix(Fix::applicable_edit( - Edit::range_replacement(repl, call.range), + Edit::range_replacement(repl, call_range), if checker .comment_ranges() .has_comments(call, checker.source()) @@ -211,67 +222,112 @@ impl<'a> ReFunc<'a> { string: call.arguments.find_argument_value("string", 2)?, }) } - ("match", 2) if in_if_context => Some(ReFunc { - kind: ReFuncKind::Match, - pattern: call.arguments.find_argument_value("pattern", 0)?, - string: call.arguments.find_argument_value("string", 1)?, - }), - ("search", 2) if in_if_context => Some(ReFunc { - kind: ReFuncKind::Search, - pattern: call.arguments.find_argument_value("pattern", 0)?, - string: call.arguments.find_argument_value("string", 1)?, - }), - ("fullmatch", 2) if in_if_context => Some(ReFunc { - kind: ReFuncKind::Fullmatch, - pattern: call.arguments.find_argument_value("pattern", 0)?, - string: call.arguments.find_argument_value("string", 1)?, - }), + ("match", 2) if in_if_context || get_comparison_to_none(semantic).is_some() => { + Some(ReFunc { + kind: ReFuncKind::Match, + pattern: call.arguments.find_argument_value("pattern", 0)?, + string: call.arguments.find_argument_value("string", 1)?, + }) + } + ("search", 2) if in_if_context || get_comparison_to_none(semantic).is_some() => { + Some(ReFunc { + kind: ReFuncKind::Search, + pattern: call.arguments.find_argument_value("pattern", 0)?, + string: call.arguments.find_argument_value("string", 1)?, + }) + } + ("fullmatch", 2) if in_if_context || get_comparison_to_none(semantic).is_some() => { + Some(ReFunc { + kind: ReFuncKind::Fullmatch, + pattern: call.arguments.find_argument_value("pattern", 0)?, + string: call.arguments.find_argument_value("string", 1)?, + }) + } _ => None, } } - fn replacement(&self) -> Option { + /// Get replacement for the parent expression. + /// Example: `re.search("abc", s) is None` => `"abc" not in s` + fn get_parent_replacement(&self, semantic: &'a SemanticModel) -> Option<(Expr, TextRange)> { + let Some((comparison, range)) = get_comparison_to_none(semantic) else { + return None; + }; + match self.kind { + // pattern in string / pattern not in string + ReFuncKind::Search => match comparison { + ComparisonToNone::Is => Some((self.compare_expr(CmpOp::NotIn), range)), + ComparisonToNone::IsNot => Some((self.compare_expr(CmpOp::In), range)), + }, + // string.startswith(pattern) / not string.startswith(pattern) + ReFuncKind::Match => match comparison { + ComparisonToNone::Is => Some(( + self.method_expr("startswith", vec![self.pattern.clone()], true), + range, + )), + ComparisonToNone::IsNot => Some(( + self.method_expr("startswith", vec![self.pattern.clone()], false), + range, + )), + }, + // string == pattern / string != pattern + ReFuncKind::Fullmatch => match comparison { + ComparisonToNone::Is => Some((self.compare_expr(CmpOp::NotEq), range)), + ComparisonToNone::IsNot => Some((self.compare_expr(CmpOp::Eq), range)), + }, + _ => None, + } + } + + fn get_call_replacement(&self) -> Option { match self.kind { // string.replace(pattern, repl) ReFuncKind::Sub { repl } => repl .cloned() - .map(|repl| self.method_expr("replace", vec![self.pattern.clone(), repl])), + .map(|repl| self.method_expr("replace", vec![self.pattern.clone(), repl], false)), // string.startswith(pattern) - ReFuncKind::Match => Some(self.method_expr("startswith", vec![self.pattern.clone()])), + ReFuncKind::Match => { + Some(self.method_expr("startswith", vec![self.pattern.clone()], false)) + } // pattern in string ReFuncKind::Search => Some(self.compare_expr(CmpOp::In)), // string == pattern - ReFuncKind::Fullmatch => Some(Expr::Compare(ExprCompare { - range: TextRange::default(), - left: Box::new(self.string.clone()), - ops: Box::new([CmpOp::Eq]), - comparators: Box::new([self.pattern.clone()]), - })), + ReFuncKind::Fullmatch => Some(self.compare_expr(CmpOp::Eq)), // string.split(pattern) - ReFuncKind::Split => Some(self.method_expr("split", vec![self.pattern.clone()])), + ReFuncKind::Split => Some(self.method_expr("split", vec![self.pattern.clone()], false)), } } - /// Return a new compare expr of the form `self.pattern op self.string` + /// Return a new compare expr of the form `self.pattern op self.string` (or the reverse) fn compare_expr(&self, op: CmpOp) -> Expr { - Expr::Compare(ExprCompare { - left: Box::new(self.pattern.clone()), - ops: Box::new([op]), - comparators: Box::new([self.string.clone()]), - range: TextRange::default(), - }) + match op { + // Example: 'abc' in s + CmpOp::In | CmpOp::NotIn => Expr::Compare(ExprCompare { + left: Box::new(self.pattern.clone()), + ops: Box::new([op]), + comparators: Box::new([self.string.clone()]), + range: TextRange::default(), + }), + // Example: s == 'abc' + _ => Expr::Compare(ExprCompare { + left: Box::new(self.string.clone()), + ops: Box::new([op]), + comparators: Box::new([self.pattern.clone()]), + range: TextRange::default(), + }), + } } /// Return a new method call expression on `self.string` with `args` like /// `self.string.method(args...)` - fn method_expr(&self, method: &str, args: Vec) -> Expr { + fn method_expr(&self, method: &str, args: Vec, negate: bool) -> Expr { let method = Expr::Attribute(ExprAttribute { value: Box::new(self.string.clone()), attr: Identifier::new(method, TextRange::default()), ctx: ExprContext::Load, range: TextRange::default(), }); - Expr::Call(ExprCall { + let call_expr = Expr::Call(ExprCall { func: Box::new(method), arguments: Arguments { args: args.into_boxed_slice(), @@ -279,7 +335,17 @@ impl<'a> ReFunc<'a> { range: TextRange::default(), }, range: TextRange::default(), - }) + }); + + if negate { + Expr::UnaryOp(ExprUnaryOp { + op: UnaryOp::Not, + operand: Box::new(call_expr), + range: TextRange::default(), + }) + } else { + call_expr + } } } @@ -302,3 +368,32 @@ fn resolve_string_literal<'a>( None } + +enum ComparisonToNone { + Is, + IsNot, +} + +/// If the regex call is compared to `None`, return the comparison and its range. +/// Example: `re.search("abc", s) is None` +fn get_comparison_to_none(semantic: &SemanticModel) -> Option<(ComparisonToNone, TextRange)> { + if let Some(parent_expr) = semantic.current_expression_parent() { + if let Expr::Compare(ExprCompare { + ops, comparators, .. + }) = parent_expr + { + if ops.len() == 1 { + if let Some(right) = comparators.first() { + if right.is_none_literal_expr() { + return match ops[0] { + CmpOp::Is => Some((ComparisonToNone::Is, parent_expr.range())), + CmpOp::IsNot => Some((ComparisonToNone::IsNot, parent_expr.range())), + _ => None, + }; + } + } + } + } + } + None +} diff --git a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__preview__RUF055_RUF055_0.py.snap b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__preview__RUF055_RUF055_0.py.snap index a2a3ee23048d52..9aea3d19ac278a 100644 --- a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__preview__RUF055_RUF055_0.py.snap +++ b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__preview__RUF055_RUF055_0.py.snap @@ -4,7 +4,7 @@ snapshot_kind: text --- RUF055_0.py:6:1: RUF055 [*] Plain string pattern passed to `re` function | -5 | # this should be replaced with s.replace("abc", "") +5 | # this should be replaced with `s.replace("abc", "")` 6 | re.sub("abc", "", s) | ^^^^^^^^^^^^^^^^^^^^ RUF055 | @@ -13,7 +13,7 @@ RUF055_0.py:6:1: RUF055 [*] Plain string pattern passed to `re` function ℹ Safe fix 3 3 | s = "str" 4 4 | -5 5 | # this should be replaced with s.replace("abc", "") +5 5 | # this should be replaced with `s.replace("abc", "")` 6 |-re.sub("abc", "", s) 6 |+s.replace("abc", "") 7 7 | @@ -22,7 +22,7 @@ RUF055_0.py:6:1: RUF055 [*] Plain string pattern passed to `re` function RUF055_0.py:22:4: RUF055 [*] Plain string pattern passed to `re` function | -20 | # this one should be replaced with s.startswith("abc") because the Match is +20 | # this one should be replaced with `s.startswith("abc")` because the Match is 21 | # used in an if context for its truth value 22 | if re.match("abc", s): | ^^^^^^^^^^^^^^^^^^ RUF055 @@ -33,7 +33,7 @@ RUF055_0.py:22:4: RUF055 [*] Plain string pattern passed to `re` function ℹ Safe fix 19 19 | -20 20 | # this one should be replaced with s.startswith("abc") because the Match is +20 20 | # this one should be replaced with `s.startswith("abc")` because the Match is 21 21 | # used in an if context for its truth value 22 |-if re.match("abc", s): 22 |+if s.startswith("abc"): @@ -43,7 +43,7 @@ RUF055_0.py:22:4: RUF055 [*] Plain string pattern passed to `re` function RUF055_0.py:29:4: RUF055 [*] Plain string pattern passed to `re` function | -28 | # this should be replaced with "abc" in s +28 | # this should be replaced with `"abc" in s` 29 | if re.search("abc", s): | ^^^^^^^^^^^^^^^^^^^ RUF055 30 | pass @@ -54,7 +54,7 @@ RUF055_0.py:29:4: RUF055 [*] Plain string pattern passed to `re` function ℹ Safe fix 26 26 | re.match("abc", s) # this should not be replaced because match returns a Match 27 27 | -28 28 | # this should be replaced with "abc" in s +28 28 | # this should be replaced with `"abc" in s` 29 |-if re.search("abc", s): 29 |+if "abc" in s: 30 30 | pass @@ -63,7 +63,7 @@ RUF055_0.py:29:4: RUF055 [*] Plain string pattern passed to `re` function RUF055_0.py:34:4: RUF055 [*] Plain string pattern passed to `re` function | -33 | # this should be replaced with "abc" == s +33 | # this should be replaced with `"abc" == s` 34 | if re.fullmatch("abc", s): | ^^^^^^^^^^^^^^^^^^^^^^ RUF055 35 | pass @@ -74,7 +74,7 @@ RUF055_0.py:34:4: RUF055 [*] Plain string pattern passed to `re` function ℹ Safe fix 31 31 | re.search("abc", s) # this should not be replaced 32 32 | -33 33 | # this should be replaced with "abc" == s +33 33 | # this should be replaced with `"abc" == s` 34 |-if re.fullmatch("abc", s): 34 |+if s == "abc": 35 35 | pass @@ -83,7 +83,7 @@ RUF055_0.py:34:4: RUF055 [*] Plain string pattern passed to `re` function RUF055_0.py:39:1: RUF055 [*] Plain string pattern passed to `re` function | -38 | # this should be replaced with s.split("abc") +38 | # this should be replaced with `s.split("abc")`` 39 | re.split("abc", s) | ^^^^^^^^^^^^^^^^^^ RUF055 40 | @@ -94,7 +94,7 @@ RUF055_0.py:39:1: RUF055 [*] Plain string pattern passed to `re` function ℹ Safe fix 36 36 | re.fullmatch("abc", s) # this should not be replaced 37 37 | -38 38 | # this should be replaced with s.split("abc") +38 38 | # this should be replaced with `s.split("abc")`` 39 |-re.split("abc", s) 39 |+s.split("abc") 40 40 | diff --git a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__preview__RUF055_RUF055_2.py.snap b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__preview__RUF055_RUF055_2.py.snap new file mode 100644 index 00000000000000..bcece70614575b --- /dev/null +++ b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__preview__RUF055_RUF055_2.py.snap @@ -0,0 +1,108 @@ +--- +source: crates/ruff_linter/src/rules/ruff/mod.rs +snapshot_kind: text +--- +RUF055_2.py:7:1: RUF055 [*] Plain string pattern passed to `re` function + | +6 | # this should be replaced with `"abc" not in s` +7 | re.search("abc", s) is None + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF055 + | + = help: Replace with `"abc" not in s` + +ℹ Safe fix +4 4 | s = "str" +5 5 | +6 6 | # this should be replaced with `"abc" not in s` +7 |-re.search("abc", s) is None + 7 |+"abc" not in s +8 8 | +9 9 | +10 10 | # this shuold be replaced with `"abc" in s` + +RUF055_2.py:11:1: RUF055 [*] Plain string pattern passed to `re` function + | +10 | # this shuold be replaced with `"abc" in s` +11 | re.search("abc", s) is not None + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF055 + | + = help: Replace with `"abc" in s` + +ℹ Safe fix +8 8 | +9 9 | +10 10 | # this shuold be replaced with `"abc" in s` +11 |-re.search("abc", s) is not None + 11 |+"abc" in s +12 12 | +13 13 | +14 14 | # this should be replaced with `not s.startswith("abc")` + +RUF055_2.py:15:1: RUF055 [*] Plain string pattern passed to `re` function + | +14 | # this should be replaced with `not s.startswith("abc")` +15 | re.match("abc", s) is None + | ^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF055 + | + = help: Replace with `not s.startswith("abc")` + +ℹ Safe fix +12 12 | +13 13 | +14 14 | # this should be replaced with `not s.startswith("abc")` +15 |-re.match("abc", s) is None + 15 |+not s.startswith("abc") +16 16 | +17 17 | +18 18 | # this should be replaced with `s.startswith("abc")` + +RUF055_2.py:19:1: RUF055 [*] Plain string pattern passed to `re` function + | +18 | # this should be replaced with `s.startswith("abc")` +19 | re.match("abc", s) is not None + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF055 + | + = help: Replace with `s.startswith("abc")` + +ℹ Safe fix +16 16 | +17 17 | +18 18 | # this should be replaced with `s.startswith("abc")` +19 |-re.match("abc", s) is not None + 19 |+s.startswith("abc") +20 20 | +21 21 | +22 22 | # this should be replaced with `s != "abc"` + +RUF055_2.py:23:1: RUF055 [*] Plain string pattern passed to `re` function + | +22 | # this should be replaced with `s != "abc"` +23 | re.fullmatch("abc", s) is None + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF055 + | + = help: Replace with `s != "abc"` + +ℹ Safe fix +20 20 | +21 21 | +22 22 | # this should be replaced with `s != "abc"` +23 |-re.fullmatch("abc", s) is None + 23 |+s != "abc" +24 24 | +25 25 | +26 26 | # this should be replaced with `s == "abc"` + +RUF055_2.py:27:1: RUF055 [*] Plain string pattern passed to `re` function + | +26 | # this should be replaced with `s == "abc"` +27 | re.fullmatch("abc", s) is not None + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF055 + | + = help: Replace with `s == "abc"` + +ℹ Safe fix +24 24 | +25 25 | +26 26 | # this should be replaced with `s == "abc"` +27 |-re.fullmatch("abc", s) is not None + 27 |+s == "abc"