Skip to content

Commit

Permalink
[ruff] Add support for more re patterns (RUF055) (#15764)
Browse files Browse the repository at this point in the history
## Summary
Implements some of #14738, by adding support for 6 new patterns:
```py
re.search("abc", s) is None       # ⇒ "abc" not in s
re.search("abc", s) is not None   # ⇒ "abc" in s

re.match("abc", s) is None       # ⇒ not s.startswith("abc")  
re.match("abc", s) is not None   # ⇒ s.startswith("abc")

re.fullmatch("abc", s) is None       # ⇒ s != "abc"
re.fullmatch("abc", s) is not None   # ⇒ s == "abc"
```


## Test Plan

```shell
cargo nextest run
cargo insta review
```

And ran the fix on my startup's repo.


## Note

One minor limitation here:

```py
if not re.match('abc', s) is None:
    pass
```

will get fixed to this (technically correct, just not nice):
```py
if not not s.startswith('abc'):
    pass
```

This seems fine given that Ruff has this covered: the initial code
should be caught by
[E714](https://docs.astral.sh/ruff/rules/not-is-test/) and the fixed
code should be caught by
[SIM208](https://docs.astral.sh/ruff/rules/double-negation/).
  • Loading branch information
Garrett-R authored Jan 29, 2025
1 parent 0f1035b commit 6c1e195
Show file tree
Hide file tree
Showing 6 changed files with 344 additions and 46 deletions.
10 changes: 5 additions & 5 deletions crates/ruff_linter/resources/test/fixtures/ruff/RUF055_0.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

s = "str"

# this should be replaced with s.replace("abc", "")
# this should be replaced with `s.replace("abc", "")`
re.sub("abc", "", s)


Expand All @@ -17,25 +17,25 @@ def dashrepl(matchobj):

re.sub("-", dashrepl, "pro----gram-files")

# this one should be replaced with s.startswith("abc") because the Match is
# this one should be replaced with `s.startswith("abc")` because the Match is
# used in an if context for its truth value
if re.match("abc", s):
pass
if m := re.match("abc", s): # this should *not* be replaced
pass
re.match("abc", s) # this should not be replaced because match returns a Match

# this should be replaced with "abc" in s
# this should be replaced with `"abc" in s`
if re.search("abc", s):
pass
re.search("abc", s) # this should not be replaced

# this should be replaced with "abc" == s
# this should be replaced with `"abc" == s`
if re.fullmatch("abc", s):
pass
re.fullmatch("abc", s) # this should not be replaced

# this should be replaced with s.split("abc")
# this should be replaced with `s.split("abc")`
re.split("abc", s)

# these currently should not be modified because the patterns contain regex
Expand Down
52 changes: 52 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/ruff/RUF055_2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
"""Patterns that don't just involve the call, but rather the parent expression"""
import re

s = "str"

# this should be replaced with `"abc" not in s`
re.search("abc", s) is None


# this should be replaced with `"abc" in s`
re.search("abc", s) is not None


# this should be replaced with `not s.startswith("abc")`
re.match("abc", s) is None


# this should be replaced with `s.startswith("abc")`
re.match("abc", s) is not None


# this should be replaced with `s != "abc"`
re.fullmatch("abc", s) is None


# this should be replaced with `s == "abc"`
re.fullmatch("abc", s) is not None


# this should trigger an unsafe fix because of the presence of a comment within the
# expression being replaced (which we'd lose)
if (
re.fullmatch(
"a really really really really long string",
s,
)
# with a comment here
is None
):
pass


# this should trigger a safe fix (comments are preserved given they're outside the
# expression)
if ( # leading
re.fullmatch(
"a really really really really long string",
s,
)
is None # trailing
):
pass
1 change: 1 addition & 0 deletions crates/ruff_linter/src/rules/ruff/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,7 @@ mod tests {
#[test_case(Rule::UnrawRePattern, Path::new("RUF039_concat.py"))]
#[test_case(Rule::UnnecessaryRegularExpression, Path::new("RUF055_0.py"))]
#[test_case(Rule::UnnecessaryRegularExpression, Path::new("RUF055_1.py"))]
#[test_case(Rule::UnnecessaryRegularExpression, Path::new("RUF055_2.py"))]
#[test_case(Rule::UnnecessaryCastToInt, Path::new("RUF046.py"))]
#[test_case(Rule::PytestRaisesAmbiguousPattern, Path::new("RUF043.py"))]
#[test_case(Rule::UnnecessaryRound, Path::new("RUF057.py"))]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use ruff_diagnostics::{Applicability, Diagnostic, Edit, Fix, FixAvailability, Vi
use ruff_macros::{derive_message_formats, ViolationMetadata};
use ruff_python_ast::{
Arguments, CmpOp, Expr, ExprAttribute, ExprCall, ExprCompare, ExprContext, ExprStringLiteral,
Identifier,
ExprUnaryOp, Identifier, UnaryOp,
};
use ruff_python_semantic::analyze::typing::find_binding_value;
use ruff_python_semantic::{Modules, SemanticModel};
Expand Down Expand Up @@ -111,25 +111,22 @@ pub(crate) fn unnecessary_regular_expression(checker: &mut Checker, call: &ExprC
return;
}

// Here we know the pattern is a string literal with no metacharacters, so
// we can proceed with the str method replacement
// Now we know the pattern is a string literal with no metacharacters, so
// we can proceed with the str method replacement.
let new_expr = re_func.replacement();

let repl = new_expr.map(|expr| checker.generator().expr(&expr));
let mut diagnostic = Diagnostic::new(
UnnecessaryRegularExpression {
replacement: repl.clone(),
},
call.range,
re_func.range,
);

if let Some(repl) = repl {
diagnostic.set_fix(Fix::applicable_edit(
Edit::range_replacement(repl, call.range),
if checker
.comment_ranges()
.has_comments(call, checker.source())
{
Edit::range_replacement(repl, re_func.range),
if checker.comment_ranges().intersects(re_func.range) {
Applicability::Unsafe
} else {
Applicability::Safe
Expand All @@ -156,6 +153,8 @@ struct ReFunc<'a> {
kind: ReFuncKind<'a>,
pattern: &'a Expr,
string: &'a Expr,
comparison_to_none: Option<ComparisonToNone>,
range: TextRange,
}

impl<'a> ReFunc<'a> {
Expand All @@ -165,8 +164,14 @@ impl<'a> ReFunc<'a> {
func_name: &str,
) -> Option<Self> {
// the proposed fixes for match, search, and fullmatch rely on the
// return value only being used for its truth value
let in_if_context = semantic.in_boolean_test();
// return value only being used for its truth value or being compared to None
let comparison_to_none = get_comparison_to_none(semantic);
let in_truthy_context = semantic.in_boolean_test() || comparison_to_none.is_some();

let (comparison_to_none, range) = match comparison_to_none {
Some((cmp, range)) => (Some(cmp), range),
None => (None, call.range),
};

match (func_name, call.arguments.len()) {
// `split` is the safest of these to fix, as long as metacharacters
Expand All @@ -175,6 +180,8 @@ impl<'a> ReFunc<'a> {
kind: ReFuncKind::Split,
pattern: call.arguments.find_argument_value("pattern", 0)?,
string: call.arguments.find_argument_value("string", 1)?,
comparison_to_none,
range,
}),
// `sub` is only safe to fix if `repl` is a string. `re.sub` also
// allows it to be a function, which will *not* work in the str
Expand Down Expand Up @@ -209,55 +216,91 @@ impl<'a> ReFunc<'a> {
},
pattern: call.arguments.find_argument_value("pattern", 0)?,
string: call.arguments.find_argument_value("string", 2)?,
comparison_to_none,
range,
})
}
("match", 2) if in_if_context => Some(ReFunc {
("match", 2) if in_truthy_context => Some(ReFunc {
kind: ReFuncKind::Match,
pattern: call.arguments.find_argument_value("pattern", 0)?,
string: call.arguments.find_argument_value("string", 1)?,
comparison_to_none,
range,
}),
("search", 2) if in_if_context => Some(ReFunc {
("search", 2) if in_truthy_context => Some(ReFunc {
kind: ReFuncKind::Search,
pattern: call.arguments.find_argument_value("pattern", 0)?,
string: call.arguments.find_argument_value("string", 1)?,
comparison_to_none,
range,
}),
("fullmatch", 2) if in_if_context => Some(ReFunc {
("fullmatch", 2) if in_truthy_context => Some(ReFunc {
kind: ReFuncKind::Fullmatch,
pattern: call.arguments.find_argument_value("pattern", 0)?,
string: call.arguments.find_argument_value("string", 1)?,
comparison_to_none,
range,
}),
_ => None,
}
}

/// Get replacement for the call or parent expression.
///
/// Examples:
/// `re.search("abc", s) is None` => `"abc" not in s`
/// `re.search("abc", s)` => `"abc" in s`
fn replacement(&self) -> Option<Expr> {
match self.kind {
match (&self.kind, &self.comparison_to_none) {
// string.replace(pattern, repl)
ReFuncKind::Sub { repl } => repl
(ReFuncKind::Sub { repl }, _) => repl
.cloned()
.map(|repl| self.method_expr("replace", vec![self.pattern.clone(), repl])),
// string.startswith(pattern)
ReFuncKind::Match => Some(self.method_expr("startswith", vec![self.pattern.clone()])),
// string.split(pattern)
(ReFuncKind::Split, _) => Some(self.method_expr("split", vec![self.pattern.clone()])),
// pattern in string
ReFuncKind::Search => Some(self.compare_expr(CmpOp::In)),
(ReFuncKind::Search, None | Some(ComparisonToNone::IsNot)) => {
Some(ReFunc::compare_expr(self.pattern, CmpOp::In, self.string))
}
// pattern not in string
(ReFuncKind::Search, Some(ComparisonToNone::Is)) => Some(ReFunc::compare_expr(
self.pattern,
CmpOp::NotIn,
self.string,
)),
// string.startswith(pattern)
(ReFuncKind::Match, None | Some(ComparisonToNone::IsNot)) => {
Some(self.method_expr("startswith", vec![self.pattern.clone()]))
}
// not string.startswith(pattern)
(ReFuncKind::Match, Some(ComparisonToNone::Is)) => {
let expr = self.method_expr("startswith", vec![self.pattern.clone()]);
let negated_expr = Expr::UnaryOp(ExprUnaryOp {
op: UnaryOp::Not,
operand: Box::new(expr),
range: TextRange::default(),
});
Some(negated_expr)
}
// string == pattern
ReFuncKind::Fullmatch => Some(Expr::Compare(ExprCompare {
range: TextRange::default(),
left: Box::new(self.string.clone()),
ops: Box::new([CmpOp::Eq]),
comparators: Box::new([self.pattern.clone()]),
})),
// string.split(pattern)
ReFuncKind::Split => Some(self.method_expr("split", vec![self.pattern.clone()])),
(ReFuncKind::Fullmatch, None | Some(ComparisonToNone::IsNot)) => {
Some(ReFunc::compare_expr(self.string, CmpOp::Eq, self.pattern))
}
// string != pattern
(ReFuncKind::Fullmatch, Some(ComparisonToNone::Is)) => Some(ReFunc::compare_expr(
self.string,
CmpOp::NotEq,
self.pattern,
)),
}
}

/// Return a new compare expr of the form `self.pattern op self.string`
fn compare_expr(&self, op: CmpOp) -> Expr {
/// Return a new compare expr of the form `left op right`
fn compare_expr(left: &Expr, op: CmpOp, right: &Expr) -> Expr {
Expr::Compare(ExprCompare {
left: Box::new(self.pattern.clone()),
left: Box::new(left.clone()),
ops: Box::new([op]),
comparators: Box::new([self.string.clone()]),
comparators: Box::new([right.clone()]),
range: TextRange::default(),
})
}
Expand Down Expand Up @@ -302,3 +345,35 @@ fn resolve_string_literal<'a>(

None
}

#[derive(Clone, Copy, Debug)]
enum ComparisonToNone {
Is,
IsNot,
}

/// If the regex call is compared to `None`, return the comparison and its range.
/// Example: `re.search("abc", s) is None`
fn get_comparison_to_none(semantic: &SemanticModel) -> Option<(ComparisonToNone, TextRange)> {
let parent_expr = semantic.current_expression_parent()?;

let Expr::Compare(ExprCompare {
ops,
comparators,
range,
..
}) = parent_expr
else {
return None;
};

let Some(Expr::NoneLiteral(_)) = comparators.first() else {
return None;
};

match ops.as_ref() {
[CmpOp::Is] => Some((ComparisonToNone::Is, *range)),
[CmpOp::IsNot] => Some((ComparisonToNone::IsNot, *range)),
_ => None,
}
}
Loading

0 comments on commit 6c1e195

Please sign in to comment.