Skip to content

Commit

Permalink
Implementation for RUF035 split-of-static-string
Browse files Browse the repository at this point in the history
  • Loading branch information
sbrugman committed Oct 30, 2024
1 parent 60a2dc5 commit 721fd96
Show file tree
Hide file tree
Showing 8 changed files with 585 additions and 0 deletions.
56 changes: 56 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/ruff/RUF035.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# setup
sep = ","
no_sep = None

# positives
"""
itemA
itemB
itemC
""".split()

"a,b,c,d".split(",")
"a,b,c,d".split(None)
"a,b,c,d".split(",", 1)
"a,b,c,d".split(None, 1)
"a,b,c,d".split(sep=",")
"a,b,c,d".split(sep=None)
"a,b,c,d".split(sep=",", maxsplit=1)
"a,b,c,d".split(sep=None, maxsplit=1)
"a,b,c,d".split(maxsplit=1, sep=",")
"a,b,c,d".split(maxsplit=1, sep=None)
"a,b,c,d".split(",", maxsplit=1)
"a,b,c,d".split(None, maxsplit=1)
"a,b,c,d".split(maxsplit=1)
"a,b,c,d".split(maxsplit=1.0)
"a,b,c,d".split(maxsplit=1)
"a,b,c,d".split(maxsplit=0)
"VERB AUX PRON ADP DET".split(" ")
' 1 2 3 '.split()
'1<>2<>3<4'.split('<>')

# negatives

# test
"a,b,c,d".split(maxsplit="hello")

# variable names not implemented
"a,b,c,d".split(sep)
"a,b,c,d".split(no_sep)
for n in range(3):
"a,b,c,d".split(",", maxsplit=n)

# f-strings not yet implemented
world = "world"
_ = f"{world}_hello_world".split("_")

hello = "hello_world"
_ = f"{hello}_world".split("_")

# split on bytes not yet implemented, much less frequent
b"TesT.WwW.ExamplE.CoM".split(b".")

# str.splitlines not yet implemented
"hello\nworld".splitlines()
"hello\nworld".splitlines(keepends=True)
"hello\nworld".splitlines(keepends=False)
12 changes: 12 additions & 0 deletions crates/ruff_linter/src/checkers/ast/analyze/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,8 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
Rule::StaticJoinToFString,
// refurb
Rule::HashlibDigestHex,
// ruff
Rule::SplitOfStaticString,
]) {
if let Expr::Attribute(ast::ExprAttribute { value, attr, .. }) = func.as_ref() {
let attr = attr.as_str();
Expand All @@ -401,6 +403,16 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
string_value.to_str(),
);
}
} else if attr == "split" || attr == "rsplit" {
// "...".split(...) call
if checker.enabled(Rule::SplitOfStaticString) {
ruff::rules::split_of_static_string(
checker,
attr,
call,
string_value.to_str(),
);
}
} else if attr == "format" {
// "...".format(...) call
let location = expr.range();
Expand Down
1 change: 1 addition & 0 deletions crates/ruff_linter/src/codes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -963,6 +963,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
(Ruff, "032") => (RuleGroup::Preview, rules::ruff::rules::DecimalFromFloatLiteral),
(Ruff, "033") => (RuleGroup::Preview, rules::ruff::rules::PostInitDefault),
(Ruff, "034") => (RuleGroup::Preview, rules::ruff::rules::UselessIfElse),
(Ruff, "035") => (RuleGroup::Preview, rules::ruff::rules::SplitOfStaticString),
(Ruff, "100") => (RuleGroup::Stable, rules::ruff::rules::UnusedNOQA),
(Ruff, "101") => (RuleGroup::Stable, rules::ruff::rules::RedirectedNOQA),

Expand Down
1 change: 1 addition & 0 deletions crates/ruff_linter/src/rules/ruff/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ mod tests {
#[test_case(Rule::UselessIfElse, Path::new("RUF034.py"))]
#[test_case(Rule::RedirectedNOQA, Path::new("RUF101.py"))]
#[test_case(Rule::PostInitDefault, Path::new("RUF033.py"))]
#[test_case(Rule::SplitOfStaticString, Path::new("RUF035.py"))]
fn rules(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
let diagnostics = test_path(
Expand Down
3 changes: 3 additions & 0 deletions crates/ruff_linter/src/rules/ruff/rules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,6 @@ pub(crate) enum Context {
pub(crate) use post_init_default::*;

mod post_init_default;
pub(crate) use split_of_static_string::*;

mod split_of_static_string;
165 changes: 165 additions & 0 deletions crates/ruff_linter/src/rules/ruff/rules/split_of_static_string.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::{
Expr, ExprCall, ExprContext, ExprList, ExprStringLiteral, StringLiteral, StringLiteralFlags,
StringLiteralValue,
};
use ruff_text_size::{Ranged, TextRange};

use crate::checkers::ast::Checker;

/// ## What it does
/// Checks for str.split calls that can be replaced with a `list` literal.
///
/// ## Why is this bad?
/// List literals are more readable and do not require the overhead of calling `str.split`.
///
/// ## Example
/// ```python
/// "a,b,c,d".split(",")
/// ```
///
/// Use instead:
/// ```python
/// ["a", "b", "c", "d"]
///
/// ## References
///
/// - [Python documentation: `str.split`](https://docs.python.org/3/library/stdtypes.html#str.split)
///
/// ```
#[violation]
pub struct SplitOfStaticString;

impl Violation for SplitOfStaticString {
const FIX_AVAILABILITY: FixAvailability = FixAvailability::Sometimes;

#[derive_message_formats]
fn message(&self) -> String {
format!("Consider using a list instead of string split")
}

fn fix_title(&self) -> Option<String> {
Some(format!("Replace string split with list literal"))
}
}

fn construct_replacement(list_items: &[&str]) -> Expr {
Expr::List(ExprList {
elts: list_items
.iter()
.map(|list_item| {
Expr::StringLiteral(ExprStringLiteral {
value: StringLiteralValue::single(StringLiteral {
value: (*list_item).to_string().into_boxed_str(),
range: TextRange::default(),
flags: StringLiteralFlags::default(),
}),
range: TextRange::default(),
})
})
.collect(),
ctx: ExprContext::Load,
range: TextRange::default(),
})
}

fn split_default(str_value: &str, max_split: usize) -> Option<Expr> {
// From the Python documentation:
// > If sep is not specified or is None, a different splitting algorithm is applied: runs of
// > consecutive whitespace are regarded as a single separator, and the result will contain
// > no empty strings at the start or end if the string has leading or trailing whitespace.
// > Consequently, splitting an empty string or a string consisting of just whitespace with
// > a None separator returns [].
// https://docs.python.org/3/library/stdtypes.html#str.split
if max_split == 0 {
let list_items: Vec<&str> = str_value.split_whitespace().collect();
Some(construct_replacement(&list_items))
} else {
// Autofix for maxsplit without separator not yet implemented
None
}
}

fn split_sep(str_value: &str, sep_value: &str, max_split: usize, direction_left: bool) -> Expr {
let list_items: Vec<&str> = if direction_left && max_split > 0 {
str_value.splitn(max_split + 1, sep_value).collect()
} else if !direction_left && max_split > 0 {
str_value.rsplitn(max_split + 1, sep_value).collect()
} else if direction_left && max_split == 0 {
str_value.split(sep_value).collect()
} else {
str_value.rsplit(sep_value).collect()
};
construct_replacement(&list_items)
}

/// RUF035
pub(crate) fn split_of_static_string(
checker: &mut Checker,
attr: &str,
call: &ExprCall,
str_value: &str,
) {
let ExprCall { arguments, .. } = call;

let sep_arg = arguments.find_argument("sep", 0);
let maxsplit_arg = arguments.find_argument("maxsplit", 1);

// `split` vs `rsplit`
let direction_left = attr == "split";

let maxsplit_value = if let Some(maxsplit) = maxsplit_arg {
match maxsplit {
Expr::NumberLiteral(maxsplit_val) => {
if let Some(int_value) = maxsplit_val.value.as_int() {
if let Some(usize_value) = int_value.as_usize() {
usize_value
} else {
return;
}
} else {
return;
}
}
// Ignore when `maxsplit` is not a numeric value
_ => {
return;
}
}
} else {
0
};

let split_replacement = if let Some(sep) = sep_arg {
match sep {
Expr::NoneLiteral(_) => split_default(str_value, maxsplit_value),
Expr::StringLiteral(sep_value) => {
let sep_value_str = sep_value.value.to_str();
Some(split_sep(
str_value,
sep_value_str,
maxsplit_value,
direction_left,
))
}
// Ignore names until type inference is available
_ => {
return;
}
}
} else {
split_default(str_value, maxsplit_value)
};

let mut diagnostic = Diagnostic::new(SplitOfStaticString, call.range());
if let Some(ref replacement_expr) = split_replacement {
// Construct replacement list
let replacement = checker.generator().expr(replacement_expr);
diagnostic.set_fix(Fix::unsafe_edit(Edit::range_replacement(
replacement,
call.range(),
)));
}
checker.diagnostics.push(diagnostic);
}
Loading

0 comments on commit 721fd96

Please sign in to comment.