diff --git a/Cargo.lock b/Cargo.lock index be9c99c71e9fc2..6ec9c3b2ac5577 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2454,6 +2454,7 @@ dependencies = [ "insta", "itertools 0.11.0", "ruff_python_ast", + "ruff_python_index", "ruff_python_parser", "ruff_source_file", "ruff_text_size", diff --git a/crates/ruff_linter/resources/test/fixtures/pylint/empty_comment.py b/crates/ruff_linter/resources/test/fixtures/pylint/empty_comment.py new file mode 100644 index 00000000000000..2488732a8168fc --- /dev/null +++ b/crates/ruff_linter/resources/test/fixtures/pylint/empty_comment.py @@ -0,0 +1,40 @@ +# this line has a non-empty comment and is OK + # this line is also OK, but the three following lines are not +# + # + # + +# this non-empty comment has trailing whitespace and is OK + +# Many codebases use multiple `#` characters on a single line to visually +# separate sections of code, so we don't consider these empty comments. + +########################################## + +# trailing `#` characters are not considered empty comments ### + + +def foo(): # this comment is OK, the one below is not + pass # + + +# the lines below have no comments and are OK +def bar(): + pass + + +# "Empty comments" are common in block comments +# to add legibility. For example: +# +# The above line's "empty comment" is likely +# intentional and is considered OK. + + +# lines in multi-line strings/comments whose last non-whitespace character is a `#` +# do not count as empty comments +""" +The following lines are all fine: +# + # + # +""" diff --git a/crates/ruff_linter/src/checkers/physical_lines.rs b/crates/ruff_linter/src/checkers/physical_lines.rs index a2b8b98b85c343..2025e1e32be52f 100644 --- a/crates/ruff_linter/src/checkers/physical_lines.rs +++ b/crates/ruff_linter/src/checkers/physical_lines.rs @@ -1,4 +1,7 @@ //! Lint rules based on checking physical lines. + +use std::collections::HashSet; + use ruff_diagnostics::Diagnostic; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; @@ -32,9 +35,18 @@ pub(crate) fn check_physical_lines( let enforce_blank_line_contains_whitespace = settings.rules.enabled(Rule::BlankLineWithWhitespace); let enforce_copyright_notice = settings.rules.enabled(Rule::MissingCopyrightNotice); + let enforce_empty_comment = settings.rules.enabled(Rule::EmptyComment); let mut doc_lines_iter = doc_lines.iter().peekable(); + let block_comment_line_offsets: HashSet = indexer + .comment_ranges() + .block_comments(locator) + .into_iter() + .flatten() + .map(|o| locator.line_start(o)) + .collect(); + for line in locator.contents().universal_newlines() { while doc_lines_iter .next_if(|doc_line_start| line.range().contains_inclusive(**doc_line_start)) @@ -68,6 +80,15 @@ pub(crate) fn check_physical_lines( diagnostics.push(diagnostic); } } + + if enforce_empty_comment + && !block_comment_line_offsets.contains(&line.start()) + && indexer.comment_ranges().intersects(line.range()) + { + if let Some(diagnostic) = pylint::rules::empty_comment(&line) { + diagnostics.push(diagnostic); + } + } } if enforce_no_newline_at_end_of_file { diff --git a/crates/ruff_linter/src/codes.rs b/crates/ruff_linter/src/codes.rs index 550960ce275afa..df4cbb196cbf60 100644 --- a/crates/ruff_linter/src/codes.rs +++ b/crates/ruff_linter/src/codes.rs @@ -265,6 +265,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> { (Pylint, "R1733") => (RuleGroup::Preview, rules::pylint::rules::UnnecessaryDictIndexLookup), (Pylint, "R1736") => (RuleGroup::Preview, rules::pylint::rules::UnnecessaryListIndexLookup), (Pylint, "R2004") => (RuleGroup::Stable, rules::pylint::rules::MagicValueComparison), + (Pylint, "R2044") => (RuleGroup::Stable, rules::pylint::rules::EmptyComment), (Pylint, "R5501") => (RuleGroup::Stable, rules::pylint::rules::CollapsibleElseIf), (Pylint, "R6201") => (RuleGroup::Preview, rules::pylint::rules::LiteralMembership), #[allow(deprecated)] diff --git a/crates/ruff_linter/src/registry.rs b/crates/ruff_linter/src/registry.rs index 2e6fcc199580e4..3fba58247ab056 100644 --- a/crates/ruff_linter/src/registry.rs +++ b/crates/ruff_linter/src/registry.rs @@ -250,6 +250,7 @@ impl Rule { Rule::BidirectionalUnicode | Rule::BlankLineWithWhitespace | Rule::DocLineTooLong + | Rule::EmptyComment | Rule::LineTooLong | Rule::MissingCopyrightNotice | Rule::MissingNewlineAtEndOfFile diff --git a/crates/ruff_linter/src/rules/pylint/mod.rs b/crates/ruff_linter/src/rules/pylint/mod.rs index 303395de504064..02161bad780b79 100644 --- a/crates/ruff_linter/src/rules/pylint/mod.rs +++ b/crates/ruff_linter/src/rules/pylint/mod.rs @@ -40,6 +40,7 @@ mod tests { )] #[test_case(Rule::ComparisonWithItself, Path::new("comparison_with_itself.py"))] #[test_case(Rule::EqWithoutHash, Path::new("eq_without_hash.py"))] + #[test_case(Rule::EmptyComment, Path::new("empty_comment.py"))] #[test_case(Rule::ManualFromImport, Path::new("import_aliasing.py"))] #[test_case(Rule::SingleStringSlots, Path::new("single_string_slots.py"))] #[test_case(Rule::SysExitAlias, Path::new("sys_exit_alias_0.py"))] diff --git a/crates/ruff_linter/src/rules/pylint/rules/empty_comment.rs b/crates/ruff_linter/src/rules/pylint/rules/empty_comment.rs new file mode 100644 index 00000000000000..ae1f462286eb15 --- /dev/null +++ b/crates/ruff_linter/src/rules/pylint/rules/empty_comment.rs @@ -0,0 +1,88 @@ +use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; +use ruff_macros::{derive_message_formats, violation}; +use ruff_python_trivia::is_python_whitespace; +use ruff_source_file::newlines::Line; +use ruff_text_size::{TextRange, TextSize}; + +/// ## What it does +/// Checks for a # symbol appearing on a line not followed by an actual comment. +/// +/// ## Why is this bad? +/// Empty comments don't provide any clarity to the code, and just add clutter. +/// Either add a comment or delete the empty comment. +/// +/// ## Example +/// ```python +/// class Foo: # +/// pass +/// ``` +/// +/// Use instead: +/// ```python +/// class Foo: +/// pass +/// ``` +/// +/// ## References +/// - [Pylint documentation](https://pylint.pycqa.org/en/latest/user_guide/messages/refactor/empty-comment.html) +#[violation] +pub struct EmptyComment; + +impl Violation for EmptyComment { + const FIX_AVAILABILITY: FixAvailability = FixAvailability::Always; + + #[derive_message_formats] + fn message(&self) -> String { + format!("Line with empty comment") + } + + fn fix_title(&self) -> Option { + Some(format!("Delete the empty comment")) + } +} + +/// PLR2044 +pub(crate) fn empty_comment(line: &Line) -> Option { + let first_hash_col = line + .as_str() + .char_indices() + .find(|&(_, char)| char == '#')? // indicates the line does not contain a comment + .0; + + let last_non_whitespace_col = line + .as_str() + .char_indices() + .rev() + .find(|&(_, char)| !is_python_whitespace(char)) + .unwrap() // already verified at least one '#' char is in the line + .0; + + if last_non_whitespace_col != first_hash_col { + return None; // the comment is not empty + } + + let last = match line + .as_str() + .char_indices() + .rev() + .find(|&(_, c)| !is_python_whitespace(c) && c != '#') + { + #[allow(clippy::cast_possible_truncation)] // TODO: justify safety + Some((i, _second_to_last_non_whitespace_char)) => { + TextSize::new((line.start().to_usize() + i + 1) as u32) + } + None => line.start(), + }; + + Some( + Diagnostic::new( + EmptyComment, + #[allow(clippy::cast_possible_truncation)] // TODO: justify safety + TextRange::new( + line.start() + TextSize::new(first_hash_col as u32), + line.end(), + ), + ) + .with_fix(Fix::safe_edit(Edit::deletion(last, line.end()))), + ) +} diff --git a/crates/ruff_linter/src/rules/pylint/rules/mod.rs b/crates/ruff_linter/src/rules/pylint/rules/mod.rs index 85699ab30e1ed0..28c0a5c9807949 100644 --- a/crates/ruff_linter/src/rules/pylint/rules/mod.rs +++ b/crates/ruff_linter/src/rules/pylint/rules/mod.rs @@ -14,6 +14,7 @@ pub(crate) use comparison_of_constant::*; pub(crate) use comparison_with_itself::*; pub(crate) use continue_in_finally::*; pub(crate) use duplicate_bases::*; +pub(crate) use empty_comment::*; pub(crate) use eq_without_hash::*; pub(crate) use global_at_module_level::*; pub(crate) use global_statement::*; @@ -92,6 +93,7 @@ mod comparison_of_constant; mod comparison_with_itself; mod continue_in_finally; mod duplicate_bases; +mod empty_comment; mod eq_without_hash; mod global_at_module_level; mod global_statement; diff --git a/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLR2044_empty_comment.py.snap b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLR2044_empty_comment.py.snap new file mode 100644 index 00000000000000..1472d0412901e0 --- /dev/null +++ b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLR2044_empty_comment.py.snap @@ -0,0 +1,84 @@ +--- +source: crates/ruff_linter/src/rules/pylint/mod.rs +--- +empty_comment.py:3:1: PLR2044 [*] Line with empty comment + | +1 | # this line has a non-empty comment and is OK +2 | # this line is also OK, but the three following lines are not +3 | # + | ^ PLR2044 +4 | # +5 | # + | + = help: Delete the empty comment + +ℹ Safe fix +1 1 | # this line has a non-empty comment and is OK +2 2 | # this line is also OK, but the three following lines are not +3 |-# + 3 |+ +4 4 | # +5 5 | # +6 6 | + +empty_comment.py:4:5: PLR2044 [*] Line with empty comment + | +2 | # this line is also OK, but the three following lines are not +3 | # +4 | # + | ^ PLR2044 +5 | # + | + = help: Delete the empty comment + +ℹ Safe fix +1 1 | # this line has a non-empty comment and is OK +2 2 | # this line is also OK, but the three following lines are not +3 3 | # +4 |- # + 4 |+ +5 5 | # +6 6 | +7 7 | # this non-empty comment has trailing whitespace and is OK + +empty_comment.py:5:9: PLR2044 [*] Line with empty comment + | +3 | # +4 | # +5 | # + | ^^^^^ PLR2044 +6 | +7 | # this non-empty comment has trailing whitespace and is OK + | + = help: Delete the empty comment + +ℹ Safe fix +2 2 | # this line is also OK, but the three following lines are not +3 3 | # +4 4 | # +5 |- # +6 5 | + 6 |+ +7 7 | # this non-empty comment has trailing whitespace and is OK +8 8 | +9 9 | # Many codebases use multiple `#` characters on a single line to visually + +empty_comment.py:18:11: PLR2044 [*] Line with empty comment + | +17 | def foo(): # this comment is OK, the one below is not +18 | pass # + | ^^^^^ PLR2044 + | + = help: Delete the empty comment + +ℹ Safe fix +15 15 | +16 16 | +17 17 | def foo(): # this comment is OK, the one below is not +18 |- pass # + 18 |+ pass +19 19 | +20 20 | +21 21 | # the lines below have no comments and are OK + + diff --git a/crates/ruff_python_trivia/Cargo.toml b/crates/ruff_python_trivia/Cargo.toml index e17aafc44b3b59..fd5fb1f6dcf575 100644 --- a/crates/ruff_python_trivia/Cargo.toml +++ b/crates/ruff_python_trivia/Cargo.toml @@ -23,6 +23,7 @@ unicode-ident = { workspace = true } insta = { workspace = true } ruff_python_ast = { path = "../ruff_python_ast" } ruff_python_parser = { path = "../ruff_python_parser" } +ruff_python_index = { path = "../ruff_python_index" } [lints] workspace = true diff --git a/crates/ruff_python_trivia/src/comment_ranges.rs b/crates/ruff_python_trivia/src/comment_ranges.rs index a5b7b7ed50ddb3..3e923ad04a205b 100644 --- a/crates/ruff_python_trivia/src/comment_ranges.rs +++ b/crates/ruff_python_trivia/src/comment_ranges.rs @@ -2,8 +2,11 @@ use std::fmt::{Debug, Formatter}; use std::ops::Deref; use itertools::Itertools; +use ruff_source_file::Locator; -use ruff_text_size::{Ranged, TextRange}; +use ruff_text_size::{Ranged, TextRange, TextSize}; + +use crate::is_python_whitespace; /// Stores the ranges of comments sorted by [`TextRange::start`] in increasing order. No two ranges are overlapping. #[derive(Clone, Default)] @@ -45,6 +48,98 @@ impl CommentRanges { None => &self.raw[start..], } } + + /// Given a `CommentRanges`, determine which comments are grouped together + /// in block comments. Block comments are defined as a sequence of consecutive + /// lines which only contain comments and which all contain the first + /// `#` character in the same column. + /// + /// Returns a vector of vectors, with each representing a block comment, and + /// the values of each being the offset of the leading `#` character of the comment. + /// + /// Example: + /// ```python + /// # This is a block comment + /// # because it spans multiple lines + /// + /// # This is also a block comment + /// # even though it is indented + /// + /// # this is not a block comment + /// + /// x = 1 # this is not a block comment because + /// y = 2 # the lines do not *only* contain comments + /// + /// # This is not a block comment because + /// # not all consecutive lines have the + /// # first `#` character in the same column + /// + /// """ + /// # This is not a block comment because it is + /// # contained within a multi-line string/comment + /// """ + /// ``` + pub fn block_comments(&self, locator: &Locator) -> Vec> { + let mut block_comments: Vec> = Vec::new(); + + let mut current_block: Vec = Vec::new(); + let mut current_block_column: Option = None; + let mut current_block_offset: Option = None; + + for comment_range in &self.raw { + let offset = comment_range.start(); + let line_start = locator.line_start(offset); + + if CommentRanges::any_code_before_comment(locator, line_start) { + if current_block.len() > 1 { + block_comments.push(current_block); + current_block = vec![]; + current_block_column = None; + current_block_offset = None; + } + continue; + } + + let line_end = locator.full_line_end(offset).to_u32(); + let column = (offset - line_start).to_u32(); + + if let Some(c) = current_block_column { + if let Some(o) = current_block_offset { + if column == c && line_start.to_u32() == o { + current_block.push(offset); + current_block_offset = Some(line_end); + } else { + if current_block.len() > 1 { + block_comments.push(current_block); + } + current_block = vec![offset]; + current_block_column = Some(column); + current_block_offset = Some(line_end); + } + } + } else { + current_block = vec![offset]; + current_block_column = Some(column); + current_block_offset = Some(line_end); + } + } + if current_block.len() > 1 { + block_comments.push(current_block); + } + block_comments + } + + fn any_code_before_comment(locator: &Locator, offset_line_start: TextSize) -> bool { + let contents = locator.full_line(offset_line_start); + for char in contents.chars() { + if char == '#' || char == '\r' || char == '\n' { + return false; + } else if !is_python_whitespace(char) { + return true; + } + } + unreachable!("The above loop should always return with a valid Locator"); + } } impl Deref for CommentRanges { @@ -69,3 +164,157 @@ impl<'a> IntoIterator for &'a CommentRanges { self.raw.iter() } } + +#[cfg(test)] +mod tests { + use ruff_python_index::Indexer; + use ruff_python_parser::lexer::LexResult; + use ruff_python_parser::{tokenize, Mode}; + use ruff_source_file::Locator; + use ruff_text_size::TextSize; + + #[test] + fn block_comments_two_line_block_at_start() { + // arrange + let source = "# line 1\n# line 2\n"; + let tokens: Vec = tokenize(source, Mode::Module); + let locator = Locator::new(source); + let indexer = Indexer::from_tokens(&tokens, &locator); + + // act + let block_comments = indexer.comment_ranges().block_comments(&locator); + + // assert + assert_eq!( + block_comments, + vec![vec![TextSize::new(0), TextSize::new(9)]] + ); + } + + #[test] + fn block_comments_indented_block() { + // arrange + let source = " # line 1\n # line 2\n"; + let tokens: Vec = tokenize(source, Mode::Module); + let locator = Locator::new(source); + let indexer = Indexer::from_tokens(&tokens, &locator); + + // act + let block_comments = indexer.comment_ranges().block_comments(&locator); + + // assert + assert_eq!( + block_comments, + vec![vec![TextSize::new(4), TextSize::new(17)]] + ); + } + + #[test] + fn block_comments_single_line_is_not_a_block() { + // arrange + let source = "\n"; + let tokens: Vec = tokenize(source, Mode::Module); + let locator = Locator::new(source); + let indexer = Indexer::from_tokens(&tokens, &locator); + + // act + let block_comments = indexer.comment_ranges().block_comments(&locator); + + // assert + assert_eq!(block_comments, Vec::>::new()); + } + + #[test] + fn block_comments_lines_with_code_not_a_block() { + // arrange + let source = "x = 1 # line 1\ny = 2 # line 2\n"; + let tokens: Vec = tokenize(source, Mode::Module); + let locator = Locator::new(source); + let indexer = Indexer::from_tokens(&tokens, &locator); + + // act + let block_comments = indexer.comment_ranges().block_comments(&locator); + + // assert + assert_eq!(block_comments, Vec::>::new()); + } + + #[test] + fn block_comments_sequential_lines_not_in_block() { + // arrange + let source = " # line 1\n # line 2\n"; + let tokens: Vec = tokenize(source, Mode::Module); + let locator = Locator::new(source); + let indexer = Indexer::from_tokens(&tokens, &locator); + + // act + let block_comments = indexer.comment_ranges().block_comments(&locator); + + // assert + assert_eq!(block_comments, Vec::>::new()); + } + + #[test] + fn block_comments_lines_in_triple_quotes_not_a_block() { + // arrange + let source = r#" + """ + # line 1 + # line 2 + """ + "#; + let tokens: Vec = tokenize(source, Mode::Module); + let locator = Locator::new(source); + let indexer = Indexer::from_tokens(&tokens, &locator); + + // act + let block_comments = indexer.comment_ranges().block_comments(&locator); + + // assert + assert_eq!(block_comments, Vec::>::new()); + } + + #[test] + fn block_comments_stress_test() { + // arrange + let source = r#" +# block comment 1 line 1 +# block comment 2 line 2 + +# these lines + # do not form +# a block comment + +x = 1 # these lines also do not +y = 2 # do not form a block comment + +# these lines do form a block comment +# + + # + # and so do these + # + +""" +# these lines are in triple quotes and +# therefore do not form a block comment +""" + "#; + let tokens: Vec = tokenize(source, Mode::Module); + let locator = Locator::new(source); + let indexer = Indexer::from_tokens(&tokens, &locator); + + // act + let block_comments = indexer.comment_ranges().block_comments(&locator); + + // assert + assert_eq!( + block_comments, + vec![ + vec![TextSize::new(1), TextSize::new(26)], + vec![TextSize::new(174), TextSize::new(212)], + vec![TextSize::new(219), TextSize::new(225), TextSize::new(247)] + ] + ); + } +} diff --git a/ruff.schema.json b/ruff.schema.json index 3761d705180f7a..46f6e8fcd9f1f4 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -3200,6 +3200,8 @@ "PLR20", "PLR200", "PLR2004", + "PLR204", + "PLR2044", "PLR5", "PLR55", "PLR550",