diff --git a/Cargo.lock b/Cargo.lock index be9c99c71e9fc..6ec9c3b2ac557 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2454,6 +2454,7 @@ dependencies = [ "insta", "itertools 0.11.0", "ruff_python_ast", + "ruff_python_index", "ruff_python_parser", "ruff_source_file", "ruff_text_size", diff --git a/crates/ruff_linter/resources/test/fixtures/pylint/empty_comment.py b/crates/ruff_linter/resources/test/fixtures/pylint/empty_comment.py new file mode 100644 index 0000000000000..34e2cc0fb7f60 --- /dev/null +++ b/crates/ruff_linter/resources/test/fixtures/pylint/empty_comment.py @@ -0,0 +1,53 @@ +# this line has a non-empty comment and is OK + # this line is also OK, but the three following lines are not +# + # + # + +# this non-empty comment has trailing whitespace and is OK + +# Many codebases use multiple `#` characters on a single line to visually +# separate sections of code, so we don't consider these empty comments. + +########################################## + +# trailing `#` characters are not considered empty comments ### + + +def foo(): # this comment is OK, the one below is not + pass # + + +# the lines below have no comments and are OK +def bar(): + pass + + +# "Empty comments" are common in block comments +# to add legibility. For example: +# +# The above line's "empty comment" is likely +# intentional and is considered OK. + + +# lines in multi-line strings/comments whose last non-whitespace character is a `#` +# do not count as empty comments +""" +The following lines are all fine: +# + # + # +""" + +# These should be removed, despite being an empty "block comment". + +# +# + +# These should also be removed. + +x = 1 + +# +## +# diff --git a/crates/ruff_linter/src/checkers/physical_lines.rs b/crates/ruff_linter/src/checkers/physical_lines.rs index a2b8b98b85c34..fbb9abff633e3 100644 --- a/crates/ruff_linter/src/checkers/physical_lines.rs +++ b/crates/ruff_linter/src/checkers/physical_lines.rs @@ -1,4 +1,5 @@ //! Lint rules based on checking physical lines. + use ruff_diagnostics::Diagnostic; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; diff --git a/crates/ruff_linter/src/checkers/tokens.rs b/crates/ruff_linter/src/checkers/tokens.rs index ae7643d92f7e1..c67c21ba4d326 100644 --- a/crates/ruff_linter/src/checkers/tokens.rs +++ b/crates/ruff_linter/src/checkers/tokens.rs @@ -40,6 +40,10 @@ pub(crate) fn check_tokens( pygrep_hooks::rules::blanket_type_ignore(&mut diagnostics, indexer, locator); } + if settings.rules.enabled(Rule::EmptyComment) { + pylint::rules::empty_comments(&mut diagnostics, indexer, locator); + } + if settings.rules.any_enabled(&[ Rule::AmbiguousUnicodeCharacterString, Rule::AmbiguousUnicodeCharacterDocstring, diff --git a/crates/ruff_linter/src/codes.rs b/crates/ruff_linter/src/codes.rs index 8614dcda65ad0..321b485e85e0d 100644 --- a/crates/ruff_linter/src/codes.rs +++ b/crates/ruff_linter/src/codes.rs @@ -265,6 +265,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> { (Pylint, "R1733") => (RuleGroup::Preview, rules::pylint::rules::UnnecessaryDictIndexLookup), (Pylint, "R1736") => (RuleGroup::Preview, rules::pylint::rules::UnnecessaryListIndexLookup), (Pylint, "R2004") => (RuleGroup::Stable, rules::pylint::rules::MagicValueComparison), + (Pylint, "R2044") => (RuleGroup::Preview, rules::pylint::rules::EmptyComment), (Pylint, "R5501") => (RuleGroup::Stable, rules::pylint::rules::CollapsibleElseIf), (Pylint, "R6201") => (RuleGroup::Preview, rules::pylint::rules::LiteralMembership), #[allow(deprecated)] diff --git a/crates/ruff_linter/src/registry.rs b/crates/ruff_linter/src/registry.rs index 2e6fcc199580e..21499e9608492 100644 --- a/crates/ruff_linter/src/registry.rs +++ b/crates/ruff_linter/src/registry.rs @@ -265,6 +265,7 @@ impl Rule { | Rule::BlanketNOQA | Rule::BlanketTypeIgnore | Rule::CommentedOutCode + | Rule::EmptyComment | Rule::ExtraneousParentheses | Rule::InvalidCharacterBackspace | Rule::InvalidCharacterEsc diff --git a/crates/ruff_linter/src/rules/pylint/mod.rs b/crates/ruff_linter/src/rules/pylint/mod.rs index 303395de50406..02161bad780b7 100644 --- a/crates/ruff_linter/src/rules/pylint/mod.rs +++ b/crates/ruff_linter/src/rules/pylint/mod.rs @@ -40,6 +40,7 @@ mod tests { )] #[test_case(Rule::ComparisonWithItself, Path::new("comparison_with_itself.py"))] #[test_case(Rule::EqWithoutHash, Path::new("eq_without_hash.py"))] + #[test_case(Rule::EmptyComment, Path::new("empty_comment.py"))] #[test_case(Rule::ManualFromImport, Path::new("import_aliasing.py"))] #[test_case(Rule::SingleStringSlots, Path::new("single_string_slots.py"))] #[test_case(Rule::SysExitAlias, Path::new("sys_exit_alias_0.py"))] diff --git a/crates/ruff_linter/src/rules/pylint/rules/empty_comment.rs b/crates/ruff_linter/src/rules/pylint/rules/empty_comment.rs new file mode 100644 index 0000000000000..1581b6de1ef31 --- /dev/null +++ b/crates/ruff_linter/src/rules/pylint/rules/empty_comment.rs @@ -0,0 +1,106 @@ +use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; +use ruff_macros::{derive_message_formats, violation}; +use ruff_python_index::Indexer; +use ruff_python_trivia::is_python_whitespace; + +use ruff_source_file::Locator; +use ruff_text_size::{TextRange, TextSize}; +use rustc_hash::FxHashSet; + +/// ## What it does +/// Checks for a # symbol appearing on a line not followed by an actual comment. +/// +/// ## Why is this bad? +/// Empty comments don't provide any clarity to the code, and just add clutter. +/// Either add a comment or delete the empty comment. +/// +/// ## Example +/// ```python +/// class Foo: # +/// pass +/// ``` +/// +/// Use instead: +/// ```python +/// class Foo: +/// pass +/// ``` +/// +/// ## References +/// - [Pylint documentation](https://pylint.pycqa.org/en/latest/user_guide/messages/refactor/empty-comment.html) +#[violation] +pub struct EmptyComment; + +impl Violation for EmptyComment { + const FIX_AVAILABILITY: FixAvailability = FixAvailability::Always; + + #[derive_message_formats] + fn message(&self) -> String { + format!("Line with empty comment") + } + + fn fix_title(&self) -> Option { + Some(format!("Delete the empty comment")) + } +} + +/// PLR2044 +pub(crate) fn empty_comments( + diagnostics: &mut Vec, + indexer: &Indexer, + locator: &Locator, +) { + let block_comments = FxHashSet::from_iter(indexer.comment_ranges().block_comments(locator)); + + for range in indexer.comment_ranges() { + // Ignore comments that are part of multi-line "comment blocks". + if block_comments.contains(&range.start()) { + continue; + } + + // If the line contains an empty comment, add a diagnostic. + if let Some(diagnostic) = empty_comment(*range, locator) { + diagnostics.push(diagnostic); + } + } +} + +/// Return a [`Diagnostic`] if the comment at the given [`TextRange`] is empty. +fn empty_comment(range: TextRange, locator: &Locator) -> Option { + // Check: is the comment empty? + if !locator + .slice(range) + .chars() + .skip(1) + .all(is_python_whitespace) + { + return None; + } + + // Find the location of the `#`. + let first_hash_col = range.start(); + + // Find the start of the line. + let line = locator.line_range(first_hash_col); + + // Find the last character in the line that precedes the comment, if any. + let deletion_start_col = locator + .slice(TextRange::new(line.start(), first_hash_col)) + .char_indices() + .rev() + .find(|&(_, c)| !is_python_whitespace(c) && c != '#') + .map(|(last_non_whitespace_non_comment_col, _)| { + // SAFETY: (last_non_whitespace_non_comment_col + 1) <= first_hash_col + TextSize::try_from(last_non_whitespace_non_comment_col).unwrap() + TextSize::new(1) + }); + + Some( + Diagnostic::new(EmptyComment, TextRange::new(first_hash_col, line.end())).with_fix( + Fix::safe_edit(if let Some(deletion_start_col) = deletion_start_col { + Edit::deletion(line.start() + deletion_start_col, line.end()) + } else { + Edit::range_deletion(locator.full_line_range(first_hash_col)) + }), + ), + ) +} diff --git a/crates/ruff_linter/src/rules/pylint/rules/mod.rs b/crates/ruff_linter/src/rules/pylint/rules/mod.rs index 85699ab30e1ed..28c0a5c980794 100644 --- a/crates/ruff_linter/src/rules/pylint/rules/mod.rs +++ b/crates/ruff_linter/src/rules/pylint/rules/mod.rs @@ -14,6 +14,7 @@ pub(crate) use comparison_of_constant::*; pub(crate) use comparison_with_itself::*; pub(crate) use continue_in_finally::*; pub(crate) use duplicate_bases::*; +pub(crate) use empty_comment::*; pub(crate) use eq_without_hash::*; pub(crate) use global_at_module_level::*; pub(crate) use global_statement::*; @@ -92,6 +93,7 @@ mod comparison_of_constant; mod comparison_with_itself; mod continue_in_finally; mod duplicate_bases; +mod empty_comment; mod eq_without_hash; mod global_at_module_level; mod global_statement; diff --git a/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLR2044_empty_comment.py.snap b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLR2044_empty_comment.py.snap new file mode 100644 index 0000000000000..4339810f22c21 --- /dev/null +++ b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLR2044_empty_comment.py.snap @@ -0,0 +1,118 @@ +--- +source: crates/ruff_linter/src/rules/pylint/mod.rs +--- +empty_comment.py:3:1: PLR2044 [*] Line with empty comment + | +1 | # this line has a non-empty comment and is OK +2 | # this line is also OK, but the three following lines are not +3 | # + | ^ PLR2044 +4 | # +5 | # + | + = help: Delete the empty comment + +ℹ Safe fix +1 1 | # this line has a non-empty comment and is OK +2 2 | # this line is also OK, but the three following lines are not +3 |-# +4 3 | # +5 4 | # +6 5 | + +empty_comment.py:4:5: PLR2044 [*] Line with empty comment + | +2 | # this line is also OK, but the three following lines are not +3 | # +4 | # + | ^ PLR2044 +5 | # + | + = help: Delete the empty comment + +ℹ Safe fix +1 1 | # this line has a non-empty comment and is OK +2 2 | # this line is also OK, but the three following lines are not +3 3 | # +4 |- # +5 4 | # +6 5 | +7 6 | # this non-empty comment has trailing whitespace and is OK + +empty_comment.py:5:9: PLR2044 [*] Line with empty comment + | +3 | # +4 | # +5 | # + | ^ PLR2044 +6 | +7 | # this non-empty comment has trailing whitespace and is OK + | + = help: Delete the empty comment + +ℹ Safe fix +2 2 | # this line is also OK, but the three following lines are not +3 3 | # +4 4 | # +5 |- # +6 5 | +7 6 | # this non-empty comment has trailing whitespace and is OK +8 7 | + +empty_comment.py:18:11: PLR2044 [*] Line with empty comment + | +17 | def foo(): # this comment is OK, the one below is not +18 | pass # + | ^ PLR2044 + | + = help: Delete the empty comment + +ℹ Safe fix +15 15 | +16 16 | +17 17 | def foo(): # this comment is OK, the one below is not +18 |- pass # + 18 |+ pass +19 19 | +20 20 | +21 21 | # the lines below have no comments and are OK + +empty_comment.py:44:1: PLR2044 [*] Line with empty comment + | +42 | # These should be removed, despite being an empty "block comment". +43 | +44 | # + | ^ PLR2044 +45 | # + | + = help: Delete the empty comment + +ℹ Safe fix +42 42 | # These should be removed, despite being an empty "block comment". +43 43 | +44 44 | # +45 |-# +46 45 | +47 46 | # These should also be removed. +48 47 | + +empty_comment.py:45:1: PLR2044 [*] Line with empty comment + | +44 | # +45 | # + | ^ PLR2044 +46 | +47 | # These should also be removed. + | + = help: Delete the empty comment + +ℹ Safe fix +42 42 | # These should be removed, despite being an empty "block comment". +43 43 | +44 44 | # +45 |-# +46 45 | +47 46 | # These should also be removed. +48 47 | + + diff --git a/crates/ruff_python_trivia/Cargo.toml b/crates/ruff_python_trivia/Cargo.toml index e17aafc44b3b5..fd5fb1f6dcf57 100644 --- a/crates/ruff_python_trivia/Cargo.toml +++ b/crates/ruff_python_trivia/Cargo.toml @@ -23,6 +23,7 @@ unicode-ident = { workspace = true } insta = { workspace = true } ruff_python_ast = { path = "../ruff_python_ast" } ruff_python_parser = { path = "../ruff_python_parser" } +ruff_python_index = { path = "../ruff_python_index" } [lints] workspace = true diff --git a/crates/ruff_python_trivia/src/comment_ranges.rs b/crates/ruff_python_trivia/src/comment_ranges.rs index a5b7b7ed50ddb..02a42d9e2a41b 100644 --- a/crates/ruff_python_trivia/src/comment_ranges.rs +++ b/crates/ruff_python_trivia/src/comment_ranges.rs @@ -2,8 +2,11 @@ use std::fmt::{Debug, Formatter}; use std::ops::Deref; use itertools::Itertools; +use ruff_source_file::Locator; -use ruff_text_size::{Ranged, TextRange}; +use ruff_text_size::{Ranged, TextRange, TextSize}; + +use crate::is_python_whitespace; /// Stores the ranges of comments sorted by [`TextRange::start`] in increasing order. No two ranges are overlapping. #[derive(Clone, Default)] @@ -45,6 +48,137 @@ impl CommentRanges { None => &self.raw[start..], } } + + /// Given a [`CommentRanges`], determine which comments are grouped together + /// in "comment blocks". A "comment block" is a sequence of consecutive + /// own-line comments in which the comment hash (`#`) appears in the same + /// column in each line, and at least one comment is non-empty. + /// + /// Returns a vector containing the offset of the leading hash (`#`) for + /// each comment in any block comment. + /// + /// ## Examples + /// ```python + /// # This is a block comment + /// # because it spans multiple lines + /// + /// # This is also a block comment + /// # even though it is indented + /// + /// # this is not a block comment + /// + /// x = 1 # this is not a block comment because + /// y = 2 # the lines do not *only* contain comments + /// + /// # This is not a block comment because + /// # not all consecutive lines have the + /// # first `#` character in the same column + /// + /// """ + /// # This is not a block comment because it is + /// # contained within a multi-line string/comment + /// """ + /// ``` + pub fn block_comments(&self, locator: &Locator) -> Vec { + let mut block_comments: Vec = Vec::new(); + + let mut current_block: Vec = Vec::new(); + let mut current_block_column: Option = None; + let mut current_block_non_empty = false; + + let mut prev_line_end = None; + + for comment_range in &self.raw { + let offset = comment_range.start(); + let line_start = locator.line_start(offset); + let line_end = locator.full_line_end(offset); + let column = offset - line_start; + + // If this is an end-of-line comment, reset the current block. + if !Self::is_own_line(offset, locator) { + // Push the current block, and reset. + if current_block.len() > 1 && current_block_non_empty { + block_comments.extend(current_block); + } + current_block = vec![]; + current_block_column = None; + current_block_non_empty = false; + prev_line_end = Some(line_end); + continue; + } + + // If there's a blank line between this comment and the previous + // comment, reset the current block. + if prev_line_end.is_some_and(|prev_line_end| { + locator.contains_line_break(TextRange::new(prev_line_end, line_start)) + }) { + // Push the current block. + if current_block.len() > 1 && current_block_non_empty { + block_comments.extend(current_block); + } + + // Reset the block state. + current_block = vec![offset]; + current_block_column = Some(column); + current_block_non_empty = !Self::is_empty(*comment_range, locator); + prev_line_end = Some(line_end); + continue; + } + + if let Some(current_column) = current_block_column { + if column == current_column { + // Add the comment to the current block. + current_block.push(offset); + current_block_non_empty |= !Self::is_empty(*comment_range, locator); + prev_line_end = Some(line_end); + } else { + // Push the current block. + if current_block.len() > 1 && current_block_non_empty { + block_comments.extend(current_block); + } + + // Reset the block state. + current_block = vec![offset]; + current_block_column = Some(column); + current_block_non_empty = !Self::is_empty(*comment_range, locator); + prev_line_end = Some(line_end); + } + } else { + // Push the current block. + if current_block.len() > 1 && current_block_non_empty { + block_comments.extend(current_block); + } + + // Reset the block state. + current_block = vec![offset]; + current_block_column = Some(column); + current_block_non_empty = !Self::is_empty(*comment_range, locator); + prev_line_end = Some(line_end); + } + } + + // Push any lingering blocks. + if current_block.len() > 1 && current_block_non_empty { + block_comments.extend(current_block); + } + + block_comments + } + + /// Returns `true` if the given range is an empty comment. + fn is_empty(range: TextRange, locator: &Locator) -> bool { + locator + .slice(range) + .chars() + .skip(1) + .all(is_python_whitespace) + } + + /// Returns `true` if a comment is an own-line comment (as opposed to an end-of-line comment). + fn is_own_line(offset: TextSize, locator: &Locator) -> bool { + let range = TextRange::new(locator.line_start(offset), offset); + locator.slice(range).chars().all(is_python_whitespace) + } } impl Deref for CommentRanges { @@ -69,3 +203,158 @@ impl<'a> IntoIterator for &'a CommentRanges { self.raw.iter() } } + +#[cfg(test)] +mod tests { + use ruff_python_index::Indexer; + use ruff_python_parser::lexer::LexResult; + use ruff_python_parser::{tokenize, Mode}; + use ruff_source_file::Locator; + use ruff_text_size::TextSize; + + #[test] + fn block_comments_two_line_block_at_start() { + // arrange + let source = "# line 1\n# line 2\n"; + let tokens: Vec = tokenize(source, Mode::Module); + let locator = Locator::new(source); + let indexer = Indexer::from_tokens(&tokens, &locator); + + // act + let block_comments = indexer.comment_ranges().block_comments(&locator); + + // assert + assert_eq!(block_comments, vec![TextSize::new(0), TextSize::new(9)]); + } + + #[test] + fn block_comments_indented_block() { + // arrange + let source = " # line 1\n # line 2\n"; + let tokens: Vec = tokenize(source, Mode::Module); + let locator = Locator::new(source); + let indexer = Indexer::from_tokens(&tokens, &locator); + + // act + let block_comments = indexer.comment_ranges().block_comments(&locator); + + // assert + assert_eq!(block_comments, vec![TextSize::new(4), TextSize::new(17)]); + } + + #[test] + fn block_comments_single_line_is_not_a_block() { + // arrange + let source = "\n"; + let tokens: Vec = tokenize(source, Mode::Module); + let locator = Locator::new(source); + let indexer = Indexer::from_tokens(&tokens, &locator); + + // act + let block_comments = indexer.comment_ranges().block_comments(&locator); + + // assert + assert_eq!(block_comments, Vec::::new()); + } + + #[test] + fn block_comments_lines_with_code_not_a_block() { + // arrange + let source = "x = 1 # line 1\ny = 2 # line 2\n"; + let tokens: Vec = tokenize(source, Mode::Module); + let locator = Locator::new(source); + let indexer = Indexer::from_tokens(&tokens, &locator); + + // act + let block_comments = indexer.comment_ranges().block_comments(&locator); + + // assert + assert_eq!(block_comments, Vec::::new()); + } + + #[test] + fn block_comments_sequential_lines_not_in_block() { + // arrange + let source = " # line 1\n # line 2\n"; + let tokens: Vec = tokenize(source, Mode::Module); + let locator = Locator::new(source); + let indexer = Indexer::from_tokens(&tokens, &locator); + + // act + let block_comments = indexer.comment_ranges().block_comments(&locator); + + // assert + assert_eq!(block_comments, Vec::::new()); + } + + #[test] + fn block_comments_lines_in_triple_quotes_not_a_block() { + // arrange + let source = r#" + """ + # line 1 + # line 2 + """ + "#; + let tokens: Vec = tokenize(source, Mode::Module); + let locator = Locator::new(source); + let indexer = Indexer::from_tokens(&tokens, &locator); + + // act + let block_comments = indexer.comment_ranges().block_comments(&locator); + + // assert + assert_eq!(block_comments, Vec::::new()); + } + + #[test] + fn block_comments_stress_test() { + // arrange + let source = r#" +# block comment 1 line 1 +# block comment 2 line 2 + +# these lines + # do not form +# a block comment + +x = 1 # these lines also do not +y = 2 # do not form a block comment + +# these lines do form a block comment +# + + # + # and so do these + # + +""" +# these lines are in triple quotes and +# therefore do not form a block comment +""" + "#; + let tokens: Vec = tokenize(source, Mode::Module); + let locator = Locator::new(source); + let indexer = Indexer::from_tokens(&tokens, &locator); + + // act + let block_comments = indexer.comment_ranges().block_comments(&locator); + + // assert + assert_eq!( + block_comments, + vec![ + // Block #1 + TextSize::new(1), + TextSize::new(26), + // Block #2 + TextSize::new(174), + TextSize::new(212), + // Block #3 + TextSize::new(219), + TextSize::new(225), + TextSize::new(247) + ] + ); + } +} diff --git a/ruff.schema.json b/ruff.schema.json index fb7e2af47f587..05acca4e74464 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -3201,6 +3201,8 @@ "PLR20", "PLR200", "PLR2004", + "PLR204", + "PLR2044", "PLR5", "PLR55", "PLR550",