From 67c6c5c7e10e502c49677e18ddcd90aced0ccb90 Mon Sep 17 00:00:00 2001 From: InSyncWithFoo Date: Tue, 10 Dec 2024 19:35:18 +0000 Subject: [PATCH] Per review --- .../pyupgrade/UP009_code_utf8_utf8.py | 3 + crates/ruff_linter/src/checkers/tokens.rs | 7 +- crates/ruff_linter/src/rules/pyupgrade/mod.rs | 1 + .../rules/unnecessary_coding_comment.rs | 99 +++++++++---------- ...grade__tests__UP009_code_utf8_utf8.py.snap | 5 + crates/ruff_source_file/src/line_ranges.rs | 34 +++---- 6 files changed, 69 insertions(+), 80 deletions(-) create mode 100644 crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_code_utf8_utf8.py create mode 100644 crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP009_code_utf8_utf8.py.snap diff --git a/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_code_utf8_utf8.py b/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_code_utf8_utf8.py new file mode 100644 index 00000000000000..c77de7bb119e9e --- /dev/null +++ b/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_code_utf8_utf8.py @@ -0,0 +1,3 @@ +print('No coding coments here') +# -*- coding: utf-8 -*- +# -*- coding: utf-8 -*- diff --git a/crates/ruff_linter/src/checkers/tokens.rs b/crates/ruff_linter/src/checkers/tokens.rs index 7be23f8bfd0352..1aa85c485663ba 100644 --- a/crates/ruff_linter/src/checkers/tokens.rs +++ b/crates/ruff_linter/src/checkers/tokens.rs @@ -73,12 +73,7 @@ pub(crate) fn check_tokens( } if settings.rules.enabled(Rule::UTF8EncodingDeclaration) { - pyupgrade::rules::unnecessary_coding_comment( - &mut diagnostics, - locator, - indexer, - comment_ranges, - ); + pyupgrade::rules::unnecessary_coding_comment(&mut diagnostics, locator, comment_ranges); } if settings.rules.enabled(Rule::TabIndentation) { diff --git a/crates/ruff_linter/src/rules/pyupgrade/mod.rs b/crates/ruff_linter/src/rules/pyupgrade/mod.rs index ae825c4ee26c4d..458cab3728a661 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/mod.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/mod.rs @@ -83,6 +83,7 @@ mod tests { #[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_utf8_utf8.py"))] #[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_utf8_utf8_other.py"))] #[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_utf8_code_other.py"))] + #[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_code_utf8_utf8.py"))] #[test_case( Rule::UTF8EncodingDeclaration, Path::new("UP009_hashbang_utf8_other.py") diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_coding_comment.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_coding_comment.rs index 0772506a1e1413..fca4513a955bb3 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_coding_comment.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_coding_comment.rs @@ -1,12 +1,11 @@ +use ruff_source_file::LineRanges; use std::sync::LazyLock; use regex::Regex; use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, ViolationMetadata}; -use ruff_python_index::Indexer; use ruff_python_trivia::CommentRanges; -use ruff_source_file::LineRanges; use ruff_text_size::TextRange; use crate::Locator; @@ -62,79 +61,71 @@ struct CodingCommentRanges { pub(crate) fn unnecessary_coding_comment( diagnostics: &mut Vec, locator: &Locator, - indexer: &Indexer, comment_ranges: &CommentRanges, ) { - // The coding comment must be on one of the first two lines. - // Since each comment spans at least one line, - // we only need to check the first two comments, - // plus a third to make sure it would not become a new coding comment. - let mut coding_comments = comment_ranges - .iter() - .take(3) - .map(|comment_range| coding_comment(locator, indexer, *comment_range)); + let mut coding_comments = coding_comments_on_first_three_lines(locator, comment_ranges); let first = coding_comments.next().flatten(); let second = coding_comments.next().flatten(); let third = coding_comments.next().flatten(); // Table: https://github.com/astral-sh/ruff/pull/14728#issuecomment-2518114454 - match [first, second, third] { - [Some(CodingComment::UTF8(ranges)), None | Some(CodingComment::UTF8(..)), _] - | [None, Some(CodingComment::UTF8(ranges)), None | Some(CodingComment::UTF8(..))] => { + match (first, second, third) { + (Some(CodingComment::UTF8(ranges)), None | Some(CodingComment::UTF8(..)), _) + | (None, Some(CodingComment::UTF8(ranges)), None | Some(CodingComment::UTF8(..))) => { report(diagnostics, ranges.line_range, ranges.self_range); } _ => {} } } +fn coding_comments_on_first_three_lines<'a>( + locator: &'a Locator<'a>, + comment_ranges: &'a CommentRanges, +) -> impl Iterator> + use<'a> { + let first_line_trimmed = locator.full_line_str(0.into()).trim(); + + let seen_code = !first_line_trimmed.is_empty() && !first_line_trimmed.starts_with('#'); + + // The coding comment must be on one of the first two lines. + // Since each comment spans at least one line, + // we only need to check the first two comments, + // plus a third to make sure it would not become a new coding comment. + comment_ranges.iter().take(3).map(move |comment_range| { + if seen_code { + return None; + } + + let line_range = locator.full_line_range(comment_range.start()); + let line_index = locator.count_lines(TextRange::up_to(line_range.start())); + + if line_index > 2 { + return None; + } + + // If leading content is not whitespace then it's not a valid coding comment e.g. + // ``` + // print(x) # coding=utf8 + // ``` + let before_hash_sign = + locator.slice(TextRange::new(line_range.start(), comment_range.start())); + + if !before_hash_sign.trim().is_empty() { + return None; + } + + coding_comment(locator, *comment_range, line_range) + }) +} + fn coding_comment( locator: &Locator, - indexer: &Indexer, self_range: TextRange, + line_range: TextRange, ) -> Option { - // If leading content is not whitespace then it's not a valid coding comment e.g. - // ``` - // print(x) # coding=utf8 - // ``` - let line_range = locator.full_line_range(self_range.start()); - if !locator - .slice(TextRange::new(line_range.start(), self_range.start())) - .trim() - .is_empty() - { - return None; - } - - // If the line is after a continuation then it's not a valid coding comment e.g. - // ``` - // x = 1 \ - // # coding=utf8 - // x = 2 - // ``` - if indexer - .preceded_by_continuations(line_range.start(), locator.contents()) - .is_some() - { - return None; - } - let part_of_interest = CODING_COMMENT_REGEX.captures(locator.slice(line_range))?; let coding_name = part_of_interest.name("name")?.as_str(); - let line_index = locator.count_lines_until(line_range.start()); - - // Aside from the first two lines, - // we also need to check the third for overridden coding comments: - // ``` - // #!/usr/bin/python - // # -*- coding: utf-8 -*- - // # -*- coding: ascii -*- - // ``` - if line_index > 2 { - return None; - } - let ranges = CodingCommentRanges { self_range, line_range, diff --git a/crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP009_code_utf8_utf8.py.snap b/crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP009_code_utf8_utf8.py.snap new file mode 100644 index 00000000000000..c996a51d3844d8 --- /dev/null +++ b/crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP009_code_utf8_utf8.py.snap @@ -0,0 +1,5 @@ +--- +source: crates/ruff_linter/src/rules/pyupgrade/mod.rs +snapshot_kind: text +--- + diff --git a/crates/ruff_source_file/src/line_ranges.rs b/crates/ruff_source_file/src/line_ranges.rs index d7a43fe588c609..06d097bb911e85 100644 --- a/crates/ruff_source_file/src/line_ranges.rs +++ b/crates/ruff_source_file/src/line_ranges.rs @@ -296,44 +296,38 @@ pub trait LineRanges { /// If the start or end of `range` is out of bounds. fn full_lines_str(&self, range: TextRange) -> &str; - /// Returns the zero-based index of the line containing `range`'s start. + /// The number of lines `range` spans. /// /// ## Examples /// /// ``` - /// # use ruff_text_size::{Ranged, TextRange, TextSize}; + /// # use ruff_text_size::{Ranged, TextRange}; /// # use ruff_source_file::LineRanges; /// /// let text = "First line\nsecond line\r\nthird line"; /// - /// assert_eq!(text.count_lines_until(TextSize::from(5)), 0); - /// assert_eq!(text.count_lines_until(TextSize::from(23)), 1); - /// assert_eq!(text.count_lines_until(TextSize::from(24)), 2); - /// assert_eq!(text.count_lines_until(TextSize::from(34)), 3); - /// - /// let text = "foo\n"; + /// assert_eq!(text.count_lines(TextRange::new(5.into(), 12.into())), 1); /// - /// assert_eq!(text.count_lines_until(TextSize::from(4)), 1); + /// assert_eq!(text.count_lines(TextRange::up_to(5.into())), 0); + /// assert_eq!(text.count_lines(TextRange::up_to(23.into())), 1); + /// assert_eq!(text.count_lines(TextRange::up_to(24.into())), 2); /// ``` /// /// ## Panics - /// If `offset` is out of bounds. - fn count_lines_until(&self, offset: TextSize) -> u32 { + /// If the start or end of `range` is out of bounds. + fn count_lines(&self, range: TextRange) -> u32 { let mut count = 0; - let mut last_line_end = TextSize::default(); + let mut last_line_start = self.line_start(range.start()); loop { - let line_end = self.full_line_end(last_line_end); + last_line_start = self.full_line_end(last_line_start); - if line_end <= offset && line_end != last_line_end { - count += 1; - last_line_end = line_end; - } else { - break; + if last_line_start > range.end() { + break count; } - } - count + count += 1; + } } }