From 67c6c5c7e10e502c49677e18ddcd90aced0ccb90 Mon Sep 17 00:00:00 2001
From: InSyncWithFoo <insyncwithfoo@gmail.com>
Date: Tue, 10 Dec 2024 19:35:18 +0000
Subject: [PATCH] Per review

---
 .../pyupgrade/UP009_code_utf8_utf8.py         |  3 +
 crates/ruff_linter/src/checkers/tokens.rs     |  7 +-
 crates/ruff_linter/src/rules/pyupgrade/mod.rs |  1 +
 .../rules/unnecessary_coding_comment.rs       | 99 +++++++++----------
 ...grade__tests__UP009_code_utf8_utf8.py.snap |  5 +
 crates/ruff_source_file/src/line_ranges.rs    | 34 +++----
 6 files changed, 69 insertions(+), 80 deletions(-)
 create mode 100644 crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_code_utf8_utf8.py
 create mode 100644 crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP009_code_utf8_utf8.py.snap
diff --git a/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_code_utf8_utf8.py b/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_code_utf8_utf8.py
new file mode 100644
index 00000000000000..c77de7bb119e9e
--- /dev/null
+++ b/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_code_utf8_utf8.py
@@ -0,0 +1,3 @@
+print('No coding coments here')
+# -*- coding: utf-8 -*-
+# -*- coding: utf-8 -*-
diff --git a/crates/ruff_linter/src/checkers/tokens.rs b/crates/ruff_linter/src/checkers/tokens.rs
index 7be23f8bfd0352..1aa85c485663ba 100644
--- a/crates/ruff_linter/src/checkers/tokens.rs
+++ b/crates/ruff_linter/src/checkers/tokens.rs
@@ -73,12 +73,7 @@ pub(crate) fn check_tokens(
     }
 
     if settings.rules.enabled(Rule::UTF8EncodingDeclaration) {
-        pyupgrade::rules::unnecessary_coding_comment(
-            &mut diagnostics,
-            locator,
-            indexer,
-            comment_ranges,
-        );
+        pyupgrade::rules::unnecessary_coding_comment(&mut diagnostics, locator, comment_ranges);
     }
 
     if settings.rules.enabled(Rule::TabIndentation) {
diff --git a/crates/ruff_linter/src/rules/pyupgrade/mod.rs b/crates/ruff_linter/src/rules/pyupgrade/mod.rs
index ae825c4ee26c4d..458cab3728a661 100644
--- a/crates/ruff_linter/src/rules/pyupgrade/mod.rs
+++ b/crates/ruff_linter/src/rules/pyupgrade/mod.rs
@@ -83,6 +83,7 @@ mod tests {
     #[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_utf8_utf8.py"))]
     #[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_utf8_utf8_other.py"))]
     #[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_utf8_code_other.py"))]
+    #[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_code_utf8_utf8.py"))]
     #[test_case(
         Rule::UTF8EncodingDeclaration,
         Path::new("UP009_hashbang_utf8_other.py")
diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_coding_comment.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_coding_comment.rs
index 0772506a1e1413..fca4513a955bb3 100644
--- a/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_coding_comment.rs
+++ b/crates/ruff_linter/src/rules/pyupgrade/rules/unnecessary_coding_comment.rs
@@ -1,12 +1,11 @@
+use ruff_source_file::LineRanges;
 use std::sync::LazyLock;
 
 use regex::Regex;
 
 use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix};
 use ruff_macros::{derive_message_formats, ViolationMetadata};
-use ruff_python_index::Indexer;
 use ruff_python_trivia::CommentRanges;
-use ruff_source_file::LineRanges;
 use ruff_text_size::TextRange;
 
 use crate::Locator;
@@ -62,79 +61,71 @@ struct CodingCommentRanges {
 pub(crate) fn unnecessary_coding_comment(
     diagnostics: &mut Vec<Diagnostic>,
     locator: &Locator,
-    indexer: &Indexer,
     comment_ranges: &CommentRanges,
 ) {
-    // The coding comment must be on one of the first two lines.
-    // Since each comment spans at least one line,
-    // we only need to check the first two comments,
-    // plus a third to make sure it would not become a new coding comment.
-    let mut coding_comments = comment_ranges
-        .iter()
-        .take(3)
-        .map(|comment_range| coding_comment(locator, indexer, *comment_range));
+    let mut coding_comments = coding_comments_on_first_three_lines(locator, comment_ranges);
 
     let first = coding_comments.next().flatten();
     let second = coding_comments.next().flatten();
     let third = coding_comments.next().flatten();
 
     // Table: https://github.com/astral-sh/ruff/pull/14728#issuecomment-2518114454
-    match [first, second, third] {
-        [Some(CodingComment::UTF8(ranges)), None | Some(CodingComment::UTF8(..)), _]
-        | [None, Some(CodingComment::UTF8(ranges)), None | Some(CodingComment::UTF8(..))] => {
+    match (first, second, third) {
+        (Some(CodingComment::UTF8(ranges)), None | Some(CodingComment::UTF8(..)), _)
+        | (None, Some(CodingComment::UTF8(ranges)), None | Some(CodingComment::UTF8(..))) => {
             report(diagnostics, ranges.line_range, ranges.self_range);
         }
         _ => {}
     }
 }
 
+fn coding_comments_on_first_three_lines<'a>(
+    locator: &'a Locator<'a>,
+    comment_ranges: &'a CommentRanges,
+) -> impl Iterator<Item = Option<CodingComment>> + use<'a> {
+    let first_line_trimmed = locator.full_line_str(0.into()).trim();
+
+    let seen_code = !first_line_trimmed.is_empty() && !first_line_trimmed.starts_with('#');
+
+    // The coding comment must be on one of the first two lines.
+    // Since each comment spans at least one line,
+    // we only need to check the first two comments,
+    // plus a third to make sure it would not become a new coding comment.
+    comment_ranges.iter().take(3).map(move |comment_range| {
+        if seen_code {
+            return None;
+        }
+
+        let line_range = locator.full_line_range(comment_range.start());
+        let line_index = locator.count_lines(TextRange::up_to(line_range.start()));
+
+        if line_index > 2 {
+            return None;
+        }
+
+        // If leading content is not whitespace then it's not a valid coding comment e.g.
+        // ```
+        // print(x) # coding=utf8
+        // ```
+        let before_hash_sign =
+            locator.slice(TextRange::new(line_range.start(), comment_range.start()));
+
+        if !before_hash_sign.trim().is_empty() {
+            return None;
+        }
+
+        coding_comment(locator, *comment_range, line_range)
+    })
+}
+
 fn coding_comment(
     locator: &Locator,
-    indexer: &Indexer,
     self_range: TextRange,
+    line_range: TextRange,
 ) -> Option<CodingComment> {
-    // If leading content is not whitespace then it's not a valid coding comment e.g.
-    // ```
-    // print(x) # coding=utf8
-    // ```
-    let line_range = locator.full_line_range(self_range.start());
-    if !locator
-        .slice(TextRange::new(line_range.start(), self_range.start()))
-        .trim()
-        .is_empty()
-    {
-        return None;
-    }
-
-    // If the line is after a continuation then it's not a valid coding comment e.g.
-    // ```
-    // x = 1 \
-    //    # coding=utf8
-    // x = 2
-    // ```
-    if indexer
-        .preceded_by_continuations(line_range.start(), locator.contents())
-        .is_some()
-    {
-        return None;
-    }
-
     let part_of_interest = CODING_COMMENT_REGEX.captures(locator.slice(line_range))?;
     let coding_name = part_of_interest.name("name")?.as_str();
 
-    let line_index = locator.count_lines_until(line_range.start());
-
-    // Aside from the first two lines,
-    // we also need to check the third for overridden coding comments:
-    // ```
-    // #!/usr/bin/python
-    // # -*- coding: utf-8 -*-
-    // # -*- coding: ascii -*-
-    // ```
-    if line_index > 2 {
-        return None;
-    }
-
     let ranges = CodingCommentRanges {
         self_range,
         line_range,
diff --git a/crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP009_code_utf8_utf8.py.snap b/crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP009_code_utf8_utf8.py.snap
new file mode 100644
index 00000000000000..c996a51d3844d8
--- /dev/null
+++ b/crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP009_code_utf8_utf8.py.snap
@@ -0,0 +1,5 @@
+---
+source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
+snapshot_kind: text
+---
+
diff --git a/crates/ruff_source_file/src/line_ranges.rs b/crates/ruff_source_file/src/line_ranges.rs
index d7a43fe588c609..06d097bb911e85 100644
--- a/crates/ruff_source_file/src/line_ranges.rs
+++ b/crates/ruff_source_file/src/line_ranges.rs
@@ -296,44 +296,38 @@ pub trait LineRanges {
     /// If the start or end of `range` is out of bounds.
     fn full_lines_str(&self, range: TextRange) -> &str;
 
-    /// Returns the zero-based index of the line containing `range`'s start.
+    /// The number of lines `range` spans.
     ///
     /// ## Examples
     ///
     /// ```
-    /// # use ruff_text_size::{Ranged, TextRange, TextSize};
+    /// # use ruff_text_size::{Ranged, TextRange};
     /// # use ruff_source_file::LineRanges;
     ///
     /// let text = "First line\nsecond line\r\nthird line";
     ///
-    /// assert_eq!(text.count_lines_until(TextSize::from(5)), 0);
-    /// assert_eq!(text.count_lines_until(TextSize::from(23)), 1);
-    /// assert_eq!(text.count_lines_until(TextSize::from(24)), 2);
-    /// assert_eq!(text.count_lines_until(TextSize::from(34)), 3);
-    ///
-    /// let text = "foo\n";
+    /// assert_eq!(text.count_lines(TextRange::new(5.into(), 12.into())), 1);
     ///
-    /// assert_eq!(text.count_lines_until(TextSize::from(4)), 1);
+    /// assert_eq!(text.count_lines(TextRange::up_to(5.into())), 0);
+    /// assert_eq!(text.count_lines(TextRange::up_to(23.into())), 1);
+    /// assert_eq!(text.count_lines(TextRange::up_to(24.into())), 2);
     /// ```
     ///
     /// ## Panics
-    /// If `offset` is out of bounds.
-    fn count_lines_until(&self, offset: TextSize) -> u32 {
+    /// If the start or end of `range` is out of bounds.
+    fn count_lines(&self, range: TextRange) -> u32 {
         let mut count = 0;
-        let mut last_line_end = TextSize::default();
+        let mut last_line_start = self.line_start(range.start());
 
         loop {
-            let line_end = self.full_line_end(last_line_end);
+            last_line_start = self.full_line_end(last_line_start);
 
-            if line_end <= offset && line_end != last_line_end {
-                count += 1;
-                last_line_end = line_end;
-            } else {
-                break;
+            if last_line_start > range.end() {
+                break count;
             }
-        }
 
-        count
+            count += 1;
+        }
     }
 }