Skip to content

Commit

Permalink
Replace most usages of lex_starts_at with Tokens (#11511)
Browse files Browse the repository at this point in the history
## Summary

Part of #11401 

This PR refactors most usages of `lex_starts_at` to use the `Tokens`
struct available on the `Program`.

This PR also introduces the following two APIs:
1. `count` (on `StringLiteralValue`) to return the number of string
literal parts in the string expression
2. `after` (on `Tokens`) to return the token slice after the given
`TextSize` offset

## Test Plan

I don't currently have a way to test this, so I'll wait until all the
changes are made and the code compiles.
  • Loading branch information
dhruvmanila committed May 31, 2024
1 parent bbfe93a commit 0e88103
Show file tree
Hide file tree
Showing 24 changed files with 355 additions and 405 deletions.
2 changes: 1 addition & 1 deletion crates/ruff_linter/src/checkers/ast/analyze/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1160,7 +1160,7 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
}
}
if checker.enabled(Rule::PrintfStringFormatting) {
pyupgrade::rules::printf_string_formatting(checker, expr, right);
pyupgrade::rules::printf_string_formatting(checker, format_string, right);
}
if checker.enabled(Rule::BadStringFormatCharacter) {
pylint::rules::bad_string_format_character::percent(
Expand Down
2 changes: 1 addition & 1 deletion crates/ruff_linter/src/checkers/ast/analyze/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -762,7 +762,7 @@ pub(crate) fn statement(stmt: &Stmt, checker: &mut Checker) {
pyupgrade::rules::deprecated_c_element_tree(checker, stmt);
}
if checker.enabled(Rule::DeprecatedImport) {
pyupgrade::rules::deprecated_import(checker, stmt, names, module, level);
pyupgrade::rules::deprecated_import(checker, import_from);
}
if checker.enabled(Rule::UnnecessaryBuiltinImport) {
if let Some(module) = module {
Expand Down
2 changes: 1 addition & 1 deletion crates/ruff_linter/src/checkers/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ impl<'a> Checker<'a> {
locator,
stylist,
indexer,
importer: Importer::new(program.suite(), locator, stylist),
importer: Importer::new(program, locator, stylist),
semantic: SemanticModel::new(&settings.typing_modules, path, module),
visit: deferred::Visit::default(),
analyze: deferred::Analyze::default(),
Expand Down
10 changes: 6 additions & 4 deletions crates/ruff_linter/src/checkers/imports.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ use std::path::Path;
use ruff_diagnostics::Diagnostic;
use ruff_notebook::CellOffsets;
use ruff_python_ast::statement_visitor::StatementVisitor;
use ruff_python_ast::{PySourceType, Suite};
use ruff_python_ast::{ModModule, PySourceType, Suite};
use ruff_python_codegen::Stylist;
use ruff_python_index::Indexer;
use ruff_python_parser::{Program, Tokens};
use ruff_source_file::Locator;

use crate::directives::IsortDirectives;
Expand All @@ -17,7 +18,7 @@ use crate::settings::LinterSettings;

#[allow(clippy::too_many_arguments)]
pub(crate) fn check_imports(
python_ast: &Suite,
program: &Program<ModModule>,
locator: &Locator,
indexer: &Indexer,
directives: &IsortDirectives,
Expand All @@ -31,7 +32,7 @@ pub(crate) fn check_imports(
let tracker = {
let mut tracker =
BlockBuilder::new(locator, directives, source_type.is_stub(), cell_offsets);
tracker.visit_body(python_ast);
tracker.visit_body(program.suite());
tracker
};

Expand All @@ -50,6 +51,7 @@ pub(crate) fn check_imports(
settings,
package,
source_type,
program.tokens(),
) {
diagnostics.push(diagnostic);
}
Expand All @@ -58,7 +60,7 @@ pub(crate) fn check_imports(
}
if settings.rules.enabled(Rule::MissingRequiredImport) {
diagnostics.extend(isort::rules::add_required_imports(
python_ast,
program,
locator,
stylist,
settings,
Expand Down
44 changes: 21 additions & 23 deletions crates/ruff_linter/src/importer/insertion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
use std::ops::Add;

use ruff_python_ast::{PySourceType, Stmt};
use ruff_python_parser::{lexer, AsMode, Tok};
use ruff_python_parser::{lexer, AsMode, Tok, TokenKind, Tokens};
use ruff_text_size::{Ranged, TextSize};

use ruff_diagnostics::Edit;
Expand Down Expand Up @@ -145,7 +145,7 @@ impl<'a> Insertion<'a> {
mut location: TextSize,
locator: &Locator<'a>,
stylist: &Stylist,
source_type: PySourceType,
tokens: &Tokens,
) -> Insertion<'a> {
enum Awaiting {
Colon(u32),
Expand All @@ -154,40 +154,38 @@ impl<'a> Insertion<'a> {
}

let mut state = Awaiting::Colon(0);
for (tok, range) in
lexer::lex_starts_at(locator.after(location), source_type.as_mode(), location).flatten()
{
for token in tokens.after(location) {
match state {
// Iterate until we find the colon indicating the start of the block body.
Awaiting::Colon(depth) => match tok {
Tok::Colon if depth == 0 => {
Awaiting::Colon(depth) => match token.kind() {
TokenKind::Colon if depth == 0 => {
state = Awaiting::Newline;
}
Tok::Lpar | Tok::Lbrace | Tok::Lsqb => {
TokenKind::Lpar | TokenKind::Lbrace | TokenKind::Lsqb => {
state = Awaiting::Colon(depth.saturating_add(1));
}
Tok::Rpar | Tok::Rbrace | Tok::Rsqb => {
TokenKind::Rpar | TokenKind::Rbrace | TokenKind::Rsqb => {
state = Awaiting::Colon(depth.saturating_sub(1));
}
_ => {}
},
// Once we've seen the colon, we're looking for a newline; otherwise, there's no
// block body (e.g. `if True: pass`).
Awaiting::Newline => match tok {
Tok::Comment(..) => {}
Tok::Newline => {
Awaiting::Newline => match token.kind() {
TokenKind::Comment(..) => {}
TokenKind::Newline => {
state = Awaiting::Indent;
}
_ => {
location = range.start();
location = token.start();
break;
}
},
// Once we've seen the newline, we're looking for the indentation of the block body.
Awaiting::Indent => match tok {
Tok::Comment(..) => {}
Tok::NonLogicalNewline => {}
Tok::Indent => {
Awaiting::Indent => match token.kind() {
TokenKind::Comment(..) => {}
TokenKind::NonLogicalNewline => {}
TokenKind::Indent => {
// This is like:
// ```python
// if True:
Expand All @@ -196,13 +194,13 @@ impl<'a> Insertion<'a> {
// Where `range` is the indentation before the `pass` token.
return Insertion::indented(
"",
range.start(),
token.start(),
stylist.line_ending().as_str(),
locator.slice(range),
locator.slice(token),
);
}
_ => {
location = range.start();
location = token.start();
break;
}
},
Expand Down Expand Up @@ -442,10 +440,10 @@ x = 1
#[test]
fn start_of_block() {
fn insert(contents: &str, offset: TextSize) -> Insertion {
let tokens = ruff_python_parser::tokenize(contents, Mode::Module);
let program = ruff_python_parser::parse_module(contents).unwrap();
let locator = Locator::new(contents);
let stylist = Stylist::from_tokens(&tokens, &locator);
Insertion::start_of_block(offset, &locator, &stylist, PySourceType::default())
let stylist = Stylist::from_tokens(&program, &locator);
Insertion::start_of_block(offset, &locator, &stylist, program.tokens())
}

let contents = "if True: pass";
Expand Down
22 changes: 10 additions & 12 deletions crates/ruff_linter/src/importer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ use std::error::Error;

use anyhow::Result;
use libcst_native::{ImportAlias, Name, NameOrAttribute};
use ruff_python_ast::{self as ast, PySourceType, Stmt};
use ruff_python_ast::{self as ast, ModModule, Stmt};
use ruff_python_parser::{Program, Tokens};
use ruff_text_size::{Ranged, TextSize};

use ruff_diagnostics::Edit;
Expand All @@ -27,6 +28,8 @@ mod insertion;
pub(crate) struct Importer<'a> {
/// The Python AST to which we are adding imports.
python_ast: &'a [Stmt],
/// The tokens representing the Python AST.
tokens: &'a Tokens,
/// The [`Locator`] for the Python AST.
locator: &'a Locator<'a>,
/// The [`Stylist`] for the Python AST.
Expand All @@ -39,12 +42,13 @@ pub(crate) struct Importer<'a> {

impl<'a> Importer<'a> {
pub(crate) fn new(
python_ast: &'a [Stmt],
program: &'a Program<ModModule>,
locator: &'a Locator<'a>,
stylist: &'a Stylist<'a>,
) -> Self {
Self {
python_ast,
python_ast: program.suite(),
tokens: program.tokens(),
locator,
stylist,
runtime_imports: Vec::default(),
Expand Down Expand Up @@ -121,7 +125,6 @@ impl<'a> Importer<'a> {
import: &ImportedMembers,
at: TextSize,
semantic: &SemanticModel,
source_type: PySourceType,
) -> Result<TypingImportEdit> {
// Generate the modified import statement.
let content = fix::codemods::retain_imports(
Expand Down Expand Up @@ -178,7 +181,7 @@ impl<'a> Importer<'a> {
// Add the import to a `TYPE_CHECKING` block.
let add_import_edit = if let Some(block) = self.preceding_type_checking_block(at) {
// Add the import to the `TYPE_CHECKING` block.
self.add_to_type_checking_block(&content, block.start(), source_type)
self.add_to_type_checking_block(&content, block.start())
} else {
// Add the import to a new `TYPE_CHECKING` block.
self.add_type_checking_block(
Expand Down Expand Up @@ -455,13 +458,8 @@ impl<'a> Importer<'a> {
}

/// Add an import statement to an existing `TYPE_CHECKING` block.
fn add_to_type_checking_block(
&self,
content: &str,
at: TextSize,
source_type: PySourceType,
) -> Edit {
Insertion::start_of_block(at, self.locator, self.stylist, source_type).into_edit(content)
fn add_to_type_checking_block(&self, content: &str, at: TextSize) -> Edit {
Insertion::start_of_block(at, self.locator, self.stylist, self.tokens).into_edit(content)
}

/// Return the import statement that precedes the given position, if any.
Expand Down
2 changes: 1 addition & 1 deletion crates/ruff_linter/src/linter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ pub fn check_path(
}
if use_imports {
let import_diagnostics = check_imports(
program.suite(),
&program,
locator,
indexer,
&directives.isort,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,6 @@ fn fix_imports(checker: &Checker, node_id: NodeId, imports: &[ImportBinding]) ->
},
at,
checker.semantic(),
checker.source_type,
)?
.into_edits();

Expand Down
7 changes: 4 additions & 3 deletions crates/ruff_linter/src/rules/isort/annotate.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use ruff_python_ast::{self as ast, PySourceType, Stmt};
use ruff_python_ast::{self as ast, Stmt};
use ruff_python_parser::Tokens;
use ruff_text_size::{Ranged, TextRange};

use ruff_source_file::Locator;
Expand All @@ -13,7 +14,7 @@ pub(crate) fn annotate_imports<'a>(
comments: Vec<Comment<'a>>,
locator: &Locator<'a>,
split_on_trailing_comma: bool,
source_type: PySourceType,
tokens: &Tokens,
) -> Vec<AnnotatedImport<'a>> {
let mut comments_iter = comments.into_iter().peekable();

Expand Down Expand Up @@ -120,7 +121,7 @@ pub(crate) fn annotate_imports<'a>(
names: aliases,
level: *level,
trailing_comma: if split_on_trailing_comma {
trailing_comma(import, locator, source_type)
trailing_comma(import, tokens)
} else {
TrailingComma::default()
},
Expand Down
38 changes: 15 additions & 23 deletions crates/ruff_linter/src/rules/isort/helpers.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use ruff_python_ast::{PySourceType, Stmt};
use ruff_python_parser::{lexer, AsMode, Tok};
use ruff_python_ast::Stmt;
use ruff_python_parser::{TokenKind, Tokens};
use ruff_python_trivia::PythonWhitespace;
use ruff_source_file::{Locator, UniversalNewlines};
use ruff_text_size::Ranged;
Expand All @@ -8,31 +8,23 @@ use crate::rules::isort::types::TrailingComma;

/// Return `true` if a `Stmt::ImportFrom` statement ends with a magic
/// trailing comma.
pub(super) fn trailing_comma(
stmt: &Stmt,
locator: &Locator,
source_type: PySourceType,
) -> TrailingComma {
let contents = locator.slice(stmt);
pub(super) fn trailing_comma(stmt: &Stmt, tokens: &Tokens) -> TrailingComma {
let mut count = 0u32;
let mut trailing_comma = TrailingComma::Absent;
for (tok, _) in lexer::lex_starts_at(contents, source_type.as_mode(), stmt.start()).flatten() {
if matches!(tok, Tok::Lpar) {
count = count.saturating_add(1);
}
if matches!(tok, Tok::Rpar) {
count = count.saturating_sub(1);
for token in tokens.tokens_in_range(stmt.range()) {
match token.kind() {
TokenKind::Lpar => count = count.saturating_add(1),
TokenKind::Rpar => count = count.saturating_sub(1),
_ => {}
}
if count == 1 {
if matches!(
tok,
Tok::NonLogicalNewline | Tok::Indent | Tok::Dedent | Tok::Comment(_)
) {
continue;
} else if matches!(tok, Tok::Comma) {
trailing_comma = TrailingComma::Present;
} else {
trailing_comma = TrailingComma::Absent;
match token.kind() {
TokenKind::NonLogicalNewline
| TokenKind::Indent
| TokenKind::Dedent
| TokenKind::Comment => continue,
TokenKind::Comma => trailing_comma = TrailingComma::Present,
_ => trailing_comma = TrailingComma::Absent,
}
}
}
Expand Down
4 changes: 3 additions & 1 deletion crates/ruff_linter/src/rules/isort/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use normalize::normalize_imports;
use order::order_imports;
use ruff_python_ast::PySourceType;
use ruff_python_codegen::Stylist;
use ruff_python_parser::Tokens;
use ruff_source_file::Locator;
use settings::Settings;
use types::EitherImport::{Import, ImportFrom};
Expand Down Expand Up @@ -72,14 +73,15 @@ pub(crate) fn format_imports(
source_type: PySourceType,
target_version: PythonVersion,
settings: &Settings,
tokens: &Tokens,
) -> String {
let trailer = &block.trailer;
let block = annotate_imports(
&block.imports,
comments,
locator,
settings.split_on_trailing_comma,
source_type,
tokens,
);

// Normalize imports (i.e., deduplicate, aggregate `from` imports).
Expand Down
Loading

0 comments on commit 0e88103

Please sign in to comment.