From 41da52a61bd3174580f13fef8b7b4169e00b5b0e Mon Sep 17 00:00:00 2001
From: Zanie Blue <contact@zanie.dev>
Date: Tue, 18 Jul 2023 13:21:51 -0500
Subject: [PATCH 1/9] Implement `TokenKind` for type aliases (#5870)

Part of https://github.com/astral-sh/ruff/issues/5062
---
 crates/ruff_python_ast/src/token_kind.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/crates/ruff_python_ast/src/token_kind.rs b/crates/ruff_python_ast/src/token_kind.rs
index 89b88c7fccc9d6..4c90f3c30fb0fc 100644
--- a/crates/ruff_python_ast/src/token_kind.rs
+++ b/crates/ruff_python_ast/src/token_kind.rs
@@ -156,6 +156,7 @@ pub enum TokenKind {
     Try,
     While,
     Match,
+    Type,
     Case,
     With,
     Yield,
@@ -426,13 +427,13 @@ impl TokenKind {
             Tok::While => TokenKind::While,
             Tok::Match => TokenKind::Match,
             Tok::Case => TokenKind::Case,
+            Tok::Type => TokenKind::Type,
             Tok::With => TokenKind::With,
             Tok::Yield => TokenKind::Yield,
             Tok::StartModule => TokenKind::StartModule,
             Tok::StartInteractive => TokenKind::StartInteractive,
             Tok::StartExpression => TokenKind::StartExpression,
             Tok::MagicCommand { .. } => todo!(),
-            Tok::Type => todo!(),
         }
     }
 }

From 4204fc002d67dcb002df6852ec89db9a999c2029 Mon Sep 17 00:00:00 2001
From: Charlie Marsh <charlie.r.marsh@gmail.com>
Date: Tue, 18 Jul 2023 14:27:46 -0400
Subject: [PATCH 2/9] Remove exception-handler lexing from
 `unused-bound-exception` fix (#5851)

## Summary

The motivation here is that it will make this rule easier to rewrite as
a deferred check. Right now, we can't run this rule in the deferred
phase, because it depends on the `except_handler` to power its autofix.
Instead of lexing the `except_handler`, we can use the `SimpleTokenizer`
from the formatter, and just lex forwards and backwards.

For context, this rule detects the unused `e` in:

```python
try:
  pass
except ValueError as e:
  pass
```
---
 Cargo.lock                                    |  3 +-
 crates/ruff/src/checkers/ast/mod.rs           |  7 +--
 crates/ruff/src/rules/pyflakes/fixes.rs       | 54 +++++++++----------
 crates/ruff_python_formatter/Cargo.toml       |  1 -
 crates/ruff_python_formatter/src/builders.rs  |  4 +-
 .../src/comments/format.rs                    |  2 +-
 .../src/comments/placement.rs                 |  6 ++-
 .../src/expression/expr_call.rs               |  3 +-
 .../src/expression/expr_slice.rs              | 17 +++---
 .../src/expression/expr_unary_op.rs           | 16 +++---
 .../src/expression/parentheses.rs             | 10 ++--
 crates/ruff_python_formatter/src/lib.rs       |  1 -
 .../src/other/arguments.rs                    |  4 +-
 .../src/statement/stmt_class_def.rs           | 11 ++--
 .../src/statement/stmt_function_def.rs        |  2 +-
 .../src/statement/stmt_with.rs                |  2 +-
 .../src/statement/suite.rs                    | 18 ++++---
 crates/ruff_python_whitespace/Cargo.toml      |  4 ++
 crates/ruff_python_whitespace/src/lib.rs      |  2 +
 ...hitespace__tokenizer__tests__Reverse.snap} |  2 +-
 ..._identifier_ending_in_non_start_char.snap} |  2 +-
 ...e_word_with_only_id_continuing_chars.snap} |  2 +-
 ...ce__tokenizer__tests__tokenize_bogus.snap} |  2 +-
 ...ce__tokenizer__tests__tokenize_comma.snap} |  2 +-
 ...enizer__tests__tokenize_continuation.snap} |  2 +-
 ...tokenizer__tests__tokenize_multichar.snap} |  2 +-
 ...kenizer__tests__tokenize_parentheses.snap} |  2 +-
 ...ce__tokenizer__tests__tokenize_slash.snap} |  2 +-
 ...tokenizer__tests__tokenize_substring.snap} |  2 +-
 ...e__tokenizer__tests__tokenize_trivia.snap} |  2 +-
 ...ce__tokenizer__tests__tricky_unicode.snap} |  2 +-
 .../src/tokenizer.rs}                         | 44 +++++++--------
 32 files changed, 124 insertions(+), 111 deletions(-)
 rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__Reverse.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__Reverse.snap} (98%)
 rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__identifier_ending_in_non_start_char.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__identifier_ending_in_non_start_char.snap} (65%)
 rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__ignore_word_with_only_id_continuing_chars.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__ignore_word_with_only_id_continuing_chars.snap} (80%)
 rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_bogus.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_bogus.snap} (97%)
 rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_comma.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_comma.snap} (83%)
 rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_continuation.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_continuation.snap} (88%)
 rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_multichar.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_multichar.snap} (89%)
 rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_parentheses.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_parentheses.snap} (88%)
 rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_slash.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_slash.snap} (91%)
 rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_substring.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_substring.snap} (81%)
 rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_trivia.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_trivia.snap} (84%)
 rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tricky_unicode.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tricky_unicode.snap} (65%)
 rename crates/{ruff_python_formatter/src/trivia.rs => ruff_python_whitespace/src/tokenizer.rs} (93%)

diff --git a/Cargo.lock b/Cargo.lock
index b58fdb4ab8b968..4d904a5884caf1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2157,7 +2157,6 @@ dependencies = [
  "similar",
  "smallvec",
  "thiserror",
- "unic-ucd-ident",
 ]
 
 [[package]]
@@ -2195,8 +2194,10 @@ version = "0.0.0"
 name = "ruff_python_whitespace"
 version = "0.0.0"
 dependencies = [
+ "insta",
  "memchr",
  "ruff_text_size",
+ "unic-ucd-ident",
 ]
 
 [[package]]
diff --git a/crates/ruff/src/checkers/ast/mod.rs b/crates/ruff/src/checkers/ast/mod.rs
index 455e62b52083bc..c1bf7fb8a5b675 100644
--- a/crates/ruff/src/checkers/ast/mod.rs
+++ b/crates/ruff/src/checkers/ast/mod.rs
@@ -4103,11 +4103,8 @@ where
                     );
                     if self.patch(Rule::UnusedVariable) {
                         diagnostic.try_set_fix(|| {
-                            pyflakes::fixes::remove_exception_handler_assignment(
-                                except_handler,
-                                self.locator,
-                            )
-                            .map(Fix::automatic)
+                            pyflakes::fixes::remove_exception_handler_assignment(name, self.locator)
+                                .map(Fix::automatic)
                         });
                     }
                     self.diagnostics.push(diagnostic);
diff --git a/crates/ruff/src/rules/pyflakes/fixes.rs b/crates/ruff/src/rules/pyflakes/fixes.rs
index 694e03bf87eeac..c6cdee0f5a3219 100644
--- a/crates/ruff/src/rules/pyflakes/fixes.rs
+++ b/crates/ruff/src/rules/pyflakes/fixes.rs
@@ -1,10 +1,10 @@
-use anyhow::{bail, Ok, Result};
+use anyhow::{Context, Ok, Result};
 use ruff_text_size::TextRange;
-use rustpython_parser::ast::{ExceptHandler, Expr, Ranged};
-use rustpython_parser::{lexer, Mode};
+use rustpython_parser::ast::{Expr, Identifier, Ranged};
 
 use ruff_diagnostics::Edit;
 use ruff_python_ast::source_code::{Locator, Stylist};
+use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
 
 use crate::autofix::codemods::CodegenStylist;
 use crate::cst::matchers::{match_call_mut, match_dict, match_expression};
@@ -90,31 +90,29 @@ pub(crate) fn remove_unused_positional_arguments_from_format_call(
 
 /// Generate a [`Edit`] to remove the binding from an exception handler.
 pub(crate) fn remove_exception_handler_assignment(
-    except_handler: &ExceptHandler,
+    bound_exception: &Identifier,
     locator: &Locator,
 ) -> Result<Edit> {
-    let contents = locator.slice(except_handler.range());
-    let mut fix_start = None;
-    let mut fix_end = None;
-
-    // End of the token just before the `as` to the semicolon.
-    let mut prev = None;
-    for (tok, range) in
-        lexer::lex_starts_at(contents, Mode::Module, except_handler.start()).flatten()
-    {
-        if tok.is_as() {
-            fix_start = prev;
-        }
-        if tok.is_colon() {
-            fix_end = Some(range.start());
-            break;
-        }
-        prev = Some(range.end());
-    }
-
-    if let (Some(start), Some(end)) = (fix_start, fix_end) {
-        Ok(Edit::deletion(start, end))
-    } else {
-        bail!("Could not find span of exception handler")
-    }
+    // Lex backwards, to the token just before the `as`.
+    let mut tokenizer =
+        SimpleTokenizer::up_to(bound_exception.start(), locator.contents()).skip_trivia();
+
+    // Eat the `as` token.
+    let preceding = tokenizer
+        .next_back()
+        .context("expected the exception name to be preceded by `as`")?;
+    debug_assert!(matches!(preceding.kind, TokenKind::As));
+
+    // Lex to the end of the preceding token, which should be the exception value.
+    let preceding = tokenizer
+        .next_back()
+        .context("expected the exception name to be preceded by a token")?;
+
+    // Lex forwards, to the `:` token.
+    let following = SimpleTokenizer::starts_at(bound_exception.end(), locator.contents())
+        .next()
+        .context("expected the exception name to be followed by a colon")?;
+    debug_assert!(matches!(following.kind, TokenKind::Colon));
+
+    Ok(Edit::deletion(preceding.end(), following.start()))
 }
diff --git a/crates/ruff_python_formatter/Cargo.toml b/crates/ruff_python_formatter/Cargo.toml
index 381c3ec6c9cca5..ae36a021198b8b 100644
--- a/crates/ruff_python_formatter/Cargo.toml
+++ b/crates/ruff_python_formatter/Cargo.toml
@@ -28,7 +28,6 @@ rustpython-parser = { workspace = true }
 serde = { workspace = true, optional = true }
 smallvec = { workspace = true }
 thiserror = { workspace = true }
-unic-ucd-ident = "0.9.0"
 
 [dev-dependencies]
 ruff_formatter = { path = "../ruff_formatter", features = ["serde"]}
diff --git a/crates/ruff_python_formatter/src/builders.rs b/crates/ruff_python_formatter/src/builders.rs
index 9f24a49ca2a7d1..0456d58df9af77 100644
--- a/crates/ruff_python_formatter/src/builders.rs
+++ b/crates/ruff_python_formatter/src/builders.rs
@@ -2,10 +2,12 @@ use ruff_text_size::{TextRange, TextSize};
 use rustpython_parser::ast::Ranged;
 
 use ruff_formatter::{format_args, write, Argument, Arguments};
+use ruff_python_whitespace::{
+    lines_after, skip_trailing_trivia, SimpleTokenizer, Token, TokenKind,
+};
 
 use crate::context::NodeLevel;
 use crate::prelude::*;
-use crate::trivia::{lines_after, skip_trailing_trivia, SimpleTokenizer, Token, TokenKind};
 use crate::MagicTrailingComma;
 
 /// Adds parentheses and indents `content` if it doesn't fit on a line.
diff --git a/crates/ruff_python_formatter/src/comments/format.rs b/crates/ruff_python_formatter/src/comments/format.rs
index 84b0e3b654a6d1..aa7d7296f10592 100644
--- a/crates/ruff_python_formatter/src/comments/format.rs
+++ b/crates/ruff_python_formatter/src/comments/format.rs
@@ -3,11 +3,11 @@ use rustpython_parser::ast::Ranged;
 
 use ruff_formatter::{format_args, write, FormatError, SourceCode};
 use ruff_python_ast::node::{AnyNodeRef, AstNode};
+use ruff_python_whitespace::{lines_after, lines_before, skip_trailing_trivia};
 
 use crate::comments::SourceComment;
 use crate::context::NodeLevel;
 use crate::prelude::*;
-use crate::trivia::{lines_after, lines_before, skip_trailing_trivia};
 
 /// Formats the leading comments of a node.
 pub(crate) fn leading_node_comments<T>(node: &T) -> FormatLeadingComments
diff --git a/crates/ruff_python_formatter/src/comments/placement.rs b/crates/ruff_python_formatter/src/comments/placement.rs
index e4d7ec9d82aa03..27ae9688c86159 100644
--- a/crates/ruff_python_formatter/src/comments/placement.rs
+++ b/crates/ruff_python_formatter/src/comments/placement.rs
@@ -7,14 +7,16 @@ use rustpython_parser::ast::{Expr, ExprIfExp, ExprSlice, Ranged};
 use ruff_python_ast::node::{AnyNodeRef, AstNode};
 use ruff_python_ast::source_code::Locator;
 use ruff_python_ast::whitespace;
-use ruff_python_whitespace::{PythonWhitespace, UniversalNewlines};
+use ruff_python_whitespace::{
+    first_non_trivia_token_rev, PythonWhitespace, SimpleTokenizer, Token, TokenKind,
+    UniversalNewlines,
+};
 
 use crate::comments::visitor::{CommentPlacement, DecoratedComment};
 use crate::expression::expr_slice::{assign_comment_in_slice, ExprSliceCommentSection};
 use crate::other::arguments::{
     assign_argument_separator_comment_placement, find_argument_separators,
 };
-use crate::trivia::{first_non_trivia_token_rev, SimpleTokenizer, Token, TokenKind};
 
 /// Implements the custom comment placement logic.
 pub(super) fn place_comment<'a>(
diff --git a/crates/ruff_python_formatter/src/expression/expr_call.rs b/crates/ruff_python_formatter/src/expression/expr_call.rs
index c46aa374f7c41f..4054208bafae3c 100644
--- a/crates/ruff_python_formatter/src/expression/expr_call.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_call.rs
@@ -3,14 +3,13 @@ use rustpython_parser::ast::{Expr, ExprCall, Ranged};
 
 use ruff_formatter::write;
 use ruff_python_ast::node::AnyNodeRef;
+use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
 
 use crate::comments::dangling_comments;
-
 use crate::expression::parentheses::{
     parenthesized, NeedsParentheses, OptionalParentheses, Parentheses,
 };
 use crate::prelude::*;
-use crate::trivia::{SimpleTokenizer, TokenKind};
 use crate::FormatNodeRule;
 
 #[derive(Default)]
diff --git a/crates/ruff_python_formatter/src/expression/expr_slice.rs b/crates/ruff_python_formatter/src/expression/expr_slice.rs
index 0d9dd7445fd8f2..93434b87777fd1 100644
--- a/crates/ruff_python_formatter/src/expression/expr_slice.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_slice.rs
@@ -1,15 +1,16 @@
+use ruff_text_size::TextRange;
+use rustpython_parser::ast::ExprSlice;
+use rustpython_parser::ast::{Expr, Ranged};
+
+use ruff_formatter::prelude::{hard_line_break, line_suffix_boundary, space, text};
+use ruff_formatter::{write, Buffer, Format, FormatError, FormatResult};
+use ruff_python_ast::node::{AnyNodeRef, AstNode};
+use ruff_python_whitespace::{first_non_trivia_token, Token, TokenKind};
+
 use crate::comments::{dangling_comments, SourceComment};
 use crate::context::PyFormatContext;
 use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
-use crate::trivia::Token;
-use crate::trivia::{first_non_trivia_token, TokenKind};
 use crate::{AsFormat, FormatNodeRule, PyFormatter};
-use ruff_formatter::prelude::{hard_line_break, line_suffix_boundary, space, text};
-use ruff_formatter::{write, Buffer, Format, FormatError, FormatResult};
-use ruff_python_ast::node::{AnyNodeRef, AstNode};
-use ruff_text_size::TextRange;
-use rustpython_parser::ast::ExprSlice;
-use rustpython_parser::ast::{Expr, Ranged};
 
 #[derive(Default)]
 pub struct FormatExprSlice;
diff --git a/crates/ruff_python_formatter/src/expression/expr_unary_op.rs b/crates/ruff_python_formatter/src/expression/expr_unary_op.rs
index 97462c4d7f9b97..ffe5f0f69cff9a 100644
--- a/crates/ruff_python_formatter/src/expression/expr_unary_op.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_unary_op.rs
@@ -1,14 +1,16 @@
+use ruff_text_size::{TextLen, TextRange};
+use rustpython_parser::ast::UnaryOp;
+use rustpython_parser::ast::{ExprUnaryOp, Ranged};
+
+use ruff_formatter::prelude::{hard_line_break, space, text};
+use ruff_formatter::{Format, FormatContext, FormatResult};
+use ruff_python_ast::node::AnyNodeRef;
+use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
+
 use crate::comments::trailing_comments;
 use crate::context::PyFormatContext;
 use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses};
-use crate::trivia::{SimpleTokenizer, TokenKind};
 use crate::{AsFormat, FormatNodeRule, PyFormatter};
-use ruff_formatter::prelude::{hard_line_break, space, text};
-use ruff_formatter::{Format, FormatContext, FormatResult};
-use ruff_python_ast::node::AnyNodeRef;
-use ruff_text_size::{TextLen, TextRange};
-use rustpython_parser::ast::UnaryOp;
-use rustpython_parser::ast::{ExprUnaryOp, Ranged};
 
 #[derive(Default)]
 pub struct FormatExprUnaryOp;
diff --git a/crates/ruff_python_formatter/src/expression/parentheses.rs b/crates/ruff_python_formatter/src/expression/parentheses.rs
index 281d1896b8ab28..85981345f4124c 100644
--- a/crates/ruff_python_formatter/src/expression/parentheses.rs
+++ b/crates/ruff_python_formatter/src/expression/parentheses.rs
@@ -1,10 +1,12 @@
-use crate::context::NodeLevel;
-use crate::prelude::*;
-use crate::trivia::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind};
+use rustpython_parser::ast::Ranged;
+
 use ruff_formatter::prelude::tag::Condition;
 use ruff_formatter::{format_args, write, Argument, Arguments};
 use ruff_python_ast::node::AnyNodeRef;
-use rustpython_parser::ast::Ranged;
+use ruff_python_whitespace::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind};
+
+use crate::context::NodeLevel;
+use crate::prelude::*;
 
 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
 pub(crate) enum OptionalParentheses {
diff --git a/crates/ruff_python_formatter/src/lib.rs b/crates/ruff_python_formatter/src/lib.rs
index 7055bab2faf569..d1a7420291177d 100644
--- a/crates/ruff_python_formatter/src/lib.rs
+++ b/crates/ruff_python_formatter/src/lib.rs
@@ -33,7 +33,6 @@ pub(crate) mod other;
 pub(crate) mod pattern;
 mod prelude;
 pub(crate) mod statement;
-mod trivia;
 
 include!("../../ruff_formatter/shared_traits.rs");
 
diff --git a/crates/ruff_python_formatter/src/other/arguments.rs b/crates/ruff_python_formatter/src/other/arguments.rs
index 3e84558ad2c6a4..5e4d7fe6f34901 100644
--- a/crates/ruff_python_formatter/src/other/arguments.rs
+++ b/crates/ruff_python_formatter/src/other/arguments.rs
@@ -1,9 +1,11 @@
 use std::usize;
 
+use ruff_text_size::{TextRange, TextSize};
 use rustpython_parser::ast::{Arguments, Ranged};
 
 use ruff_formatter::{format_args, write};
 use ruff_python_ast::node::{AnyNodeRef, AstNode};
+use ruff_python_whitespace::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind};
 
 use crate::comments::{
     dangling_comments, leading_comments, leading_node_comments, trailing_comments,
@@ -12,9 +14,7 @@ use crate::comments::{
 use crate::context::NodeLevel;
 use crate::expression::parentheses::parenthesized;
 use crate::prelude::*;
-use crate::trivia::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind};
 use crate::FormatNodeRule;
-use ruff_text_size::{TextRange, TextSize};
 
 #[derive(Default)]
 pub struct FormatArguments;
diff --git a/crates/ruff_python_formatter/src/statement/stmt_class_def.rs b/crates/ruff_python_formatter/src/statement/stmt_class_def.rs
index 86c76882849767..0876ac7347a6bc 100644
--- a/crates/ruff_python_formatter/src/statement/stmt_class_def.rs
+++ b/crates/ruff_python_formatter/src/statement/stmt_class_def.rs
@@ -1,11 +1,12 @@
-use crate::comments::trailing_comments;
+use ruff_text_size::TextRange;
+use rustpython_parser::ast::{Ranged, StmtClassDef};
+
+use ruff_formatter::write;
+use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
 
+use crate::comments::trailing_comments;
 use crate::expression::parentheses::{parenthesized, Parentheses};
 use crate::prelude::*;
-use crate::trivia::{SimpleTokenizer, TokenKind};
-use ruff_formatter::write;
-use ruff_text_size::TextRange;
-use rustpython_parser::ast::{Ranged, StmtClassDef};
 
 #[derive(Default)]
 pub struct FormatStmtClassDef;
diff --git a/crates/ruff_python_formatter/src/statement/stmt_function_def.rs b/crates/ruff_python_formatter/src/statement/stmt_function_def.rs
index 69f370e7c10e32..f7fa032174d2b6 100644
--- a/crates/ruff_python_formatter/src/statement/stmt_function_def.rs
+++ b/crates/ruff_python_formatter/src/statement/stmt_function_def.rs
@@ -2,12 +2,12 @@ use rustpython_parser::ast::{Ranged, StmtFunctionDef};
 
 use ruff_formatter::{write, FormatOwnedWithRule, FormatRefWithRule};
 use ruff_python_ast::function::AnyFunctionDefinition;
+use ruff_python_whitespace::{lines_after, skip_trailing_trivia};
 
 use crate::comments::{leading_comments, trailing_comments};
 use crate::context::NodeLevel;
 use crate::expression::parentheses::{optional_parentheses, Parentheses};
 use crate::prelude::*;
-use crate::trivia::{lines_after, skip_trailing_trivia};
 use crate::FormatNodeRule;
 
 #[derive(Default)]
diff --git a/crates/ruff_python_formatter/src/statement/stmt_with.rs b/crates/ruff_python_formatter/src/statement/stmt_with.rs
index 56eca66b17236d..2c610f029d5883 100644
--- a/crates/ruff_python_formatter/src/statement/stmt_with.rs
+++ b/crates/ruff_python_formatter/src/statement/stmt_with.rs
@@ -3,13 +3,13 @@ use rustpython_parser::ast::{Ranged, StmtAsyncWith, StmtWith, Suite, WithItem};
 
 use ruff_formatter::{format_args, write, FormatError};
 use ruff_python_ast::node::AnyNodeRef;
+use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
 
 use crate::comments::trailing_comments;
 use crate::expression::parentheses::{
     in_parentheses_only_soft_line_break_or_space, optional_parentheses,
 };
 use crate::prelude::*;
-use crate::trivia::{SimpleTokenizer, TokenKind};
 use crate::FormatNodeRule;
 
 pub(super) enum AnyStatementWith<'a> {
diff --git a/crates/ruff_python_formatter/src/statement/suite.rs b/crates/ruff_python_formatter/src/statement/suite.rs
index 92fc32ed4e6018..09a12f6697e148 100644
--- a/crates/ruff_python_formatter/src/statement/suite.rs
+++ b/crates/ruff_python_formatter/src/statement/suite.rs
@@ -1,10 +1,12 @@
-use crate::context::NodeLevel;
-use crate::prelude::*;
-use crate::trivia::lines_before;
+use rustpython_parser::ast::{Ranged, Stmt, Suite};
+
 use ruff_formatter::{
     format_args, write, FormatOwnedWithRule, FormatRefWithRule, FormatRuleWithOptions,
 };
-use rustpython_parser::ast::{Ranged, Stmt, Suite};
+use ruff_python_whitespace::lines_before;
+
+use crate::context::NodeLevel;
+use crate::prelude::*;
 
 /// Level at which the [`Suite`] appears in the source code.
 #[derive(Copy, Clone, Debug)]
@@ -185,13 +187,15 @@ impl<'ast> IntoFormat<PyFormatContext<'ast>> for Suite {
 
 #[cfg(test)]
 mod tests {
+    use rustpython_parser::ast::Suite;
+    use rustpython_parser::Parse;
+
+    use ruff_formatter::format;
+
     use crate::comments::Comments;
     use crate::prelude::*;
     use crate::statement::suite::SuiteLevel;
     use crate::PyFormatOptions;
-    use ruff_formatter::format;
-    use rustpython_parser::ast::Suite;
-    use rustpython_parser::Parse;
 
     fn format_suite(level: SuiteLevel) -> String {
         let source = r#"
diff --git a/crates/ruff_python_whitespace/Cargo.toml b/crates/ruff_python_whitespace/Cargo.toml
index cbfc1aea2470ce..22b36562d39382 100644
--- a/crates/ruff_python_whitespace/Cargo.toml
+++ b/crates/ruff_python_whitespace/Cargo.toml
@@ -16,3 +16,7 @@ license = { workspace = true }
 ruff_text_size = { workspace = true }
 
 memchr = { workspace = true }
+unic-ucd-ident = "0.9.0"
+
+[dev-dependencies]
+insta = { workspace = true }
diff --git a/crates/ruff_python_whitespace/src/lib.rs b/crates/ruff_python_whitespace/src/lib.rs
index b8c95e351c5fe5..4e16d7ca2dcbac 100644
--- a/crates/ruff_python_whitespace/src/lib.rs
+++ b/crates/ruff_python_whitespace/src/lib.rs
@@ -1,7 +1,9 @@
 mod cursor;
 mod newlines;
+mod tokenizer;
 mod whitespace;
 
 pub use cursor::*;
 pub use newlines::*;
+pub use tokenizer::*;
 pub use whitespace::*;
diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__Reverse.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__Reverse.snap
similarity index 98%
rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__Reverse.snap
rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__Reverse.snap
index ec701539c60a62..3ae643205e83fa 100644
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__Reverse.snap
+++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__Reverse.snap
@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokenize_reverse()
 ---
 [
diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__identifier_ending_in_non_start_char.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__identifier_ending_in_non_start_char.snap
similarity index 65%
rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__identifier_ending_in_non_start_char.snap
rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__identifier_ending_in_non_start_char.snap
index 15e9d84407efd2..6f19b912734087 100644
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__identifier_ending_in_non_start_char.snap
+++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__identifier_ending_in_non_start_char.snap
@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [
diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__ignore_word_with_only_id_continuing_chars.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__ignore_word_with_only_id_continuing_chars.snap
similarity index 80%
rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__ignore_word_with_only_id_continuing_chars.snap
rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__ignore_word_with_only_id_continuing_chars.snap
index 26e9fd18bcbeb3..ccb0282831ef1c 100644
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__ignore_word_with_only_id_continuing_chars.snap
+++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__ignore_word_with_only_id_continuing_chars.snap
@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [
diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_bogus.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_bogus.snap
similarity index 97%
rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_bogus.snap
rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_bogus.snap
index 79368160895185..f5005ec2c91376 100644
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_bogus.snap
+++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_bogus.snap
@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [
diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_comma.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_comma.snap
similarity index 83%
rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_comma.snap
rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_comma.snap
index 38d1fed60a6609..a1f98abd4eaea3 100644
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_comma.snap
+++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_comma.snap
@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [
diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_continuation.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_continuation.snap
similarity index 88%
rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_continuation.snap
rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_continuation.snap
index 83079fe81aeb6e..5e9802280d882d 100644
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_continuation.snap
+++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_continuation.snap
@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [
diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_multichar.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_multichar.snap
similarity index 89%
rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_multichar.snap
rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_multichar.snap
index 16a1293b4420a9..ff371d781f317a 100644
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_multichar.snap
+++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_multichar.snap
@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [
diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_parentheses.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_parentheses.snap
similarity index 88%
rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_parentheses.snap
rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_parentheses.snap
index ccd6969c2db2db..6c792f7cf07822 100644
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_parentheses.snap
+++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_parentheses.snap
@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [
diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_slash.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_slash.snap
similarity index 91%
rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_slash.snap
rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_slash.snap
index 093715cf177983..f82f501d652efe 100644
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_slash.snap
+++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_slash.snap
@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [
diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_substring.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_substring.snap
similarity index 81%
rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_substring.snap
rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_substring.snap
index 181b438c3f3301..9b06f81cb9747c 100644
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_substring.snap
+++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_substring.snap
@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [
diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_trivia.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_trivia.snap
similarity index 84%
rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_trivia.snap
rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_trivia.snap
index f1d708d6cb8bce..79f9130287c804 100644
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_trivia.snap
+++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_trivia.snap
@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [
diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tricky_unicode.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tricky_unicode.snap
similarity index 65%
rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tricky_unicode.snap
rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tricky_unicode.snap
index 91b9cb397a04c3..c8aab65b39d49b 100644
--- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tricky_unicode.snap
+++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tricky_unicode.snap
@@ -1,5 +1,5 @@
 ---
-source: crates/ruff_python_formatter/src/trivia.rs
+source: crates/ruff_python_whitespace/src/tokenizer.rs
 expression: test_case.tokens()
 ---
 [
diff --git a/crates/ruff_python_formatter/src/trivia.rs b/crates/ruff_python_whitespace/src/tokenizer.rs
similarity index 93%
rename from crates/ruff_python_formatter/src/trivia.rs
rename to crates/ruff_python_whitespace/src/tokenizer.rs
index 63f92b6e2f8019..c8aa15dbb76c02 100644
--- a/crates/ruff_python_formatter/src/trivia.rs
+++ b/crates/ruff_python_whitespace/src/tokenizer.rs
@@ -1,7 +1,7 @@
 use ruff_text_size::{TextLen, TextRange, TextSize};
 use unic_ucd_ident::{is_xid_continue, is_xid_start};
 
-use ruff_python_whitespace::{is_python_whitespace, Cursor};
+use crate::{is_python_whitespace, Cursor};
 
 /// Searches for the first non-trivia character in `range`.
 ///
@@ -11,7 +11,7 @@ use ruff_python_whitespace::{is_python_whitespace, Cursor};
 /// of the character, the second item the non-trivia character.
 ///
 /// Returns `None` if the range is empty or only contains trivia (whitespace or comments).
-pub(crate) fn first_non_trivia_token(offset: TextSize, code: &str) -> Option<Token> {
+pub fn first_non_trivia_token(offset: TextSize, code: &str) -> Option<Token> {
     SimpleTokenizer::starts_at(offset, code)
         .skip_trivia()
         .next()
@@ -23,14 +23,14 @@ pub(crate) fn first_non_trivia_token(offset: TextSize, code: &str) -> Option<Tok
 /// ## Notes
 ///
 /// Prefer [`first_non_trivia_token`] whenever possible because reverse lookup is expensive because of comments.
-pub(crate) fn first_non_trivia_token_rev(offset: TextSize, code: &str) -> Option<Token> {
+pub fn first_non_trivia_token_rev(offset: TextSize, code: &str) -> Option<Token> {
     SimpleTokenizer::up_to(offset, code)
         .skip_trivia()
         .next_back()
 }
 
 /// Returns the number of newlines between `offset` and the first non whitespace character in the source code.
-pub(crate) fn lines_before(offset: TextSize, code: &str) -> u32 {
+pub fn lines_before(offset: TextSize, code: &str) -> u32 {
     let tokens = SimpleTokenizer::up_to(offset, code);
     let mut newlines = 0u32;
 
@@ -52,7 +52,7 @@ pub(crate) fn lines_before(offset: TextSize, code: &str) -> u32 {
 }
 
 /// Counts the empty lines between `offset` and the first non-whitespace character.
-pub(crate) fn lines_after(offset: TextSize, code: &str) -> u32 {
+pub fn lines_after(offset: TextSize, code: &str) -> u32 {
     let tokens = SimpleTokenizer::starts_at(offset, code);
     let mut newlines = 0u32;
 
@@ -74,7 +74,7 @@ pub(crate) fn lines_after(offset: TextSize, code: &str) -> u32 {
 }
 
 /// Returns the position after skipping any trailing trivia up to, but not including the newline character.
-pub(crate) fn skip_trailing_trivia(offset: TextSize, code: &str) -> TextSize {
+pub fn skip_trailing_trivia(offset: TextSize, code: &str) -> TextSize {
     let tokenizer = SimpleTokenizer::starts_at(offset, code);
 
     for token in tokenizer {
@@ -110,32 +110,32 @@ fn is_non_ascii_identifier_start(c: char) -> bool {
 }
 
 #[derive(Clone, Debug, Eq, PartialEq, Hash)]
-pub(crate) struct Token {
-    pub(crate) kind: TokenKind,
-    pub(crate) range: TextRange,
+pub struct Token {
+    pub kind: TokenKind,
+    pub range: TextRange,
 }
 
 impl Token {
-    pub(crate) const fn kind(&self) -> TokenKind {
+    pub const fn kind(&self) -> TokenKind {
         self.kind
     }
 
     #[allow(unused)]
-    pub(crate) const fn range(&self) -> TextRange {
+    pub const fn range(&self) -> TextRange {
         self.range
     }
 
-    pub(crate) const fn start(&self) -> TextSize {
+    pub const fn start(&self) -> TextSize {
         self.range.start()
     }
 
-    pub(crate) const fn end(&self) -> TextSize {
+    pub const fn end(&self) -> TextSize {
         self.range.end()
     }
 }
 
 #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
-pub(crate) enum TokenKind {
+pub enum TokenKind {
     /// A comment, not including the trailing new line.
     Comment,
 
@@ -247,7 +247,7 @@ impl TokenKind {
 ///
 /// The tokenizer doesn't guarantee any correctness after it returned a [`TokenKind::Other`]. That's why it
 /// will return [`TokenKind::Bogus`] for every character after until it reaches the end of the file.
-pub(crate) struct SimpleTokenizer<'a> {
+pub struct SimpleTokenizer<'a> {
     offset: TextSize,
     back_offset: TextSize,
     /// `true` when it is known that the current `back` line has no comment for sure.
@@ -258,7 +258,7 @@ pub(crate) struct SimpleTokenizer<'a> {
 }
 
 impl<'a> SimpleTokenizer<'a> {
-    pub(crate) fn new(source: &'a str, range: TextRange) -> Self {
+    pub fn new(source: &'a str, range: TextRange) -> Self {
         Self {
             offset: range.start(),
             back_offset: range.end(),
@@ -269,20 +269,20 @@ impl<'a> SimpleTokenizer<'a> {
         }
     }
 
-    pub(crate) fn starts_at(offset: TextSize, source: &'a str) -> Self {
+    pub fn starts_at(offset: TextSize, source: &'a str) -> Self {
         let range = TextRange::new(offset, source.text_len());
         Self::new(source, range)
     }
 
     /// Creates a tokenizer that lexes tokens from the start of `source` up to `offset`.
-    pub(crate) fn up_to(offset: TextSize, source: &'a str) -> Self {
+    pub fn up_to(offset: TextSize, source: &'a str) -> Self {
         Self::new(source, TextRange::up_to(offset))
     }
 
     /// Creates a tokenizer that lexes tokens from the start of `source` up to `offset`, and informs
     /// the lexer that the line at `offset` contains no comments. This can significantly speed up backwards lexing
     /// because the lexer doesn't need to scan for comments.
-    pub(crate) fn up_to_without_back_comment(offset: TextSize, source: &'a str) -> Self {
+    pub fn up_to_without_back_comment(offset: TextSize, source: &'a str) -> Self {
         let mut tokenizer = Self::up_to(offset, source);
         tokenizer.back_line_has_no_comment = true;
         tokenizer
@@ -375,7 +375,7 @@ impl<'a> SimpleTokenizer<'a> {
 
     /// Returns the next token from the back. Prefer iterating forwards. Iterating backwards is significantly more expensive
     /// because it needs to check if the line has any comments when encountering any non-trivia token.
-    pub(crate) fn next_token_back(&mut self) -> Token {
+    pub fn next_token_back(&mut self) -> Token {
         self.cursor.start_token();
 
         let Some(last) = self.cursor.bump_back() else {
@@ -503,7 +503,7 @@ impl<'a> SimpleTokenizer<'a> {
         token
     }
 
-    pub(crate) fn skip_trivia(self) -> impl Iterator<Item = Token> + DoubleEndedIterator + 'a {
+    pub fn skip_trivia(self) -> impl Iterator<Item = Token> + DoubleEndedIterator + 'a {
         self.filter(|t| !t.kind().is_trivia())
     }
 }
@@ -539,7 +539,7 @@ mod tests {
     use insta::assert_debug_snapshot;
     use ruff_text_size::{TextLen, TextRange, TextSize};
 
-    use crate::trivia::{lines_after, lines_before, SimpleTokenizer, Token};
+    use crate::tokenizer::{lines_after, lines_before, SimpleTokenizer, Token};
 
     struct TokenizationTestCase {
         source: &'static str,

From c577045f2e280796591b1afcf867322d6db58062 Mon Sep 17 00:00:00 2001
From: Micha Reiser <micha@reiser.io>
Date: Tue, 18 Jul 2023 23:05:55 +0200
Subject: [PATCH 3/9] perf(formatter): Use memchar for faster back tokenization
 (#5823)

---
 .../ruff_python_whitespace/src/tokenizer.rs   | 52 +++++++++++--------
 1 file changed, 31 insertions(+), 21 deletions(-)

diff --git a/crates/ruff_python_whitespace/src/tokenizer.rs b/crates/ruff_python_whitespace/src/tokenizer.rs
index c8aa15dbb76c02..cb56ba5a839f4d 100644
--- a/crates/ruff_python_whitespace/src/tokenizer.rs
+++ b/crates/ruff_python_whitespace/src/tokenizer.rs
@@ -1,3 +1,4 @@
+use memchr::memrchr3_iter;
 use ruff_text_size::{TextLen, TextRange, TextSize};
 use unic_ucd_ident::{is_xid_continue, is_xid_start};
 
@@ -419,36 +420,45 @@ impl<'a> SimpleTokenizer<'a> {
 
             // For all other tokens, test if the character isn't part of a comment.
             c => {
-                let mut comment_offset = None;
-
                 // Skip the test whether there's a preceding comment if it has been performed before.
-                if !self.back_line_has_no_comment {
-                    for (back_index, c) in self.cursor.chars().rev().enumerate() {
-                        match c {
-                            '#' => {
-                                // Potentially a comment
-                                comment_offset = Some(back_index + 1);
-                            }
-                            '\r' | '\n' | '\\' => {
-                                break;
-                            }
-                            c => {
-                                if !is_python_whitespace(c)
-                                    && TokenKind::from_non_trivia_char(c) == TokenKind::Other
-                                {
-                                    comment_offset = None;
-                                }
-                            }
+                let comment_offset = if self.back_line_has_no_comment {
+                    None
+                } else {
+                    let bytes = self.cursor.chars().as_str().as_bytes();
+                    let mut line_start = 0;
+                    let mut last_comment_offset = None;
+
+                    // Find the start of the line, or any potential comments.
+                    for index in memrchr3_iter(b'\n', b'\r', b'#', bytes) {
+                        if bytes[index] == b'#' {
+                            // Potentially a comment, but not guaranteed
+                            last_comment_offset = Some(index);
+                        } else {
+                            line_start = index + 1;
+                            break;
                         }
                     }
-                }
+
+                    // Verify if this is indeed a comment. Doing this only when we've found a comment is significantly
+                    // faster because comments are rare.
+                    last_comment_offset.filter(|last_comment_offset| {
+                        let before_comment =
+                            &self.cursor.chars().as_str()[line_start..*last_comment_offset];
+
+                        before_comment.chars().all(|c| {
+                            is_python_whitespace(c)
+                                || TokenKind::from_non_trivia_char(c) != TokenKind::Other
+                        })
+                    })
+                };
 
                 // From here on it is guaranteed that this line has no other comment.
                 self.back_line_has_no_comment = true;
 
                 if let Some(comment_offset) = comment_offset {
+                    let comment_length = self.cursor.chars().as_str().len() - comment_offset;
                     // It is a comment, bump all tokens
-                    for _ in 0..comment_offset {
+                    for _ in 0..comment_length {
                         self.cursor.bump_back().unwrap();
                     }
 

From a93254f026b8b3473386f7e7cf96a1b779a07305 Mon Sep 17 00:00:00 2001
From: Zanie Blue <contact@zanie.dev>
Date: Tue, 18 Jul 2023 16:25:49 -0500
Subject: [PATCH 4/9] Implement `unparse` for type aliases and parameters
 (#5869)

Part of https://github.com/astral-sh/ruff/issues/5062
---
 .../src/source_code/generator.rs              | 98 ++++++++++++++++++-
 1 file changed, 95 insertions(+), 3 deletions(-)

diff --git a/crates/ruff_python_ast/src/source_code/generator.rs b/crates/ruff_python_ast/src/source_code/generator.rs
index 412b897eedd767..5efc6fe0c07a14 100644
--- a/crates/ruff_python_ast/src/source_code/generator.rs
+++ b/crates/ruff_python_ast/src/source_code/generator.rs
@@ -6,7 +6,8 @@ use std::ops::Deref;
 use rustpython_literal::escape::{AsciiEscape, Escape, UnicodeEscape};
 use rustpython_parser::ast::{
     self, Alias, Arg, Arguments, BoolOp, CmpOp, Comprehension, Constant, ConversionFlag,
-    ExceptHandler, Expr, Identifier, MatchCase, Operator, Pattern, Stmt, Suite, WithItem,
+    ExceptHandler, Expr, Identifier, MatchCase, Operator, Pattern, Stmt, Suite, TypeParam,
+    TypeParamParamSpec, TypeParamTypeVar, TypeParamTypeVarTuple, WithItem,
 };
 
 use ruff_python_whitespace::LineEnding;
@@ -207,6 +208,7 @@ impl<'a> Generator<'a> {
                 body,
                 returns,
                 decorator_list,
+                type_params,
                 ..
             }) => {
                 self.newlines(if self.indent_depth == 0 { 2 } else { 1 });
@@ -219,6 +221,7 @@ impl<'a> Generator<'a> {
                 statement!({
                     self.p("def ");
                     self.p_id(name);
+                    self.unparse_type_params(type_params);
                     self.p("(");
                     self.unparse_args(args);
                     self.p(")");
@@ -239,6 +242,7 @@ impl<'a> Generator<'a> {
                 body,
                 returns,
                 decorator_list,
+                type_params,
                 ..
             }) => {
                 self.newlines(if self.indent_depth == 0 { 2 } else { 1 });
@@ -251,6 +255,7 @@ impl<'a> Generator<'a> {
                 statement!({
                     self.p("async def ");
                     self.p_id(name);
+                    self.unparse_type_params(type_params);
                     self.p("(");
                     self.unparse_args(args);
                     self.p(")");
@@ -271,8 +276,8 @@ impl<'a> Generator<'a> {
                 keywords,
                 body,
                 decorator_list,
+                type_params,
                 range: _,
-                type_params: _,
             }) => {
                 self.newlines(if self.indent_depth == 0 { 2 } else { 1 });
                 for decorator in decorator_list {
@@ -284,6 +289,7 @@ impl<'a> Generator<'a> {
                 statement!({
                     self.p("class ");
                     self.p_id(name);
+                    self.unparse_type_params(type_params);
                     let mut first = true;
                     for base in bases {
                         self.p_if(first, "(");
@@ -525,6 +531,18 @@ impl<'a> Generator<'a> {
                     self.indent_depth = self.indent_depth.saturating_sub(1);
                 }
             }
+            Stmt::TypeAlias(ast::StmtTypeAlias {
+                name,
+                range: _range,
+                type_params,
+                value,
+            }) => {
+                self.p("type ");
+                self.unparse_expr(name, precedence::MAX);
+                self.unparse_type_params(type_params);
+                self.p(" = ");
+                self.unparse_expr(value, precedence::ASSIGN);
+            }
             Stmt::Raise(ast::StmtRaise {
                 exc,
                 cause,
@@ -702,7 +720,6 @@ impl<'a> Generator<'a> {
                     self.p("continue");
                 });
             }
-            Stmt::TypeAlias(_) => todo!(),
         }
     }
 
@@ -830,6 +847,38 @@ impl<'a> Generator<'a> {
         self.body(&ast.body);
     }
 
+    fn unparse_type_params(&mut self, type_params: &Vec<TypeParam>) {
+        if !type_params.is_empty() {
+            self.p("[");
+            let mut first = true;
+            for type_param in type_params {
+                self.p_delim(&mut first, ", ");
+                self.unparse_type_param(type_param);
+            }
+            self.p("]");
+        }
+    }
+
+    pub(crate) fn unparse_type_param(&mut self, ast: &TypeParam) {
+        match ast {
+            TypeParam::TypeVar(TypeParamTypeVar { name, bound, .. }) => {
+                self.p_id(name);
+                if let Some(expr) = bound {
+                    self.p(": ");
+                    self.unparse_expr(expr, precedence::MAX);
+                }
+            }
+            TypeParam::TypeVarTuple(TypeParamTypeVarTuple { name, .. }) => {
+                self.p("*");
+                self.p_id(name);
+            }
+            TypeParam::ParamSpec(TypeParamParamSpec { name, .. }) => {
+                self.p("**");
+                self.p_id(name);
+            }
+        }
+    }
+
     pub(crate) fn unparse_expr(&mut self, ast: &Expr, level: u8) {
         macro_rules! opprec {
             ($opty:ident, $x:expr, $enu:path, $($var:ident($op:literal, $prec:ident)),*$(,)?) => {
@@ -1510,6 +1559,26 @@ mod tests {
         );
         assert_round_trip!(
             r#"class Foo(Bar, object):
+    pass"#
+        );
+        assert_round_trip!(
+            r#"class Foo[T]:
+    pass"#
+        );
+        assert_round_trip!(
+            r#"class Foo[T](Bar):
+    pass"#
+        );
+        assert_round_trip!(
+            r#"class Foo[*Ts]:
+    pass"#
+        );
+        assert_round_trip!(
+            r#"class Foo[**P]:
+    pass"#
+        );
+        assert_round_trip!(
+            r#"class Foo[T, U, *Ts, **P]:
     pass"#
         );
         assert_round_trip!(
@@ -1541,6 +1610,22 @@ mod tests {
         );
         assert_round_trip!(
             r#"def test(a, b=4, /, c=8, d=9):
+    pass"#
+        );
+        assert_round_trip!(
+            r#"def test[T]():
+    pass"#
+        );
+        assert_round_trip!(
+            r#"def test[*Ts]():
+    pass"#
+        );
+        assert_round_trip!(
+            r#"def test[**P]():
+    pass"#
+        );
+        assert_round_trip!(
+            r#"def test[T, U, *Ts, **P]():
     pass"#
         );
         assert_round_trip!(
@@ -1596,6 +1681,13 @@ class Foo:
     def f():
         pass"#
         );
+
+        // Type aliases
+        assert_round_trip!(r#"type Foo = int | str"#);
+        assert_round_trip!(r#"type Foo[T] = list[T]"#);
+        assert_round_trip!(r#"type Foo[*Ts] = ..."#);
+        assert_round_trip!(r#"type Foo[**P] = ..."#);
+        assert_round_trip!(r#"type Foo[T, U, *Ts, **P] = ..."#);
     }
 
     #[test]

From 2d505e2b0443a46b1e5a3016821f22a8c7307913 Mon Sep 17 00:00:00 2001
From: Charlie Marsh <charlie.r.marsh@gmail.com>
Date: Tue, 18 Jul 2023 18:58:31 -0400
Subject: [PATCH 5/9] Remove suite body tracking from `SemanticModel` (#5848)

## Summary

The `SemanticModel` currently stores the "body" of a given `Suite`,
along with the current statement index. This is used to support "next
sibling" queries, but we only use this in exactly one place -- the rule
that simplifies constructs like this to `any` or `all`:

```python
for x in y:
    if x == 0:
        return True
return False
```

Instead of tracking the state, we can just do a (slightly more
expensive) traversal, by finding the node within its parent and
returning the next node in the body.

Note that we'll only have to do this extremely rarely -- namely, for
functions that contain something like:

```python
for x in y:
    if x == 0:
        return True
```
---
 crates/ruff/src/checkers/ast/mod.rs           |  17 +-
 .../rules/reimplemented_builtin.rs            | 446 +++++++++---------
 crates/ruff_python_ast/src/lib.rs             |   1 +
 crates/ruff_python_ast/src/traversal.rs       | 113 +++++
 crates/ruff_python_semantic/src/model.rs      |  11 -
 5 files changed, 339 insertions(+), 249 deletions(-)
 create mode 100644 crates/ruff_python_ast/src/traversal.rs

diff --git a/crates/ruff/src/checkers/ast/mod.rs b/crates/ruff/src/checkers/ast/mod.rs
index c1bf7fb8a5b675..d979952140c270 100644
--- a/crates/ruff/src/checkers/ast/mod.rs
+++ b/crates/ruff/src/checkers/ast/mod.rs
@@ -1404,11 +1404,7 @@ where
                 }
                 if stmt.is_for_stmt() {
                     if self.enabled(Rule::ReimplementedBuiltin) {
-                        flake8_simplify::rules::convert_for_loop_to_any_all(
-                            self,
-                            stmt,
-                            self.semantic.sibling_stmt(),
-                        );
+                        flake8_simplify::rules::convert_for_loop_to_any_all(self, stmt);
                     }
                     if self.enabled(Rule::InDictKeys) {
                         flake8_simplify::rules::key_in_dict_for(self, target, iter);
@@ -4237,21 +4233,10 @@ where
             flake8_pie::rules::no_unnecessary_pass(self, body);
         }
 
-        // Step 2: Binding
-        let prev_body = self.semantic.body;
-        let prev_body_index = self.semantic.body_index;
-        self.semantic.body = body;
-        self.semantic.body_index = 0;
-
         // Step 3: Traversal
         for stmt in body {
             self.visit_stmt(stmt);
-            self.semantic.body_index += 1;
         }
-
-        // Step 4: Clean-up
-        self.semantic.body = prev_body;
-        self.semantic.body_index = prev_body_index;
     }
 }
 
diff --git a/crates/ruff/src/rules/flake8_simplify/rules/reimplemented_builtin.rs b/crates/ruff/src/rules/flake8_simplify/rules/reimplemented_builtin.rs
index 4a30d94859b9a8..953e77b8439ad1 100644
--- a/crates/ruff/src/rules/flake8_simplify/rules/reimplemented_builtin.rs
+++ b/crates/ruff/src/rules/flake8_simplify/rules/reimplemented_builtin.rs
@@ -1,4 +1,4 @@
-use ruff_text_size::{TextRange, TextSize};
+use ruff_text_size::TextRange;
 use rustpython_parser::ast::{
     self, CmpOp, Comprehension, Constant, Expr, ExprContext, Ranged, Stmt, UnaryOp,
 };
@@ -7,10 +7,11 @@ use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation};
 use ruff_macros::{derive_message_formats, violation};
 use ruff_python_ast::helpers::any_over_expr;
 use ruff_python_ast::source_code::Generator;
+use ruff_python_ast::traversal;
 
 use crate::checkers::ast::Checker;
 use crate::line_width::LineWidth;
-use crate::registry::{AsRule, Rule};
+use crate::registry::AsRule;
 
 /// ## What it does
 /// Checks for `for` loops that can be replaced with a builtin function, like
@@ -38,7 +39,7 @@ use crate::registry::{AsRule, Rule};
 /// - [Python documentation: `all`](https://docs.python.org/3/library/functions.html#all)
 #[violation]
 pub struct ReimplementedBuiltin {
-    repl: String,
+    replacement: String,
 }
 
 impl Violation for ReimplementedBuiltin {
@@ -46,200 +47,222 @@ impl Violation for ReimplementedBuiltin {
 
     #[derive_message_formats]
     fn message(&self) -> String {
-        let ReimplementedBuiltin { repl } = self;
-        format!("Use `{repl}` instead of `for` loop")
+        let ReimplementedBuiltin { replacement } = self;
+        format!("Use `{replacement}` instead of `for` loop")
     }
 
     fn autofix_title(&self) -> Option<String> {
-        let ReimplementedBuiltin { repl } = self;
-        Some(format!("Replace with `{repl}`"))
+        let ReimplementedBuiltin { replacement } = self;
+        Some(format!("Replace with `{replacement}`"))
     }
 }
 
 /// SIM110, SIM111
-pub(crate) fn convert_for_loop_to_any_all(
-    checker: &mut Checker,
-    stmt: &Stmt,
-    sibling: Option<&Stmt>,
-) {
-    // There are two cases to consider:
+pub(crate) fn convert_for_loop_to_any_all(checker: &mut Checker, stmt: &Stmt) {
+    if !checker.semantic().scope().kind.is_any_function() {
+        return;
+    }
+
+    // The `for` loop itself must consist of an `if` with a `return`.
+    let Some(loop_) = match_loop(stmt) else {
+        return;
+    };
+
+    // Afterwards, there are two cases to consider:
     // - `for` loop with an `else: return True` or `else: return False`.
-    // - `for` loop followed by `return True` or `return False`
-    if let Some(loop_info) = return_values_for_else(stmt)
-        .or_else(|| sibling.and_then(|sibling| return_values_for_siblings(stmt, sibling)))
-    {
-        // Check if loop_info.target, loop_info.iter, or loop_info.test contains `await`.
-        if contains_await(loop_info.target)
-            || contains_await(loop_info.iter)
-            || contains_await(loop_info.test)
-        {
-            return;
-        }
-        if loop_info.return_value && !loop_info.next_return_value {
-            if checker.enabled(Rule::ReimplementedBuiltin) {
-                let contents = return_stmt(
-                    "any",
-                    loop_info.test,
-                    loop_info.target,
-                    loop_info.iter,
-                    checker.generator(),
-                );
+    // - `for` loop followed by `return True` or `return False`.
+    let Some(terminal) = match_else_return(stmt).or_else(|| {
+        let parent = checker.semantic().stmt_parent()?;
+        let suite = traversal::suite(stmt, parent)?;
+        let sibling = traversal::next_sibling(stmt, suite)?;
+        match_sibling_return(stmt, sibling)
+    }) else {
+        return;
+    };
 
-                // Don't flag if the resulting expression would exceed the maximum line length.
-                let line_start = checker.locator.line_start(stmt.start());
-                if LineWidth::new(checker.settings.tab_size)
-                    .add_str(&checker.locator.contents()[TextRange::new(line_start, stmt.start())])
-                    .add_str(&contents)
-                    > checker.settings.line_length
-                {
-                    return;
-                }
+    // Check if any of the expressions contain an `await` expression.
+    if contains_await(loop_.target) || contains_await(loop_.iter) || contains_await(loop_.test) {
+        return;
+    }
 
-                let mut diagnostic = Diagnostic::new(
-                    ReimplementedBuiltin {
-                        repl: contents.clone(),
-                    },
-                    TextRange::new(stmt.start(), loop_info.terminal),
-                );
-                if checker.patch(diagnostic.kind.rule()) && checker.semantic().is_builtin("any") {
-                    diagnostic.set_fix(Fix::suggested(Edit::replacement(
-                        contents,
-                        stmt.start(),
-                        loop_info.terminal,
-                    )));
-                }
-                checker.diagnostics.push(diagnostic);
+    match (loop_.return_value, terminal.return_value) {
+        // Replace with `any`.
+        (true, false) => {
+            let contents = return_stmt(
+                "any",
+                loop_.test,
+                loop_.target,
+                loop_.iter,
+                checker.generator(),
+            );
+
+            // Don't flag if the resulting expression would exceed the maximum line length.
+            let line_start = checker.locator.line_start(stmt.start());
+            if LineWidth::new(checker.settings.tab_size)
+                .add_str(&checker.locator.contents()[TextRange::new(line_start, stmt.start())])
+                .add_str(&contents)
+                > checker.settings.line_length
+            {
+                return;
             }
-        }
 
-        if !loop_info.return_value && loop_info.next_return_value {
-            if checker.enabled(Rule::ReimplementedBuiltin) {
-                // Invert the condition.
-                let test = {
-                    if let Expr::UnaryOp(ast::ExprUnaryOp {
-                        op: UnaryOp::Not,
-                        operand,
-                        range: _,
-                    }) = &loop_info.test
-                    {
-                        *operand.clone()
-                    } else if let Expr::Compare(ast::ExprCompare {
-                        left,
-                        ops,
-                        comparators,
-                        range: _,
-                    }) = &loop_info.test
-                    {
-                        if let ([op], [comparator]) = (ops.as_slice(), comparators.as_slice()) {
-                            let op = match op {
-                                CmpOp::Eq => CmpOp::NotEq,
-                                CmpOp::NotEq => CmpOp::Eq,
-                                CmpOp::Lt => CmpOp::GtE,
-                                CmpOp::LtE => CmpOp::Gt,
-                                CmpOp::Gt => CmpOp::LtE,
-                                CmpOp::GtE => CmpOp::Lt,
-                                CmpOp::Is => CmpOp::IsNot,
-                                CmpOp::IsNot => CmpOp::Is,
-                                CmpOp::In => CmpOp::NotIn,
-                                CmpOp::NotIn => CmpOp::In,
-                            };
-                            let node = ast::ExprCompare {
-                                left: left.clone(),
-                                ops: vec![op],
-                                comparators: vec![comparator.clone()],
-                                range: TextRange::default(),
-                            };
-                            node.into()
-                        } else {
-                            let node = ast::ExprUnaryOp {
-                                op: UnaryOp::Not,
-                                operand: Box::new(loop_info.test.clone()),
-                                range: TextRange::default(),
-                            };
-                            node.into()
-                        }
+            let mut diagnostic = Diagnostic::new(
+                ReimplementedBuiltin {
+                    replacement: contents.to_string(),
+                },
+                TextRange::new(stmt.start(), terminal.stmt.end()),
+            );
+            if checker.patch(diagnostic.kind.rule()) && checker.semantic().is_builtin("any") {
+                diagnostic.set_fix(Fix::suggested(Edit::replacement(
+                    contents,
+                    stmt.start(),
+                    terminal.stmt.end(),
+                )));
+            }
+            checker.diagnostics.push(diagnostic);
+        }
+        // Replace with `all`.
+        (false, true) => {
+            // Invert the condition.
+            let test = {
+                if let Expr::UnaryOp(ast::ExprUnaryOp {
+                    op: UnaryOp::Not,
+                    operand,
+                    range: _,
+                }) = &loop_.test
+                {
+                    *operand.clone()
+                } else if let Expr::Compare(ast::ExprCompare {
+                    left,
+                    ops,
+                    comparators,
+                    range: _,
+                }) = &loop_.test
+                {
+                    if let ([op], [comparator]) = (ops.as_slice(), comparators.as_slice()) {
+                        let op = match op {
+                            CmpOp::Eq => CmpOp::NotEq,
+                            CmpOp::NotEq => CmpOp::Eq,
+                            CmpOp::Lt => CmpOp::GtE,
+                            CmpOp::LtE => CmpOp::Gt,
+                            CmpOp::Gt => CmpOp::LtE,
+                            CmpOp::GtE => CmpOp::Lt,
+                            CmpOp::Is => CmpOp::IsNot,
+                            CmpOp::IsNot => CmpOp::Is,
+                            CmpOp::In => CmpOp::NotIn,
+                            CmpOp::NotIn => CmpOp::In,
+                        };
+                        let node = ast::ExprCompare {
+                            left: left.clone(),
+                            ops: vec![op],
+                            comparators: vec![comparator.clone()],
+                            range: TextRange::default(),
+                        };
+                        node.into()
                     } else {
                         let node = ast::ExprUnaryOp {
                             op: UnaryOp::Not,
-                            operand: Box::new(loop_info.test.clone()),
+                            operand: Box::new(loop_.test.clone()),
                             range: TextRange::default(),
                         };
                         node.into()
                     }
-                };
-                let contents = return_stmt(
-                    "all",
-                    &test,
-                    loop_info.target,
-                    loop_info.iter,
-                    checker.generator(),
-                );
-
-                // Don't flag if the resulting expression would exceed the maximum line length.
-                let line_start = checker.locator.line_start(stmt.start());
-                if LineWidth::new(checker.settings.tab_size)
-                    .add_str(&checker.locator.contents()[TextRange::new(line_start, stmt.start())])
-                    .add_str(&contents)
-                    > checker.settings.line_length
-                {
-                    return;
+                } else {
+                    let node = ast::ExprUnaryOp {
+                        op: UnaryOp::Not,
+                        operand: Box::new(loop_.test.clone()),
+                        range: TextRange::default(),
+                    };
+                    node.into()
                 }
+            };
+            let contents = return_stmt("all", &test, loop_.target, loop_.iter, checker.generator());
 
-                let mut diagnostic = Diagnostic::new(
-                    ReimplementedBuiltin {
-                        repl: contents.clone(),
-                    },
-                    TextRange::new(stmt.start(), loop_info.terminal),
-                );
-                if checker.patch(diagnostic.kind.rule()) && checker.semantic().is_builtin("all") {
-                    diagnostic.set_fix(Fix::suggested(Edit::replacement(
-                        contents,
-                        stmt.start(),
-                        loop_info.terminal,
-                    )));
-                }
-                checker.diagnostics.push(diagnostic);
+            // Don't flag if the resulting expression would exceed the maximum line length.
+            let line_start = checker.locator.line_start(stmt.start());
+            if LineWidth::new(checker.settings.tab_size)
+                .add_str(&checker.locator.contents()[TextRange::new(line_start, stmt.start())])
+                .add_str(&contents)
+                > checker.settings.line_length
+            {
+                return;
+            }
+
+            let mut diagnostic = Diagnostic::new(
+                ReimplementedBuiltin {
+                    replacement: contents.to_string(),
+                },
+                TextRange::new(stmt.start(), terminal.stmt.end()),
+            );
+            if checker.patch(diagnostic.kind.rule()) && checker.semantic().is_builtin("all") {
+                diagnostic.set_fix(Fix::suggested(Edit::replacement(
+                    contents,
+                    stmt.start(),
+                    terminal.stmt.end(),
+                )));
             }
+            checker.diagnostics.push(diagnostic);
         }
+        _ => {}
     }
 }
 
+/// Represents a `for` loop with a conditional `return`, like:
+/// ```python
+/// for x in y:
+///     if x == 0:
+///         return True
+/// ```
+#[derive(Debug)]
 struct Loop<'a> {
+    /// The `return` value of the loop.
     return_value: bool,
-    next_return_value: bool,
+    /// The test condition in the loop.
     test: &'a Expr,
+    /// The target of the loop.
     target: &'a Expr,
+    /// The iterator of the loop.
     iter: &'a Expr,
-    terminal: TextSize,
 }
 
-/// Extract the returned boolean values a `Stmt::For` with an `else` body.
-fn return_values_for_else(stmt: &Stmt) -> Option<Loop> {
+/// Represents a `return` statement following a `for` loop, like:
+/// ```python
+/// for x in y:
+///     if x == 0:
+///         return True
+/// return False
+/// ```
+///
+/// Or:
+/// ```python
+/// for x in y:
+///     if x == 0:
+///         return True
+/// else:
+///     return False
+/// ```
+#[derive(Debug)]
+struct Terminal<'a> {
+    return_value: bool,
+    stmt: &'a Stmt,
+}
+
+fn match_loop(stmt: &Stmt) -> Option<Loop> {
     let Stmt::For(ast::StmtFor {
-        body,
-        target,
-        iter,
-        orelse,
-        ..
+        body, target, iter, ..
     }) = stmt
     else {
         return None;
     };
 
-    // The loop itself should contain a single `if` statement, with an `else`
-    // containing a single `return True` or `return False`.
-    if body.len() != 1 {
-        return None;
-    }
-    if orelse.len() != 1 {
-        return None;
-    }
-    let Stmt::If(ast::StmtIf {
+    // The loop itself should contain a single `if` statement, with a single `return` statement in
+    // the body.
+    let [Stmt::If(ast::StmtIf {
         body: nested_body,
         test: nested_test,
         elif_else_clauses: nested_elif_else_clauses,
         range: _,
-    }) = &body[0]
+    })] = body.as_slice()
     else {
         return None;
     };
@@ -263,17 +286,37 @@ fn return_values_for_else(stmt: &Stmt) -> Option<Loop> {
         return None;
     };
 
+    Some(Loop {
+        return_value: *value,
+        test: nested_test,
+        target,
+        iter,
+    })
+}
+
+/// If a `Stmt::For` contains an `else` with a single boolean `return`, return the [`Terminal`]
+/// representing that `return`.
+///
+/// For example, matches the `return` in:
+/// ```python
+/// for x in y:
+///     if x == 0:
+///         return True
+/// return False
+/// ```
+fn match_else_return(stmt: &Stmt) -> Option<Terminal> {
+    let Stmt::For(ast::StmtFor { orelse, .. }) = stmt else {
+        return None;
+    };
+
     // The `else` block has to contain a single `return True` or `return False`.
-    let Stmt::Return(ast::StmtReturn {
-        value: next_value,
+    let [Stmt::Return(ast::StmtReturn {
+        value: Some(next_value),
         range: _,
-    }) = &orelse[0]
+    })] = orelse.as_slice()
     else {
         return None;
     };
-    let Some(next_value) = next_value else {
-        return None;
-    };
     let Expr::Constant(ast::ExprConstant {
         value: Constant::Bool(next_value),
         ..
@@ -282,78 +325,41 @@ fn return_values_for_else(stmt: &Stmt) -> Option<Loop> {
         return None;
     };
 
-    Some(Loop {
-        return_value: *value,
-        next_return_value: *next_value,
-        test: nested_test,
-        target,
-        iter,
-        terminal: stmt.end(),
+    Some(Terminal {
+        return_value: *next_value,
+        stmt,
     })
 }
 
-/// Extract the returned boolean values from subsequent `Stmt::For` and
-/// `Stmt::Return` statements, or `None`.
-fn return_values_for_siblings<'a>(stmt: &'a Stmt, sibling: &'a Stmt) -> Option<Loop<'a>> {
-    let Stmt::For(ast::StmtFor {
-        body,
-        target,
-        iter,
-        orelse,
-        ..
-    }) = stmt
-    else {
+/// If a `Stmt::For` is followed by a boolean `return`, return the [`Terminal`] representing that
+/// `return`.
+///
+/// For example, matches the `return` in:
+/// ```python
+/// for x in y:
+///     if x == 0:
+///         return True
+/// else:
+///     return False
+/// ```
+fn match_sibling_return<'a>(stmt: &'a Stmt, sibling: &'a Stmt) -> Option<Terminal<'a>> {
+    let Stmt::For(ast::StmtFor { orelse, .. }) = stmt else {
         return None;
     };
 
-    // The loop itself should contain a single `if` statement, with a single `return
-    // True` or `return False`.
-    if body.len() != 1 {
-        return None;
-    }
+    // The loop itself shouldn't have an `else` block.
     if !orelse.is_empty() {
         return None;
     }
-    let Stmt::If(ast::StmtIf {
-        body: nested_body,
-        test: nested_test,
-        elif_else_clauses: nested_elif_else_clauses,
-        range: _,
-    }) = &body[0]
-    else {
-        return None;
-    };
-    if nested_body.len() != 1 {
-        return None;
-    }
-    if !nested_elif_else_clauses.is_empty() {
-        return None;
-    }
-    let Stmt::Return(ast::StmtReturn { value, range: _ }) = &nested_body[0] else {
-        return None;
-    };
-    let Some(value) = value else {
-        return None;
-    };
-    let Expr::Constant(ast::ExprConstant {
-        value: Constant::Bool(value),
-        ..
-    }) = value.as_ref()
-    else {
-        return None;
-    };
 
     // The next statement has to be a `return True` or `return False`.
     let Stmt::Return(ast::StmtReturn {
-        value: next_value,
+        value: Some(next_value),
         range: _,
     }) = &sibling
     else {
         return None;
     };
-    let Some(next_value) = next_value else {
-        return None;
-    };
     let Expr::Constant(ast::ExprConstant {
         value: Constant::Bool(next_value),
         ..
@@ -362,13 +368,9 @@ fn return_values_for_siblings<'a>(stmt: &'a Stmt, sibling: &'a Stmt) -> Option<L
         return None;
     };
 
-    Some(Loop {
-        return_value: *value,
-        next_return_value: *next_value,
-        test: nested_test,
-        target,
-        iter,
-        terminal: sibling.end(),
+    Some(Terminal {
+        return_value: *next_value,
+        stmt: sibling,
     })
 }
 
diff --git a/crates/ruff_python_ast/src/lib.rs b/crates/ruff_python_ast/src/lib.rs
index 424e1be6ffd455..71c17096e438e5 100644
--- a/crates/ruff_python_ast/src/lib.rs
+++ b/crates/ruff_python_ast/src/lib.rs
@@ -15,6 +15,7 @@ pub mod statement_visitor;
 pub mod stmt_if;
 pub mod str;
 pub mod token_kind;
+pub mod traversal;
 pub mod types;
 pub mod typing;
 pub mod visitor;
diff --git a/crates/ruff_python_ast/src/traversal.rs b/crates/ruff_python_ast/src/traversal.rs
new file mode 100644
index 00000000000000..4e0f1108a578ee
--- /dev/null
+++ b/crates/ruff_python_ast/src/traversal.rs
@@ -0,0 +1,113 @@
+//! Utilities for manually traversing a Python AST.
+use rustpython_ast::{ExceptHandler, Stmt, Suite};
+use rustpython_parser::ast;
+
+/// Given a [`Stmt`] and its parent, return the [`Suite`] that contains the [`Stmt`].
+pub fn suite<'a>(stmt: &'a Stmt, parent: &'a Stmt) -> Option<&'a Suite> {
+    match parent {
+        Stmt::FunctionDef(ast::StmtFunctionDef { body, .. }) => Some(body),
+        Stmt::AsyncFunctionDef(ast::StmtAsyncFunctionDef { body, .. }) => Some(body),
+        Stmt::ClassDef(ast::StmtClassDef { body, .. }) => Some(body),
+        Stmt::For(ast::StmtFor { body, orelse, .. }) => {
+            if body.contains(stmt) {
+                Some(body)
+            } else if orelse.contains(stmt) {
+                Some(orelse)
+            } else {
+                None
+            }
+        }
+        Stmt::AsyncFor(ast::StmtAsyncFor { body, orelse, .. }) => {
+            if body.contains(stmt) {
+                Some(body)
+            } else if orelse.contains(stmt) {
+                Some(orelse)
+            } else {
+                None
+            }
+        }
+        Stmt::While(ast::StmtWhile { body, orelse, .. }) => {
+            if body.contains(stmt) {
+                Some(body)
+            } else if orelse.contains(stmt) {
+                Some(orelse)
+            } else {
+                None
+            }
+        }
+        Stmt::If(ast::StmtIf {
+            body,
+            elif_else_clauses,
+            ..
+        }) => {
+            if body.contains(stmt) {
+                Some(body)
+            } else {
+                elif_else_clauses
+                    .iter()
+                    .map(|elif_else_clause| &elif_else_clause.body)
+                    .find(|body| body.contains(stmt))
+            }
+        }
+        Stmt::With(ast::StmtWith { body, .. }) => Some(body),
+        Stmt::AsyncWith(ast::StmtAsyncWith { body, .. }) => Some(body),
+        Stmt::Match(ast::StmtMatch { cases, .. }) => cases
+            .iter()
+            .map(|case| &case.body)
+            .find(|body| body.contains(stmt)),
+        Stmt::Try(ast::StmtTry {
+            body,
+            handlers,
+            orelse,
+            finalbody,
+            ..
+        }) => {
+            if body.contains(stmt) {
+                Some(body)
+            } else if orelse.contains(stmt) {
+                Some(orelse)
+            } else if finalbody.contains(stmt) {
+                Some(finalbody)
+            } else {
+                handlers
+                    .iter()
+                    .filter_map(ExceptHandler::as_except_handler)
+                    .map(|handler| &handler.body)
+                    .find(|body| body.contains(stmt))
+            }
+        }
+        Stmt::TryStar(ast::StmtTryStar {
+            body,
+            handlers,
+            orelse,
+            finalbody,
+            ..
+        }) => {
+            if body.contains(stmt) {
+                Some(body)
+            } else if orelse.contains(stmt) {
+                Some(orelse)
+            } else if finalbody.contains(stmt) {
+                Some(finalbody)
+            } else {
+                handlers
+                    .iter()
+                    .filter_map(ExceptHandler::as_except_handler)
+                    .map(|handler| &handler.body)
+                    .find(|body| body.contains(stmt))
+            }
+        }
+        _ => None,
+    }
+}
+
+/// Given a [`Stmt`] and its containing [`Suite`], return the next [`Stmt`] in the [`Suite`].
+pub fn next_sibling<'a>(stmt: &'a Stmt, suite: &'a Suite) -> Option<&'a Stmt> {
+    let mut iter = suite.iter();
+    while let Some(sibling) = iter.next() {
+        if sibling == stmt {
+            return iter.next();
+        }
+    }
+    None
+}
diff --git a/crates/ruff_python_semantic/src/model.rs b/crates/ruff_python_semantic/src/model.rs
index 53138dbf5d5c9f..0106dfcfc0d89a 100644
--- a/crates/ruff_python_semantic/src/model.rs
+++ b/crates/ruff_python_semantic/src/model.rs
@@ -108,10 +108,6 @@ pub struct SemanticModel<'a> {
     /// by way of the `global x` statement.
     rebinding_scopes: HashMap<BindingId, Vec<ScopeId>, BuildNoHashHasher<BindingId>>,
 
-    /// Body iteration; used to peek at siblings.
-    pub body: &'a [Stmt],
-    pub body_index: usize,
-
     /// Flags for the semantic model.
     pub flags: SemanticModelFlags,
 
@@ -137,8 +133,6 @@ impl<'a> SemanticModel<'a> {
             shadowed_bindings: IntMap::default(),
             delayed_annotations: IntMap::default(),
             rebinding_scopes: IntMap::default(),
-            body: &[],
-            body_index: 0,
             flags: SemanticModelFlags::new(path),
             handled_exceptions: Vec::default(),
         }
@@ -757,11 +751,6 @@ impl<'a> SemanticModel<'a> {
         self.exprs.iter().rev().skip(1)
     }
 
-    /// Return the `Stmt` that immediately follows the current `Stmt`, if any.
-    pub fn sibling_stmt(&self) -> Option<&'a Stmt> {
-        self.body.get(self.body_index + 1)
-    }
-
     /// Returns a reference to the global scope
     pub fn global_scope(&self) -> &Scope<'a> {
         self.scopes.global()

From 7ffcd93afd95ad8962da9eb84eacea83767f4f8b Mon Sep 17 00:00:00 2001
From: Charlie Marsh <charlie.r.marsh@gmail.com>
Date: Tue, 18 Jul 2023 20:43:12 -0400
Subject: [PATCH 6/9] Move unused deletion tracking to deferred analysis
 (#5852)

## Summary

This PR moves the "unused exception" rule out of the visitor and into a
deferred check. When we can base rules solely on the semantic model, we
probably should, as it greatly simplifies the `Checker` itself.
---
 crates/ruff/src/checkers/ast/mod.rs        | 65 +++++++++++++---------
 crates/ruff/src/renamer.rs                 |  1 +
 crates/ruff/src/rules/pyflakes/fixes.rs    |  9 +--
 crates/ruff/src/rules/pyflakes/mod.rs      |  2 +-
 crates/ruff_python_semantic/src/binding.rs | 18 +++++-
 5 files changed, 62 insertions(+), 33 deletions(-)

diff --git a/crates/ruff/src/checkers/ast/mod.rs b/crates/ruff/src/checkers/ast/mod.rs
index d979952140c270..95ada177cc393d 100644
--- a/crates/ruff/src/checkers/ast/mod.rs
+++ b/crates/ruff/src/checkers/ast/mod.rs
@@ -4057,7 +4057,7 @@ where
         }
 
         // Step 2: Binding
-        let bindings = match except_handler {
+        let binding = match except_handler {
             ExceptHandler::ExceptHandler(ast::ExceptHandlerExceptHandler {
                 type_: _,
                 name,
@@ -4066,17 +4066,17 @@ where
             }) => {
                 if let Some(name) = name {
                     // Store the existing binding, if any.
-                    let existing_id = self.semantic.lookup_symbol(name.as_str());
+                    let binding_id = self.semantic.lookup_symbol(name.as_str());
 
                     // Add the bound exception name to the scope.
-                    let binding_id = self.add_binding(
+                    self.add_binding(
                         name.as_str(),
                         name.range(),
-                        BindingKind::Assignment,
+                        BindingKind::BoundException,
                         BindingFlags::empty(),
                     );
 
-                    Some((name, existing_id, binding_id))
+                    Some((name, binding_id))
                 } else {
                     None
                 }
@@ -4087,30 +4087,11 @@ where
         walk_except_handler(self, except_handler);
 
         // Step 4: Clean-up
-        if let Some((name, existing_id, binding_id)) = bindings {
-            // If the exception name wasn't used in the scope, emit a diagnostic.
-            if !self.semantic.is_used(binding_id) {
-                if self.enabled(Rule::UnusedVariable) {
-                    let mut diagnostic = Diagnostic::new(
-                        pyflakes::rules::UnusedVariable {
-                            name: name.to_string(),
-                        },
-                        name.range(),
-                    );
-                    if self.patch(Rule::UnusedVariable) {
-                        diagnostic.try_set_fix(|| {
-                            pyflakes::fixes::remove_exception_handler_assignment(name, self.locator)
-                                .map(Fix::automatic)
-                        });
-                    }
-                    self.diagnostics.push(diagnostic);
-                }
-            }
-
+        if let Some((name, binding_id)) = binding {
             self.add_binding(
                 name.as_str(),
                 name.range(),
-                BindingKind::UnboundException(existing_id),
+                BindingKind::UnboundException(binding_id),
                 BindingFlags::empty(),
             );
         }
@@ -4797,6 +4778,37 @@ impl<'a> Checker<'a> {
         }
     }
 
+    /// Run any lint rules that operate over a single [`Binding`].
+    fn check_bindings(&mut self) {
+        if !self.any_enabled(&[Rule::UnusedVariable]) {
+            return;
+        }
+
+        for binding in self.semantic.bindings.iter() {
+            // F841
+            if self.enabled(Rule::UnusedVariable) {
+                if binding.kind.is_bound_exception() && !binding.is_used() {
+                    let mut diagnostic = Diagnostic::new(
+                        pyflakes::rules::UnusedVariable {
+                            name: binding.name(self.locator).to_string(),
+                        },
+                        binding.range,
+                    );
+                    if self.patch(Rule::UnusedVariable) {
+                        diagnostic.try_set_fix(|| {
+                            pyflakes::fixes::remove_exception_handler_assignment(
+                                binding,
+                                self.locator,
+                            )
+                            .map(Fix::automatic)
+                        });
+                    }
+                    self.diagnostics.push(diagnostic);
+                }
+            }
+        }
+    }
+
     fn check_deferred_scopes(&mut self) {
         if !self.any_enabled(&[
             Rule::GlobalVariableNotAssigned,
@@ -5475,6 +5487,7 @@ pub(crate) fn check_ast(
     checker.semantic.scope_id = ScopeId::global();
     checker.deferred.scopes.push(ScopeId::global());
     checker.check_deferred_scopes();
+    checker.check_bindings();
 
     checker.diagnostics
 }
diff --git a/crates/ruff/src/renamer.rs b/crates/ruff/src/renamer.rs
index 14aec66ef7ad9c..9274018a798452 100644
--- a/crates/ruff/src/renamer.rs
+++ b/crates/ruff/src/renamer.rs
@@ -245,6 +245,7 @@ impl Renamer {
             | BindingKind::NamedExprAssignment
             | BindingKind::UnpackedAssignment
             | BindingKind::Assignment
+            | BindingKind::BoundException
             | BindingKind::LoopVar
             | BindingKind::Global
             | BindingKind::Nonlocal(_)
diff --git a/crates/ruff/src/rules/pyflakes/fixes.rs b/crates/ruff/src/rules/pyflakes/fixes.rs
index c6cdee0f5a3219..bad191b555e0b4 100644
--- a/crates/ruff/src/rules/pyflakes/fixes.rs
+++ b/crates/ruff/src/rules/pyflakes/fixes.rs
@@ -1,9 +1,10 @@
 use anyhow::{Context, Ok, Result};
 use ruff_text_size::TextRange;
-use rustpython_parser::ast::{Expr, Identifier, Ranged};
+use rustpython_parser::ast::{Expr, Ranged};
 
 use ruff_diagnostics::Edit;
 use ruff_python_ast::source_code::{Locator, Stylist};
+use ruff_python_semantic::Binding;
 use ruff_python_whitespace::{SimpleTokenizer, TokenKind};
 
 use crate::autofix::codemods::CodegenStylist;
@@ -90,12 +91,12 @@ pub(crate) fn remove_unused_positional_arguments_from_format_call(
 
 /// Generate a [`Edit`] to remove the binding from an exception handler.
 pub(crate) fn remove_exception_handler_assignment(
-    bound_exception: &Identifier,
+    bound_exception: &Binding,
     locator: &Locator,
 ) -> Result<Edit> {
     // Lex backwards, to the token just before the `as`.
     let mut tokenizer =
-        SimpleTokenizer::up_to(bound_exception.start(), locator.contents()).skip_trivia();
+        SimpleTokenizer::up_to(bound_exception.range.start(), locator.contents()).skip_trivia();
 
     // Eat the `as` token.
     let preceding = tokenizer
@@ -109,7 +110,7 @@ pub(crate) fn remove_exception_handler_assignment(
         .context("expected the exception name to be preceded by a token")?;
 
     // Lex forwards, to the `:` token.
-    let following = SimpleTokenizer::starts_at(bound_exception.end(), locator.contents())
+    let following = SimpleTokenizer::starts_at(bound_exception.range.end(), locator.contents())
         .next()
         .context("expected the exception name to be followed by a colon")?;
     debug_assert!(matches!(following.kind, TokenKind::Colon));
diff --git a/crates/ruff/src/rules/pyflakes/mod.rs b/crates/ruff/src/rules/pyflakes/mod.rs
index 1f13edf9734575..885b4baf6b1df0 100644
--- a/crates/ruff/src/rules/pyflakes/mod.rs
+++ b/crates/ruff/src/rules/pyflakes/mod.rs
@@ -2115,7 +2115,7 @@ mod tests {
         try: pass
         except Exception as fu: pass
         "#,
-            &[Rule::UnusedVariable, Rule::RedefinedWhileUnused],
+            &[Rule::RedefinedWhileUnused, Rule::UnusedVariable],
         );
     }
 
diff --git a/crates/ruff_python_semantic/src/binding.rs b/crates/ruff_python_semantic/src/binding.rs
index 862be2b617021b..f66da2a80a8012 100644
--- a/crates/ruff_python_semantic/src/binding.rs
+++ b/crates/ruff_python_semantic/src/binding.rs
@@ -5,6 +5,7 @@ use ruff_text_size::TextRange;
 use rustpython_parser::ast::Ranged;
 
 use ruff_index::{newtype_index, IndexSlice, IndexVec};
+use ruff_python_ast::source_code::Locator;
 
 use crate::context::ExecutionContext;
 use crate::model::SemanticModel;
@@ -163,6 +164,11 @@ impl<'a> Binding<'a> {
         }
     }
 
+    /// Returns the name of the binding (e.g., `x` in `x = 1`).
+    pub fn name<'b>(&self, locator: &'b Locator) -> &'b str {
+        locator.slice(self.range)
+    }
+
     /// Returns the range of the binding's parent.
     pub fn parent_range(&self, semantic: &SemanticModel) -> Option<TextRange> {
         self.source
@@ -417,7 +423,16 @@ pub enum BindingKind<'a> {
     /// ```
     Deletion,
 
-    /// A binding to unbind the local variable, like `x` in:
+    /// A binding to bind an exception to a local variable, like `x` in:
+    /// ```python
+    /// try:
+    ///    ...
+    /// except Exception as x:
+    ///   ...
+    /// ```
+    BoundException,
+
+    /// A binding to unbind a bound local exception, like `x` in:
     /// ```python
     /// try:
     ///    ...
@@ -428,7 +443,6 @@ pub enum BindingKind<'a> {
     /// After the `except` block, `x` is unbound, despite the lack
     /// of an explicit `del` statement.
     ///
-    ///
     /// Stores the ID of the binding that was shadowed in the enclosing
     /// scope, if any.
     UnboundException(Option<BindingId>),

From 626d8dc2cc6ae46d465cb8a236330700032b856d Mon Sep 17 00:00:00 2001
From: Charlie Marsh <charlie.r.marsh@gmail.com>
Date: Tue, 18 Jul 2023 20:49:13 -0400
Subject: [PATCH 7/9] Use `.as_ref()` in lieu of `&**` (#5874)

I find this less opaque (and often more succinct).
---
 crates/ruff/src/checkers/ast/mod.rs                  | 10 +++-------
 crates/ruff/src/rules/flake8_annotations/helpers.rs  |  2 +-
 .../src/rules/pylint/rules/redefined_loop_name.rs    |  2 +-
 crates/ruff_python_ast/src/all.rs                    |  6 +++---
 crates/ruff_python_ast/src/comparable.rs             | 12 ++++++------
 crates/ruff_python_ast/src/source_code/generator.rs  |  2 +-
 .../src/analyze/type_inference.rs                    |  4 ++--
 7 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/crates/ruff/src/checkers/ast/mod.rs b/crates/ruff/src/checkers/ast/mod.rs
index 95ada177cc393d..c8077d7ab98615 100644
--- a/crates/ruff/src/checkers/ast/mod.rs
+++ b/crates/ruff/src/checkers/ast/mod.rs
@@ -411,7 +411,7 @@ where
                             stmt,
                             name,
                             decorator_list,
-                            returns.as_ref().map(|expr| &**expr),
+                            returns.as_ref().map(AsRef::as_ref),
                             args,
                             stmt.is_async_function_def_stmt(),
                         );
@@ -470,18 +470,14 @@ where
                     Rule::SuperfluousElseContinue,
                     Rule::SuperfluousElseBreak,
                 ]) {
-                    flake8_return::rules::function(
-                        self,
-                        body,
-                        returns.as_ref().map(|expr| &**expr),
-                    );
+                    flake8_return::rules::function(self, body, returns.as_ref().map(AsRef::as_ref));
                 }
                 if self.enabled(Rule::UselessReturn) {
                     pylint::rules::useless_return(
                         self,
                         stmt,
                         body,
-                        returns.as_ref().map(|expr| &**expr),
+                        returns.as_ref().map(AsRef::as_ref),
                     );
                 }
                 if self.enabled(Rule::ComplexStructure) {
diff --git a/crates/ruff/src/rules/flake8_annotations/helpers.rs b/crates/ruff/src/rules/flake8_annotations/helpers.rs
index c3db27fa2fec27..cf1139507269f7 100644
--- a/crates/ruff/src/rules/flake8_annotations/helpers.rs
+++ b/crates/ruff/src/rules/flake8_annotations/helpers.rs
@@ -26,7 +26,7 @@ pub(super) fn match_function_def(
         }) => (
             name,
             args,
-            returns.as_ref().map(|expr| &**expr),
+            returns.as_ref().map(AsRef::as_ref),
             body,
             decorator_list,
         ),
diff --git a/crates/ruff/src/rules/pylint/rules/redefined_loop_name.rs b/crates/ruff/src/rules/pylint/rules/redefined_loop_name.rs
index c0536daee125e0..ec9fd9e28d2997 100644
--- a/crates/ruff/src/rules/pylint/rules/redefined_loop_name.rs
+++ b/crates/ruff/src/rules/pylint/rules/redefined_loop_name.rs
@@ -274,7 +274,7 @@ fn assignment_targets_from_expr<'a>(
             ctx: ExprContext::Store,
             value,
             range: _,
-        }) => Box::new(iter::once(&**value)),
+        }) => Box::new(iter::once(value.as_ref())),
         Expr::Name(ast::ExprName {
             ctx: ExprContext::Store,
             id,
diff --git a/crates/ruff_python_ast/src/all.rs b/crates/ruff_python_ast/src/all.rs
index 09244135be5acc..903a56d7684992 100644
--- a/crates/ruff_python_ast/src/all.rs
+++ b/crates/ruff_python_ast/src/all.rs
@@ -65,7 +65,7 @@ where
             }) => {
                 // Allow `tuple()` and `list()` calls.
                 if keywords.is_empty() && args.len() <= 1 {
-                    if let Expr::Name(ast::ExprName { id, .. }) = &**func {
+                    if let Expr::Name(ast::ExprName { id, .. }) = func.as_ref() {
                         let id = id.as_str();
                         if id == "tuple" || id == "list" {
                             if is_builtin(id) {
@@ -106,7 +106,7 @@ where
         Stmt::AugAssign(ast::StmtAugAssign { value, .. }) => Some(value),
         _ => None,
     } {
-        if let Expr::BinOp(ast::ExprBinOp { left, right, .. }) = &**value {
+        if let Expr::BinOp(ast::ExprBinOp { left, right, .. }) = value.as_ref() {
             let mut current_left = left;
             let mut current_right = right;
             loop {
@@ -119,7 +119,7 @@ where
 
                 // Process the left side, which can be a "real" value or the "rest" of the
                 // binary operation.
-                if let Expr::BinOp(ast::ExprBinOp { left, right, .. }) = &**current_left {
+                if let Expr::BinOp(ast::ExprBinOp { left, right, .. }) = current_left.as_ref() {
                     current_left = left;
                     current_right = right;
                 } else {
diff --git a/crates/ruff_python_ast/src/comparable.rs b/crates/ruff_python_ast/src/comparable.rs
index af78faf2a4d59e..ceb2a1c48bf4ee 100644
--- a/crates/ruff_python_ast/src/comparable.rs
+++ b/crates/ruff_python_ast/src/comparable.rs
@@ -274,7 +274,7 @@ impl<'a> From<&'a ast::Pattern> for ComparablePattern<'a> {
 
 impl<'a> From<&'a Box<ast::Pattern>> for Box<ComparablePattern<'a>> {
     fn from(pattern: &'a Box<ast::Pattern>) -> Self {
-        Box::new((&**pattern).into())
+        Box::new((pattern.as_ref()).into())
     }
 }
 
@@ -362,13 +362,13 @@ impl<'a> From<&'a ast::Arguments> for ComparableArguments<'a> {
 
 impl<'a> From<&'a Box<ast::Arguments>> for ComparableArguments<'a> {
     fn from(arguments: &'a Box<ast::Arguments>) -> Self {
-        (&**arguments).into()
+        (arguments.as_ref()).into()
     }
 }
 
 impl<'a> From<&'a Box<ast::Arg>> for ComparableArg<'a> {
     fn from(arg: &'a Box<ast::Arg>) -> Self {
-        (&**arg).into()
+        (arg.as_ref()).into()
     }
 }
 
@@ -685,13 +685,13 @@ pub enum ComparableExpr<'a> {
 
 impl<'a> From<&'a Box<ast::Expr>> for Box<ComparableExpr<'a>> {
     fn from(expr: &'a Box<ast::Expr>) -> Self {
-        Box::new((&**expr).into())
+        Box::new((expr.as_ref()).into())
     }
 }
 
 impl<'a> From<&'a Box<ast::Expr>> for ComparableExpr<'a> {
     fn from(expr: &'a Box<ast::Expr>) -> Self {
-        (&**expr).into()
+        (expr.as_ref()).into()
     }
 }
 
@@ -737,7 +737,7 @@ impl<'a> From<&'a ast::Expr> for ComparableExpr<'a> {
                 body,
                 range: _range,
             }) => Self::Lambda(ExprLambda {
-                args: (&**args).into(),
+                args: (args.as_ref()).into(),
                 body: body.into(),
             }),
             ast::Expr::IfExp(ast::ExprIfExp {
diff --git a/crates/ruff_python_ast/src/source_code/generator.rs b/crates/ruff_python_ast/src/source_code/generator.rs
index 5efc6fe0c07a14..972238903ff07a 100644
--- a/crates/ruff_python_ast/src/source_code/generator.rs
+++ b/crates/ruff_python_ast/src/source_code/generator.rs
@@ -1156,7 +1156,7 @@ impl<'a> Generator<'a> {
                         range: _range,
                     })],
                     [],
-                ) = (&**args, &**keywords)
+                ) = (args.as_slice(), keywords.as_slice())
                 {
                     // Ensure that a single generator doesn't get double-parenthesized.
                     self.unparse_expr(elt, precedence::COMMA);
diff --git a/crates/ruff_python_semantic/src/analyze/type_inference.rs b/crates/ruff_python_semantic/src/analyze/type_inference.rs
index 6c7b0159e7bc3a..ff8e0fd60dc739 100644
--- a/crates/ruff_python_semantic/src/analyze/type_inference.rs
+++ b/crates/ruff_python_semantic/src/analyze/type_inference.rs
@@ -44,8 +44,8 @@ pub enum PythonType {
 impl From<&Expr> for PythonType {
     fn from(expr: &Expr) -> Self {
         match expr {
-            Expr::NamedExpr(ast::ExprNamedExpr { value, .. }) => (&**value).into(),
-            Expr::UnaryOp(ast::ExprUnaryOp { operand, .. }) => (&**operand).into(),
+            Expr::NamedExpr(ast::ExprNamedExpr { value, .. }) => (value.as_ref()).into(),
+            Expr::UnaryOp(ast::ExprUnaryOp { operand, .. }) => (operand.as_ref()).into(),
             Expr::Dict(_) => PythonType::Dict,
             Expr::DictComp(_) => PythonType::Dict,
             Expr::Set(_) => PythonType::Set,

From 1181d25e5a5192ef2374b272f4fb8e24aa5e568f Mon Sep 17 00:00:00 2001
From: Charlie Marsh <charlie.r.marsh@gmail.com>
Date: Tue, 18 Jul 2023 20:59:02 -0400
Subject: [PATCH 8/9] Move a few more candidate rules to the deferred
 `Binding`-only pass (#5853)

## Summary

No behavior change, but this is in theory more efficient, since we can
just iterate over the flat `Binding` vector rather than having to
iterate over binding chains via the `Scope`.
---
 crates/ruff/src/checkers/ast/mod.rs           | 46 +++++++-------
 .../rules/unconventional_import_alias.rs      | 60 +++++++++----------
 .../unaliased_collections_abc_set_import.rs   | 48 +++++++--------
 crates/ruff_python_semantic/src/binding.rs    |  2 +
 crates/ruff_python_semantic/src/model.rs      |  3 +
 5 files changed, 82 insertions(+), 77 deletions(-)

diff --git a/crates/ruff/src/checkers/ast/mod.rs b/crates/ruff/src/checkers/ast/mod.rs
index c8077d7ab98615..a628befeb7b561 100644
--- a/crates/ruff/src/checkers/ast/mod.rs
+++ b/crates/ruff/src/checkers/ast/mod.rs
@@ -4776,7 +4776,11 @@ impl<'a> Checker<'a> {
 
     /// Run any lint rules that operate over a single [`Binding`].
     fn check_bindings(&mut self) {
-        if !self.any_enabled(&[Rule::UnusedVariable]) {
+        if !self.any_enabled(&[
+            Rule::UnusedVariable,
+            Rule::UnconventionalImportAlias,
+            Rule::UnaliasedCollectionsAbcSetImport,
+        ]) {
             return;
         }
 
@@ -4802,6 +4806,27 @@ impl<'a> Checker<'a> {
                     self.diagnostics.push(diagnostic);
                 }
             }
+
+            if self.enabled(Rule::UnconventionalImportAlias) {
+                if let Some(diagnostic) =
+                    flake8_import_conventions::rules::unconventional_import_alias(
+                        self,
+                        binding,
+                        &self.settings.flake8_import_conventions.aliases,
+                    )
+                {
+                    self.diagnostics.push(diagnostic);
+                }
+            }
+            if self.is_stub {
+                if self.enabled(Rule::UnaliasedCollectionsAbcSetImport) {
+                    if let Some(diagnostic) =
+                        flake8_pyi::rules::unaliased_collections_abc_set_import(self, binding)
+                    {
+                        self.diagnostics.push(diagnostic);
+                    }
+                }
+            }
         }
     }
 
@@ -4814,8 +4839,6 @@ impl<'a> Checker<'a> {
             Rule::TypingOnlyFirstPartyImport,
             Rule::TypingOnlyStandardLibraryImport,
             Rule::TypingOnlyThirdPartyImport,
-            Rule::UnaliasedCollectionsAbcSetImport,
-            Rule::UnconventionalImportAlias,
             Rule::UndefinedExport,
             Rule::UndefinedLocalWithImportStarUsage,
             Rule::UndefinedLocalWithImportStarUsage,
@@ -5115,23 +5138,6 @@ impl<'a> Checker<'a> {
             if self.enabled(Rule::UnusedImport) {
                 pyflakes::rules::unused_import(self, scope, &mut diagnostics);
             }
-            if self.enabled(Rule::UnconventionalImportAlias) {
-                flake8_import_conventions::rules::unconventional_import_alias(
-                    self,
-                    scope,
-                    &mut diagnostics,
-                    &self.settings.flake8_import_conventions.aliases,
-                );
-            }
-            if self.is_stub {
-                if self.enabled(Rule::UnaliasedCollectionsAbcSetImport) {
-                    flake8_pyi::rules::unaliased_collections_abc_set_import(
-                        self,
-                        scope,
-                        &mut diagnostics,
-                    );
-                }
-            }
         }
         self.diagnostics.extend(diagnostics);
     }
diff --git a/crates/ruff/src/rules/flake8_import_conventions/rules/unconventional_import_alias.rs b/crates/ruff/src/rules/flake8_import_conventions/rules/unconventional_import_alias.rs
index dc5876dac31795..4ee7dcbb0d6b3e 100644
--- a/crates/ruff/src/rules/flake8_import_conventions/rules/unconventional_import_alias.rs
+++ b/crates/ruff/src/rules/flake8_import_conventions/rules/unconventional_import_alias.rs
@@ -2,7 +2,7 @@ use rustc_hash::FxHashMap;
 
 use ruff_diagnostics::{AutofixKind, Diagnostic, Fix, Violation};
 use ruff_macros::{derive_message_formats, violation};
-use ruff_python_semantic::Scope;
+use ruff_python_semantic::Binding;
 
 use crate::checkers::ast::Checker;
 use crate::registry::AsRule;
@@ -53,42 +53,38 @@ impl Violation for UnconventionalImportAlias {
 /// ICN001
 pub(crate) fn unconventional_import_alias(
     checker: &Checker,
-    scope: &Scope,
-    diagnostics: &mut Vec<Diagnostic>,
+    binding: &Binding,
     conventions: &FxHashMap<String, String>,
 ) -> Option<Diagnostic> {
-    for (name, binding_id) in scope.all_bindings() {
-        let binding = checker.semantic().binding(binding_id);
+    let Some(qualified_name) = binding.qualified_name() else {
+        return None;
+    };
 
-        let Some(qualified_name) = binding.qualified_name() else {
-            continue;
-        };
+    let Some(expected_alias) = conventions.get(qualified_name) else {
+        return None;
+    };
 
-        let Some(expected_alias) = conventions.get(qualified_name) else {
-            continue;
-        };
-
-        if binding.is_alias() && name == expected_alias {
-            continue;
-        }
+    let name = binding.name(checker.locator);
+    if binding.is_alias() && name == expected_alias {
+        return None;
+    }
 
-        let mut diagnostic = Diagnostic::new(
-            UnconventionalImportAlias {
-                name: qualified_name.to_string(),
-                asname: expected_alias.to_string(),
-            },
-            binding.range,
-        );
-        if checker.patch(diagnostic.kind.rule()) {
-            if checker.semantic().is_available(expected_alias) {
-                diagnostic.try_set_fix(|| {
-                    let (edit, rest) =
-                        Renamer::rename(name, expected_alias, scope, checker.semantic())?;
-                    Ok(Fix::suggested_edits(edit, rest))
-                });
-            }
+    let mut diagnostic = Diagnostic::new(
+        UnconventionalImportAlias {
+            name: qualified_name.to_string(),
+            asname: expected_alias.to_string(),
+        },
+        binding.range,
+    );
+    if checker.patch(diagnostic.kind.rule()) {
+        if checker.semantic().is_available(expected_alias) {
+            diagnostic.try_set_fix(|| {
+                let scope = &checker.semantic().scopes[binding.scope];
+                let (edit, rest) =
+                    Renamer::rename(name, expected_alias, scope, checker.semantic())?;
+                Ok(Fix::suggested_edits(edit, rest))
+            });
         }
-        diagnostics.push(diagnostic);
     }
-    None
+    Some(diagnostic)
 }
diff --git a/crates/ruff/src/rules/flake8_pyi/rules/unaliased_collections_abc_set_import.rs b/crates/ruff/src/rules/flake8_pyi/rules/unaliased_collections_abc_set_import.rs
index 5fb0a3f5f602c9..6ebd42e3a97cd6 100644
--- a/crates/ruff/src/rules/flake8_pyi/rules/unaliased_collections_abc_set_import.rs
+++ b/crates/ruff/src/rules/flake8_pyi/rules/unaliased_collections_abc_set_import.rs
@@ -1,6 +1,6 @@
 use ruff_diagnostics::{AutofixKind, Diagnostic, Fix, Violation};
 use ruff_macros::{derive_message_formats, violation};
-use ruff_python_semantic::{BindingKind, FromImport, Scope};
+use ruff_python_semantic::{Binding, BindingKind, FromImport};
 
 use crate::checkers::ast::Checker;
 use crate::registry::AsRule;
@@ -48,31 +48,29 @@ impl Violation for UnaliasedCollectionsAbcSetImport {
 /// PYI025
 pub(crate) fn unaliased_collections_abc_set_import(
     checker: &Checker,
-    scope: &Scope,
-    diagnostics: &mut Vec<Diagnostic>,
-) {
-    for (name, binding_id) in scope.all_bindings() {
-        let binding = checker.semantic().binding(binding_id);
-        let BindingKind::FromImport(FromImport { qualified_name }) = &binding.kind else {
-            continue;
-        };
-        if qualified_name.as_str() != "collections.abc.Set" {
-            continue;
-        }
-        if name == "AbstractSet" {
-            continue;
-        }
+    binding: &Binding,
+) -> Option<Diagnostic> {
+    let BindingKind::FromImport(FromImport { qualified_name }) = &binding.kind else {
+        return None;
+    };
+    if qualified_name.as_str() != "collections.abc.Set" {
+        return None;
+    }
+
+    let name = binding.name(checker.locator);
+    if name == "AbstractSet" {
+        return None;
+    }
 
-        let mut diagnostic = Diagnostic::new(UnaliasedCollectionsAbcSetImport, binding.range);
-        if checker.patch(diagnostic.kind.rule()) {
-            if checker.semantic().is_available("AbstractSet") {
-                diagnostic.try_set_fix(|| {
-                    let (edit, rest) =
-                        Renamer::rename(name, "AbstractSet", scope, checker.semantic())?;
-                    Ok(Fix::suggested_edits(edit, rest))
-                });
-            }
+    let mut diagnostic = Diagnostic::new(UnaliasedCollectionsAbcSetImport, binding.range);
+    if checker.patch(diagnostic.kind.rule()) {
+        if checker.semantic().is_available("AbstractSet") {
+            diagnostic.try_set_fix(|| {
+                let scope = &checker.semantic().scopes[binding.scope];
+                let (edit, rest) = Renamer::rename(name, "AbstractSet", scope, checker.semantic())?;
+                Ok(Fix::suggested_edits(edit, rest))
+            });
         }
-        diagnostics.push(diagnostic);
     }
+    Some(diagnostic)
 }
diff --git a/crates/ruff_python_semantic/src/binding.rs b/crates/ruff_python_semantic/src/binding.rs
index f66da2a80a8012..45002b87006681 100644
--- a/crates/ruff_python_semantic/src/binding.rs
+++ b/crates/ruff_python_semantic/src/binding.rs
@@ -17,6 +17,8 @@ use crate::ScopeId;
 pub struct Binding<'a> {
     pub kind: BindingKind<'a>,
     pub range: TextRange,
+    /// The [`ScopeId`] of the scope in which the [`Binding`] was defined.
+    pub scope: ScopeId,
     /// The context in which the [`Binding`] was created.
     pub context: ExecutionContext,
     /// The statement in which the [`Binding`] was defined.
diff --git a/crates/ruff_python_semantic/src/model.rs b/crates/ruff_python_semantic/src/model.rs
index 0106dfcfc0d89a..d1aba8460dec17 100644
--- a/crates/ruff_python_semantic/src/model.rs
+++ b/crates/ruff_python_semantic/src/model.rs
@@ -189,6 +189,7 @@ impl<'a> SemanticModel<'a> {
         self.bindings.push(Binding {
             range: TextRange::default(),
             kind: BindingKind::Builtin,
+            scope: ScopeId::global(),
             references: Vec::new(),
             flags: BindingFlags::empty(),
             source: None,
@@ -209,6 +210,7 @@ impl<'a> SemanticModel<'a> {
             kind,
             flags,
             references: Vec::new(),
+            scope: self.scope_id,
             source: self.stmt_id,
             context: self.execution_context(),
             exceptions: self.exceptions(),
@@ -795,6 +797,7 @@ impl<'a> SemanticModel<'a> {
                     kind: BindingKind::Assignment,
                     range: *range,
                     references: Vec::new(),
+                    scope: self.scope_id,
                     source: self.stmt_id,
                     context: self.execution_context(),
                     exceptions: self.exceptions(),

From 7e6b472c5b1256a26bc589ca78acfe0b02713024 Mon Sep 17 00:00:00 2001
From: Dhruv Manilawala <dhruvmanila@gmail.com>
Date: Wed, 19 Jul 2023 09:29:35 +0530
Subject: [PATCH 9/9] Make `lint_only` aware of the source kind (#5876)

---
 crates/ruff/src/linter.rs               |  3 ++-
 crates/ruff_benchmark/benches/linter.rs |  1 +
 crates/ruff_cli/src/diagnostics.rs      | 20 ++++++++++++++++++--
 3 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/crates/ruff/src/linter.rs b/crates/ruff/src/linter.rs
index bc13cc67b209b8..9a42d37a3fe5b0 100644
--- a/crates/ruff/src/linter.rs
+++ b/crates/ruff/src/linter.rs
@@ -321,6 +321,7 @@ pub fn lint_only(
     package: Option<&Path>,
     settings: &Settings,
     noqa: flags::Noqa,
+    source_kind: Option<&SourceKind>,
 ) -> LinterResult<(Vec<Message>, Option<ImportMap>)> {
     // Tokenize once.
     let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents);
@@ -353,7 +354,7 @@ pub fn lint_only(
         &directives,
         settings,
         noqa,
-        None,
+        source_kind,
     );
 
     result.map(|(diagnostics, imports)| {
diff --git a/crates/ruff_benchmark/benches/linter.rs b/crates/ruff_benchmark/benches/linter.rs
index 98ea5c1bd33271..7abaa4fdafa251 100644
--- a/crates/ruff_benchmark/benches/linter.rs
+++ b/crates/ruff_benchmark/benches/linter.rs
@@ -63,6 +63,7 @@ fn benchmark_linter(mut group: BenchmarkGroup<WallTime>, settings: &Settings) {
                         None,
                         settings,
                         flags::Noqa::Enabled,
+                        None,
                     );
 
                     // Assert that file contains no parse errors
diff --git a/crates/ruff_cli/src/diagnostics.rs b/crates/ruff_cli/src/diagnostics.rs
index f994b7a055e05b..1d058b9b98ff14 100644
--- a/crates/ruff_cli/src/diagnostics.rs
+++ b/crates/ruff_cli/src/diagnostics.rs
@@ -204,12 +204,26 @@ pub(crate) fn lint_path(
             (result, fixed)
         } else {
             // If we fail to autofix, lint the original source code.
-            let result = lint_only(&contents, path, package, &settings.lib, noqa);
+            let result = lint_only(
+                &contents,
+                path,
+                package,
+                &settings.lib,
+                noqa,
+                Some(&source_kind),
+            );
             let fixed = FxHashMap::default();
             (result, fixed)
         }
     } else {
-        let result = lint_only(&contents, path, package, &settings.lib, noqa);
+        let result = lint_only(
+            &contents,
+            path,
+            package,
+            &settings.lib,
+            noqa,
+            Some(&source_kind),
+        );
         let fixed = FxHashMap::default();
         (result, fixed)
     };
@@ -316,6 +330,7 @@ pub(crate) fn lint_stdin(
                 package,
                 settings,
                 noqa,
+                Some(&source_kind),
             );
             let fixed = FxHashMap::default();
 
@@ -333,6 +348,7 @@ pub(crate) fn lint_stdin(
             package,
             settings,
             noqa,
+            Some(&source_kind),
         );
         let fixed = FxHashMap::default();
         (result, fixed)