From 41da52a61bd3174580f13fef8b7b4169e00b5b0e Mon Sep 17 00:00:00 2001 From: Zanie Blue Date: Tue, 18 Jul 2023 13:21:51 -0500 Subject: [PATCH 1/9] Implement `TokenKind` for type aliases (#5870) Part of https://github.com/astral-sh/ruff/issues/5062 --- crates/ruff_python_ast/src/token_kind.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/ruff_python_ast/src/token_kind.rs b/crates/ruff_python_ast/src/token_kind.rs index 89b88c7fccc9d6..4c90f3c30fb0fc 100644 --- a/crates/ruff_python_ast/src/token_kind.rs +++ b/crates/ruff_python_ast/src/token_kind.rs @@ -156,6 +156,7 @@ pub enum TokenKind { Try, While, Match, + Type, Case, With, Yield, @@ -426,13 +427,13 @@ impl TokenKind { Tok::While => TokenKind::While, Tok::Match => TokenKind::Match, Tok::Case => TokenKind::Case, + Tok::Type => TokenKind::Type, Tok::With => TokenKind::With, Tok::Yield => TokenKind::Yield, Tok::StartModule => TokenKind::StartModule, Tok::StartInteractive => TokenKind::StartInteractive, Tok::StartExpression => TokenKind::StartExpression, Tok::MagicCommand { .. } => todo!(), - Tok::Type => todo!(), } } } From 4204fc002d67dcb002df6852ec89db9a999c2029 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Tue, 18 Jul 2023 14:27:46 -0400 Subject: [PATCH 2/9] Remove exception-handler lexing from `unused-bound-exception` fix (#5851) ## Summary The motivation here is that it will make this rule easier to rewrite as a deferred check. Right now, we can't run this rule in the deferred phase, because it depends on the `except_handler` to power its autofix. Instead of lexing the `except_handler`, we can use the `SimpleTokenizer` from the formatter, and just lex forwards and backwards. For context, this rule detects the unused `e` in: ```python try: pass except ValueError as e: pass ``` --- Cargo.lock | 3 +- crates/ruff/src/checkers/ast/mod.rs | 7 +-- crates/ruff/src/rules/pyflakes/fixes.rs | 54 +++++++++---------- crates/ruff_python_formatter/Cargo.toml | 1 - crates/ruff_python_formatter/src/builders.rs | 4 +- .../src/comments/format.rs | 2 +- .../src/comments/placement.rs | 6 ++- .../src/expression/expr_call.rs | 3 +- .../src/expression/expr_slice.rs | 17 +++--- .../src/expression/expr_unary_op.rs | 16 +++--- .../src/expression/parentheses.rs | 10 ++-- crates/ruff_python_formatter/src/lib.rs | 1 - .../src/other/arguments.rs | 4 +- .../src/statement/stmt_class_def.rs | 11 ++-- .../src/statement/stmt_function_def.rs | 2 +- .../src/statement/stmt_with.rs | 2 +- .../src/statement/suite.rs | 18 ++++--- crates/ruff_python_whitespace/Cargo.toml | 4 ++ crates/ruff_python_whitespace/src/lib.rs | 2 + ...hitespace__tokenizer__tests__Reverse.snap} | 2 +- ..._identifier_ending_in_non_start_char.snap} | 2 +- ...e_word_with_only_id_continuing_chars.snap} | 2 +- ...ce__tokenizer__tests__tokenize_bogus.snap} | 2 +- ...ce__tokenizer__tests__tokenize_comma.snap} | 2 +- ...enizer__tests__tokenize_continuation.snap} | 2 +- ...tokenizer__tests__tokenize_multichar.snap} | 2 +- ...kenizer__tests__tokenize_parentheses.snap} | 2 +- ...ce__tokenizer__tests__tokenize_slash.snap} | 2 +- ...tokenizer__tests__tokenize_substring.snap} | 2 +- ...e__tokenizer__tests__tokenize_trivia.snap} | 2 +- ...ce__tokenizer__tests__tricky_unicode.snap} | 2 +- .../src/tokenizer.rs} | 44 +++++++-------- 32 files changed, 124 insertions(+), 111 deletions(-) rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__Reverse.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__Reverse.snap} (98%) rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__identifier_ending_in_non_start_char.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__identifier_ending_in_non_start_char.snap} (65%) rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__ignore_word_with_only_id_continuing_chars.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__ignore_word_with_only_id_continuing_chars.snap} (80%) rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_bogus.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_bogus.snap} (97%) rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_comma.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_comma.snap} (83%) rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_continuation.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_continuation.snap} (88%) rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_multichar.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_multichar.snap} (89%) rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_parentheses.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_parentheses.snap} (88%) rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_slash.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_slash.snap} (91%) rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_substring.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_substring.snap} (81%) rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_trivia.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_trivia.snap} (84%) rename crates/{ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tricky_unicode.snap => ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tricky_unicode.snap} (65%) rename crates/{ruff_python_formatter/src/trivia.rs => ruff_python_whitespace/src/tokenizer.rs} (93%) diff --git a/Cargo.lock b/Cargo.lock index b58fdb4ab8b968..4d904a5884caf1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2157,7 +2157,6 @@ dependencies = [ "similar", "smallvec", "thiserror", - "unic-ucd-ident", ] [[package]] @@ -2195,8 +2194,10 @@ version = "0.0.0" name = "ruff_python_whitespace" version = "0.0.0" dependencies = [ + "insta", "memchr", "ruff_text_size", + "unic-ucd-ident", ] [[package]] diff --git a/crates/ruff/src/checkers/ast/mod.rs b/crates/ruff/src/checkers/ast/mod.rs index 455e62b52083bc..c1bf7fb8a5b675 100644 --- a/crates/ruff/src/checkers/ast/mod.rs +++ b/crates/ruff/src/checkers/ast/mod.rs @@ -4103,11 +4103,8 @@ where ); if self.patch(Rule::UnusedVariable) { diagnostic.try_set_fix(|| { - pyflakes::fixes::remove_exception_handler_assignment( - except_handler, - self.locator, - ) - .map(Fix::automatic) + pyflakes::fixes::remove_exception_handler_assignment(name, self.locator) + .map(Fix::automatic) }); } self.diagnostics.push(diagnostic); diff --git a/crates/ruff/src/rules/pyflakes/fixes.rs b/crates/ruff/src/rules/pyflakes/fixes.rs index 694e03bf87eeac..c6cdee0f5a3219 100644 --- a/crates/ruff/src/rules/pyflakes/fixes.rs +++ b/crates/ruff/src/rules/pyflakes/fixes.rs @@ -1,10 +1,10 @@ -use anyhow::{bail, Ok, Result}; +use anyhow::{Context, Ok, Result}; use ruff_text_size::TextRange; -use rustpython_parser::ast::{ExceptHandler, Expr, Ranged}; -use rustpython_parser::{lexer, Mode}; +use rustpython_parser::ast::{Expr, Identifier, Ranged}; use ruff_diagnostics::Edit; use ruff_python_ast::source_code::{Locator, Stylist}; +use ruff_python_whitespace::{SimpleTokenizer, TokenKind}; use crate::autofix::codemods::CodegenStylist; use crate::cst::matchers::{match_call_mut, match_dict, match_expression}; @@ -90,31 +90,29 @@ pub(crate) fn remove_unused_positional_arguments_from_format_call( /// Generate a [`Edit`] to remove the binding from an exception handler. pub(crate) fn remove_exception_handler_assignment( - except_handler: &ExceptHandler, + bound_exception: &Identifier, locator: &Locator, ) -> Result { - let contents = locator.slice(except_handler.range()); - let mut fix_start = None; - let mut fix_end = None; - - // End of the token just before the `as` to the semicolon. - let mut prev = None; - for (tok, range) in - lexer::lex_starts_at(contents, Mode::Module, except_handler.start()).flatten() - { - if tok.is_as() { - fix_start = prev; - } - if tok.is_colon() { - fix_end = Some(range.start()); - break; - } - prev = Some(range.end()); - } - - if let (Some(start), Some(end)) = (fix_start, fix_end) { - Ok(Edit::deletion(start, end)) - } else { - bail!("Could not find span of exception handler") - } + // Lex backwards, to the token just before the `as`. + let mut tokenizer = + SimpleTokenizer::up_to(bound_exception.start(), locator.contents()).skip_trivia(); + + // Eat the `as` token. + let preceding = tokenizer + .next_back() + .context("expected the exception name to be preceded by `as`")?; + debug_assert!(matches!(preceding.kind, TokenKind::As)); + + // Lex to the end of the preceding token, which should be the exception value. + let preceding = tokenizer + .next_back() + .context("expected the exception name to be preceded by a token")?; + + // Lex forwards, to the `:` token. + let following = SimpleTokenizer::starts_at(bound_exception.end(), locator.contents()) + .next() + .context("expected the exception name to be followed by a colon")?; + debug_assert!(matches!(following.kind, TokenKind::Colon)); + + Ok(Edit::deletion(preceding.end(), following.start())) } diff --git a/crates/ruff_python_formatter/Cargo.toml b/crates/ruff_python_formatter/Cargo.toml index 381c3ec6c9cca5..ae36a021198b8b 100644 --- a/crates/ruff_python_formatter/Cargo.toml +++ b/crates/ruff_python_formatter/Cargo.toml @@ -28,7 +28,6 @@ rustpython-parser = { workspace = true } serde = { workspace = true, optional = true } smallvec = { workspace = true } thiserror = { workspace = true } -unic-ucd-ident = "0.9.0" [dev-dependencies] ruff_formatter = { path = "../ruff_formatter", features = ["serde"]} diff --git a/crates/ruff_python_formatter/src/builders.rs b/crates/ruff_python_formatter/src/builders.rs index 9f24a49ca2a7d1..0456d58df9af77 100644 --- a/crates/ruff_python_formatter/src/builders.rs +++ b/crates/ruff_python_formatter/src/builders.rs @@ -2,10 +2,12 @@ use ruff_text_size::{TextRange, TextSize}; use rustpython_parser::ast::Ranged; use ruff_formatter::{format_args, write, Argument, Arguments}; +use ruff_python_whitespace::{ + lines_after, skip_trailing_trivia, SimpleTokenizer, Token, TokenKind, +}; use crate::context::NodeLevel; use crate::prelude::*; -use crate::trivia::{lines_after, skip_trailing_trivia, SimpleTokenizer, Token, TokenKind}; use crate::MagicTrailingComma; /// Adds parentheses and indents `content` if it doesn't fit on a line. diff --git a/crates/ruff_python_formatter/src/comments/format.rs b/crates/ruff_python_formatter/src/comments/format.rs index 84b0e3b654a6d1..aa7d7296f10592 100644 --- a/crates/ruff_python_formatter/src/comments/format.rs +++ b/crates/ruff_python_formatter/src/comments/format.rs @@ -3,11 +3,11 @@ use rustpython_parser::ast::Ranged; use ruff_formatter::{format_args, write, FormatError, SourceCode}; use ruff_python_ast::node::{AnyNodeRef, AstNode}; +use ruff_python_whitespace::{lines_after, lines_before, skip_trailing_trivia}; use crate::comments::SourceComment; use crate::context::NodeLevel; use crate::prelude::*; -use crate::trivia::{lines_after, lines_before, skip_trailing_trivia}; /// Formats the leading comments of a node. pub(crate) fn leading_node_comments(node: &T) -> FormatLeadingComments diff --git a/crates/ruff_python_formatter/src/comments/placement.rs b/crates/ruff_python_formatter/src/comments/placement.rs index e4d7ec9d82aa03..27ae9688c86159 100644 --- a/crates/ruff_python_formatter/src/comments/placement.rs +++ b/crates/ruff_python_formatter/src/comments/placement.rs @@ -7,14 +7,16 @@ use rustpython_parser::ast::{Expr, ExprIfExp, ExprSlice, Ranged}; use ruff_python_ast::node::{AnyNodeRef, AstNode}; use ruff_python_ast::source_code::Locator; use ruff_python_ast::whitespace; -use ruff_python_whitespace::{PythonWhitespace, UniversalNewlines}; +use ruff_python_whitespace::{ + first_non_trivia_token_rev, PythonWhitespace, SimpleTokenizer, Token, TokenKind, + UniversalNewlines, +}; use crate::comments::visitor::{CommentPlacement, DecoratedComment}; use crate::expression::expr_slice::{assign_comment_in_slice, ExprSliceCommentSection}; use crate::other::arguments::{ assign_argument_separator_comment_placement, find_argument_separators, }; -use crate::trivia::{first_non_trivia_token_rev, SimpleTokenizer, Token, TokenKind}; /// Implements the custom comment placement logic. pub(super) fn place_comment<'a>( diff --git a/crates/ruff_python_formatter/src/expression/expr_call.rs b/crates/ruff_python_formatter/src/expression/expr_call.rs index c46aa374f7c41f..4054208bafae3c 100644 --- a/crates/ruff_python_formatter/src/expression/expr_call.rs +++ b/crates/ruff_python_formatter/src/expression/expr_call.rs @@ -3,14 +3,13 @@ use rustpython_parser::ast::{Expr, ExprCall, Ranged}; use ruff_formatter::write; use ruff_python_ast::node::AnyNodeRef; +use ruff_python_whitespace::{SimpleTokenizer, TokenKind}; use crate::comments::dangling_comments; - use crate::expression::parentheses::{ parenthesized, NeedsParentheses, OptionalParentheses, Parentheses, }; use crate::prelude::*; -use crate::trivia::{SimpleTokenizer, TokenKind}; use crate::FormatNodeRule; #[derive(Default)] diff --git a/crates/ruff_python_formatter/src/expression/expr_slice.rs b/crates/ruff_python_formatter/src/expression/expr_slice.rs index 0d9dd7445fd8f2..93434b87777fd1 100644 --- a/crates/ruff_python_formatter/src/expression/expr_slice.rs +++ b/crates/ruff_python_formatter/src/expression/expr_slice.rs @@ -1,15 +1,16 @@ +use ruff_text_size::TextRange; +use rustpython_parser::ast::ExprSlice; +use rustpython_parser::ast::{Expr, Ranged}; + +use ruff_formatter::prelude::{hard_line_break, line_suffix_boundary, space, text}; +use ruff_formatter::{write, Buffer, Format, FormatError, FormatResult}; +use ruff_python_ast::node::{AnyNodeRef, AstNode}; +use ruff_python_whitespace::{first_non_trivia_token, Token, TokenKind}; + use crate::comments::{dangling_comments, SourceComment}; use crate::context::PyFormatContext; use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses}; -use crate::trivia::Token; -use crate::trivia::{first_non_trivia_token, TokenKind}; use crate::{AsFormat, FormatNodeRule, PyFormatter}; -use ruff_formatter::prelude::{hard_line_break, line_suffix_boundary, space, text}; -use ruff_formatter::{write, Buffer, Format, FormatError, FormatResult}; -use ruff_python_ast::node::{AnyNodeRef, AstNode}; -use ruff_text_size::TextRange; -use rustpython_parser::ast::ExprSlice; -use rustpython_parser::ast::{Expr, Ranged}; #[derive(Default)] pub struct FormatExprSlice; diff --git a/crates/ruff_python_formatter/src/expression/expr_unary_op.rs b/crates/ruff_python_formatter/src/expression/expr_unary_op.rs index 97462c4d7f9b97..ffe5f0f69cff9a 100644 --- a/crates/ruff_python_formatter/src/expression/expr_unary_op.rs +++ b/crates/ruff_python_formatter/src/expression/expr_unary_op.rs @@ -1,14 +1,16 @@ +use ruff_text_size::{TextLen, TextRange}; +use rustpython_parser::ast::UnaryOp; +use rustpython_parser::ast::{ExprUnaryOp, Ranged}; + +use ruff_formatter::prelude::{hard_line_break, space, text}; +use ruff_formatter::{Format, FormatContext, FormatResult}; +use ruff_python_ast::node::AnyNodeRef; +use ruff_python_whitespace::{SimpleTokenizer, TokenKind}; + use crate::comments::trailing_comments; use crate::context::PyFormatContext; use crate::expression::parentheses::{NeedsParentheses, OptionalParentheses}; -use crate::trivia::{SimpleTokenizer, TokenKind}; use crate::{AsFormat, FormatNodeRule, PyFormatter}; -use ruff_formatter::prelude::{hard_line_break, space, text}; -use ruff_formatter::{Format, FormatContext, FormatResult}; -use ruff_python_ast::node::AnyNodeRef; -use ruff_text_size::{TextLen, TextRange}; -use rustpython_parser::ast::UnaryOp; -use rustpython_parser::ast::{ExprUnaryOp, Ranged}; #[derive(Default)] pub struct FormatExprUnaryOp; diff --git a/crates/ruff_python_formatter/src/expression/parentheses.rs b/crates/ruff_python_formatter/src/expression/parentheses.rs index 281d1896b8ab28..85981345f4124c 100644 --- a/crates/ruff_python_formatter/src/expression/parentheses.rs +++ b/crates/ruff_python_formatter/src/expression/parentheses.rs @@ -1,10 +1,12 @@ -use crate::context::NodeLevel; -use crate::prelude::*; -use crate::trivia::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind}; +use rustpython_parser::ast::Ranged; + use ruff_formatter::prelude::tag::Condition; use ruff_formatter::{format_args, write, Argument, Arguments}; use ruff_python_ast::node::AnyNodeRef; -use rustpython_parser::ast::Ranged; +use ruff_python_whitespace::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind}; + +use crate::context::NodeLevel; +use crate::prelude::*; #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub(crate) enum OptionalParentheses { diff --git a/crates/ruff_python_formatter/src/lib.rs b/crates/ruff_python_formatter/src/lib.rs index 7055bab2faf569..d1a7420291177d 100644 --- a/crates/ruff_python_formatter/src/lib.rs +++ b/crates/ruff_python_formatter/src/lib.rs @@ -33,7 +33,6 @@ pub(crate) mod other; pub(crate) mod pattern; mod prelude; pub(crate) mod statement; -mod trivia; include!("../../ruff_formatter/shared_traits.rs"); diff --git a/crates/ruff_python_formatter/src/other/arguments.rs b/crates/ruff_python_formatter/src/other/arguments.rs index 3e84558ad2c6a4..5e4d7fe6f34901 100644 --- a/crates/ruff_python_formatter/src/other/arguments.rs +++ b/crates/ruff_python_formatter/src/other/arguments.rs @@ -1,9 +1,11 @@ use std::usize; +use ruff_text_size::{TextRange, TextSize}; use rustpython_parser::ast::{Arguments, Ranged}; use ruff_formatter::{format_args, write}; use ruff_python_ast::node::{AnyNodeRef, AstNode}; +use ruff_python_whitespace::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind}; use crate::comments::{ dangling_comments, leading_comments, leading_node_comments, trailing_comments, @@ -12,9 +14,7 @@ use crate::comments::{ use crate::context::NodeLevel; use crate::expression::parentheses::parenthesized; use crate::prelude::*; -use crate::trivia::{first_non_trivia_token, SimpleTokenizer, Token, TokenKind}; use crate::FormatNodeRule; -use ruff_text_size::{TextRange, TextSize}; #[derive(Default)] pub struct FormatArguments; diff --git a/crates/ruff_python_formatter/src/statement/stmt_class_def.rs b/crates/ruff_python_formatter/src/statement/stmt_class_def.rs index 86c76882849767..0876ac7347a6bc 100644 --- a/crates/ruff_python_formatter/src/statement/stmt_class_def.rs +++ b/crates/ruff_python_formatter/src/statement/stmt_class_def.rs @@ -1,11 +1,12 @@ -use crate::comments::trailing_comments; +use ruff_text_size::TextRange; +use rustpython_parser::ast::{Ranged, StmtClassDef}; + +use ruff_formatter::write; +use ruff_python_whitespace::{SimpleTokenizer, TokenKind}; +use crate::comments::trailing_comments; use crate::expression::parentheses::{parenthesized, Parentheses}; use crate::prelude::*; -use crate::trivia::{SimpleTokenizer, TokenKind}; -use ruff_formatter::write; -use ruff_text_size::TextRange; -use rustpython_parser::ast::{Ranged, StmtClassDef}; #[derive(Default)] pub struct FormatStmtClassDef; diff --git a/crates/ruff_python_formatter/src/statement/stmt_function_def.rs b/crates/ruff_python_formatter/src/statement/stmt_function_def.rs index 69f370e7c10e32..f7fa032174d2b6 100644 --- a/crates/ruff_python_formatter/src/statement/stmt_function_def.rs +++ b/crates/ruff_python_formatter/src/statement/stmt_function_def.rs @@ -2,12 +2,12 @@ use rustpython_parser::ast::{Ranged, StmtFunctionDef}; use ruff_formatter::{write, FormatOwnedWithRule, FormatRefWithRule}; use ruff_python_ast::function::AnyFunctionDefinition; +use ruff_python_whitespace::{lines_after, skip_trailing_trivia}; use crate::comments::{leading_comments, trailing_comments}; use crate::context::NodeLevel; use crate::expression::parentheses::{optional_parentheses, Parentheses}; use crate::prelude::*; -use crate::trivia::{lines_after, skip_trailing_trivia}; use crate::FormatNodeRule; #[derive(Default)] diff --git a/crates/ruff_python_formatter/src/statement/stmt_with.rs b/crates/ruff_python_formatter/src/statement/stmt_with.rs index 56eca66b17236d..2c610f029d5883 100644 --- a/crates/ruff_python_formatter/src/statement/stmt_with.rs +++ b/crates/ruff_python_formatter/src/statement/stmt_with.rs @@ -3,13 +3,13 @@ use rustpython_parser::ast::{Ranged, StmtAsyncWith, StmtWith, Suite, WithItem}; use ruff_formatter::{format_args, write, FormatError}; use ruff_python_ast::node::AnyNodeRef; +use ruff_python_whitespace::{SimpleTokenizer, TokenKind}; use crate::comments::trailing_comments; use crate::expression::parentheses::{ in_parentheses_only_soft_line_break_or_space, optional_parentheses, }; use crate::prelude::*; -use crate::trivia::{SimpleTokenizer, TokenKind}; use crate::FormatNodeRule; pub(super) enum AnyStatementWith<'a> { diff --git a/crates/ruff_python_formatter/src/statement/suite.rs b/crates/ruff_python_formatter/src/statement/suite.rs index 92fc32ed4e6018..09a12f6697e148 100644 --- a/crates/ruff_python_formatter/src/statement/suite.rs +++ b/crates/ruff_python_formatter/src/statement/suite.rs @@ -1,10 +1,12 @@ -use crate::context::NodeLevel; -use crate::prelude::*; -use crate::trivia::lines_before; +use rustpython_parser::ast::{Ranged, Stmt, Suite}; + use ruff_formatter::{ format_args, write, FormatOwnedWithRule, FormatRefWithRule, FormatRuleWithOptions, }; -use rustpython_parser::ast::{Ranged, Stmt, Suite}; +use ruff_python_whitespace::lines_before; + +use crate::context::NodeLevel; +use crate::prelude::*; /// Level at which the [`Suite`] appears in the source code. #[derive(Copy, Clone, Debug)] @@ -185,13 +187,15 @@ impl<'ast> IntoFormat> for Suite { #[cfg(test)] mod tests { + use rustpython_parser::ast::Suite; + use rustpython_parser::Parse; + + use ruff_formatter::format; + use crate::comments::Comments; use crate::prelude::*; use crate::statement::suite::SuiteLevel; use crate::PyFormatOptions; - use ruff_formatter::format; - use rustpython_parser::ast::Suite; - use rustpython_parser::Parse; fn format_suite(level: SuiteLevel) -> String { let source = r#" diff --git a/crates/ruff_python_whitespace/Cargo.toml b/crates/ruff_python_whitespace/Cargo.toml index cbfc1aea2470ce..22b36562d39382 100644 --- a/crates/ruff_python_whitespace/Cargo.toml +++ b/crates/ruff_python_whitespace/Cargo.toml @@ -16,3 +16,7 @@ license = { workspace = true } ruff_text_size = { workspace = true } memchr = { workspace = true } +unic-ucd-ident = "0.9.0" + +[dev-dependencies] +insta = { workspace = true } diff --git a/crates/ruff_python_whitespace/src/lib.rs b/crates/ruff_python_whitespace/src/lib.rs index b8c95e351c5fe5..4e16d7ca2dcbac 100644 --- a/crates/ruff_python_whitespace/src/lib.rs +++ b/crates/ruff_python_whitespace/src/lib.rs @@ -1,7 +1,9 @@ mod cursor; mod newlines; +mod tokenizer; mod whitespace; pub use cursor::*; pub use newlines::*; +pub use tokenizer::*; pub use whitespace::*; diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__Reverse.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__Reverse.snap similarity index 98% rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__Reverse.snap rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__Reverse.snap index ec701539c60a62..3ae643205e83fa 100644 --- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__Reverse.snap +++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__Reverse.snap @@ -1,5 +1,5 @@ --- -source: crates/ruff_python_formatter/src/trivia.rs +source: crates/ruff_python_whitespace/src/tokenizer.rs expression: test_case.tokenize_reverse() --- [ diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__identifier_ending_in_non_start_char.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__identifier_ending_in_non_start_char.snap similarity index 65% rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__identifier_ending_in_non_start_char.snap rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__identifier_ending_in_non_start_char.snap index 15e9d84407efd2..6f19b912734087 100644 --- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__identifier_ending_in_non_start_char.snap +++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__identifier_ending_in_non_start_char.snap @@ -1,5 +1,5 @@ --- -source: crates/ruff_python_formatter/src/trivia.rs +source: crates/ruff_python_whitespace/src/tokenizer.rs expression: test_case.tokens() --- [ diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__ignore_word_with_only_id_continuing_chars.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__ignore_word_with_only_id_continuing_chars.snap similarity index 80% rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__ignore_word_with_only_id_continuing_chars.snap rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__ignore_word_with_only_id_continuing_chars.snap index 26e9fd18bcbeb3..ccb0282831ef1c 100644 --- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__ignore_word_with_only_id_continuing_chars.snap +++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__ignore_word_with_only_id_continuing_chars.snap @@ -1,5 +1,5 @@ --- -source: crates/ruff_python_formatter/src/trivia.rs +source: crates/ruff_python_whitespace/src/tokenizer.rs expression: test_case.tokens() --- [ diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_bogus.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_bogus.snap similarity index 97% rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_bogus.snap rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_bogus.snap index 79368160895185..f5005ec2c91376 100644 --- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_bogus.snap +++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_bogus.snap @@ -1,5 +1,5 @@ --- -source: crates/ruff_python_formatter/src/trivia.rs +source: crates/ruff_python_whitespace/src/tokenizer.rs expression: test_case.tokens() --- [ diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_comma.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_comma.snap similarity index 83% rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_comma.snap rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_comma.snap index 38d1fed60a6609..a1f98abd4eaea3 100644 --- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_comma.snap +++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_comma.snap @@ -1,5 +1,5 @@ --- -source: crates/ruff_python_formatter/src/trivia.rs +source: crates/ruff_python_whitespace/src/tokenizer.rs expression: test_case.tokens() --- [ diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_continuation.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_continuation.snap similarity index 88% rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_continuation.snap rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_continuation.snap index 83079fe81aeb6e..5e9802280d882d 100644 --- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_continuation.snap +++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_continuation.snap @@ -1,5 +1,5 @@ --- -source: crates/ruff_python_formatter/src/trivia.rs +source: crates/ruff_python_whitespace/src/tokenizer.rs expression: test_case.tokens() --- [ diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_multichar.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_multichar.snap similarity index 89% rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_multichar.snap rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_multichar.snap index 16a1293b4420a9..ff371d781f317a 100644 --- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_multichar.snap +++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_multichar.snap @@ -1,5 +1,5 @@ --- -source: crates/ruff_python_formatter/src/trivia.rs +source: crates/ruff_python_whitespace/src/tokenizer.rs expression: test_case.tokens() --- [ diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_parentheses.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_parentheses.snap similarity index 88% rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_parentheses.snap rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_parentheses.snap index ccd6969c2db2db..6c792f7cf07822 100644 --- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_parentheses.snap +++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_parentheses.snap @@ -1,5 +1,5 @@ --- -source: crates/ruff_python_formatter/src/trivia.rs +source: crates/ruff_python_whitespace/src/tokenizer.rs expression: test_case.tokens() --- [ diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_slash.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_slash.snap similarity index 91% rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_slash.snap rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_slash.snap index 093715cf177983..f82f501d652efe 100644 --- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_slash.snap +++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_slash.snap @@ -1,5 +1,5 @@ --- -source: crates/ruff_python_formatter/src/trivia.rs +source: crates/ruff_python_whitespace/src/tokenizer.rs expression: test_case.tokens() --- [ diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_substring.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_substring.snap similarity index 81% rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_substring.snap rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_substring.snap index 181b438c3f3301..9b06f81cb9747c 100644 --- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_substring.snap +++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_substring.snap @@ -1,5 +1,5 @@ --- -source: crates/ruff_python_formatter/src/trivia.rs +source: crates/ruff_python_whitespace/src/tokenizer.rs expression: test_case.tokens() --- [ diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_trivia.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_trivia.snap similarity index 84% rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_trivia.snap rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_trivia.snap index f1d708d6cb8bce..79f9130287c804 100644 --- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tokenize_trivia.snap +++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tokenize_trivia.snap @@ -1,5 +1,5 @@ --- -source: crates/ruff_python_formatter/src/trivia.rs +source: crates/ruff_python_whitespace/src/tokenizer.rs expression: test_case.tokens() --- [ diff --git a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tricky_unicode.snap b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tricky_unicode.snap similarity index 65% rename from crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tricky_unicode.snap rename to crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tricky_unicode.snap index 91b9cb397a04c3..c8aab65b39d49b 100644 --- a/crates/ruff_python_formatter/src/snapshots/ruff_python_formatter__trivia__tests__tricky_unicode.snap +++ b/crates/ruff_python_whitespace/src/snapshots/ruff_python_whitespace__tokenizer__tests__tricky_unicode.snap @@ -1,5 +1,5 @@ --- -source: crates/ruff_python_formatter/src/trivia.rs +source: crates/ruff_python_whitespace/src/tokenizer.rs expression: test_case.tokens() --- [ diff --git a/crates/ruff_python_formatter/src/trivia.rs b/crates/ruff_python_whitespace/src/tokenizer.rs similarity index 93% rename from crates/ruff_python_formatter/src/trivia.rs rename to crates/ruff_python_whitespace/src/tokenizer.rs index 63f92b6e2f8019..c8aa15dbb76c02 100644 --- a/crates/ruff_python_formatter/src/trivia.rs +++ b/crates/ruff_python_whitespace/src/tokenizer.rs @@ -1,7 +1,7 @@ use ruff_text_size::{TextLen, TextRange, TextSize}; use unic_ucd_ident::{is_xid_continue, is_xid_start}; -use ruff_python_whitespace::{is_python_whitespace, Cursor}; +use crate::{is_python_whitespace, Cursor}; /// Searches for the first non-trivia character in `range`. /// @@ -11,7 +11,7 @@ use ruff_python_whitespace::{is_python_whitespace, Cursor}; /// of the character, the second item the non-trivia character. /// /// Returns `None` if the range is empty or only contains trivia (whitespace or comments). -pub(crate) fn first_non_trivia_token(offset: TextSize, code: &str) -> Option { +pub fn first_non_trivia_token(offset: TextSize, code: &str) -> Option { SimpleTokenizer::starts_at(offset, code) .skip_trivia() .next() @@ -23,14 +23,14 @@ pub(crate) fn first_non_trivia_token(offset: TextSize, code: &str) -> Option Option { +pub fn first_non_trivia_token_rev(offset: TextSize, code: &str) -> Option { SimpleTokenizer::up_to(offset, code) .skip_trivia() .next_back() } /// Returns the number of newlines between `offset` and the first non whitespace character in the source code. -pub(crate) fn lines_before(offset: TextSize, code: &str) -> u32 { +pub fn lines_before(offset: TextSize, code: &str) -> u32 { let tokens = SimpleTokenizer::up_to(offset, code); let mut newlines = 0u32; @@ -52,7 +52,7 @@ pub(crate) fn lines_before(offset: TextSize, code: &str) -> u32 { } /// Counts the empty lines between `offset` and the first non-whitespace character. -pub(crate) fn lines_after(offset: TextSize, code: &str) -> u32 { +pub fn lines_after(offset: TextSize, code: &str) -> u32 { let tokens = SimpleTokenizer::starts_at(offset, code); let mut newlines = 0u32; @@ -74,7 +74,7 @@ pub(crate) fn lines_after(offset: TextSize, code: &str) -> u32 { } /// Returns the position after skipping any trailing trivia up to, but not including the newline character. -pub(crate) fn skip_trailing_trivia(offset: TextSize, code: &str) -> TextSize { +pub fn skip_trailing_trivia(offset: TextSize, code: &str) -> TextSize { let tokenizer = SimpleTokenizer::starts_at(offset, code); for token in tokenizer { @@ -110,32 +110,32 @@ fn is_non_ascii_identifier_start(c: char) -> bool { } #[derive(Clone, Debug, Eq, PartialEq, Hash)] -pub(crate) struct Token { - pub(crate) kind: TokenKind, - pub(crate) range: TextRange, +pub struct Token { + pub kind: TokenKind, + pub range: TextRange, } impl Token { - pub(crate) const fn kind(&self) -> TokenKind { + pub const fn kind(&self) -> TokenKind { self.kind } #[allow(unused)] - pub(crate) const fn range(&self) -> TextRange { + pub const fn range(&self) -> TextRange { self.range } - pub(crate) const fn start(&self) -> TextSize { + pub const fn start(&self) -> TextSize { self.range.start() } - pub(crate) const fn end(&self) -> TextSize { + pub const fn end(&self) -> TextSize { self.range.end() } } #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] -pub(crate) enum TokenKind { +pub enum TokenKind { /// A comment, not including the trailing new line. Comment, @@ -247,7 +247,7 @@ impl TokenKind { /// /// The tokenizer doesn't guarantee any correctness after it returned a [`TokenKind::Other`]. That's why it /// will return [`TokenKind::Bogus`] for every character after until it reaches the end of the file. -pub(crate) struct SimpleTokenizer<'a> { +pub struct SimpleTokenizer<'a> { offset: TextSize, back_offset: TextSize, /// `true` when it is known that the current `back` line has no comment for sure. @@ -258,7 +258,7 @@ pub(crate) struct SimpleTokenizer<'a> { } impl<'a> SimpleTokenizer<'a> { - pub(crate) fn new(source: &'a str, range: TextRange) -> Self { + pub fn new(source: &'a str, range: TextRange) -> Self { Self { offset: range.start(), back_offset: range.end(), @@ -269,20 +269,20 @@ impl<'a> SimpleTokenizer<'a> { } } - pub(crate) fn starts_at(offset: TextSize, source: &'a str) -> Self { + pub fn starts_at(offset: TextSize, source: &'a str) -> Self { let range = TextRange::new(offset, source.text_len()); Self::new(source, range) } /// Creates a tokenizer that lexes tokens from the start of `source` up to `offset`. - pub(crate) fn up_to(offset: TextSize, source: &'a str) -> Self { + pub fn up_to(offset: TextSize, source: &'a str) -> Self { Self::new(source, TextRange::up_to(offset)) } /// Creates a tokenizer that lexes tokens from the start of `source` up to `offset`, and informs /// the lexer that the line at `offset` contains no comments. This can significantly speed up backwards lexing /// because the lexer doesn't need to scan for comments. - pub(crate) fn up_to_without_back_comment(offset: TextSize, source: &'a str) -> Self { + pub fn up_to_without_back_comment(offset: TextSize, source: &'a str) -> Self { let mut tokenizer = Self::up_to(offset, source); tokenizer.back_line_has_no_comment = true; tokenizer @@ -375,7 +375,7 @@ impl<'a> SimpleTokenizer<'a> { /// Returns the next token from the back. Prefer iterating forwards. Iterating backwards is significantly more expensive /// because it needs to check if the line has any comments when encountering any non-trivia token. - pub(crate) fn next_token_back(&mut self) -> Token { + pub fn next_token_back(&mut self) -> Token { self.cursor.start_token(); let Some(last) = self.cursor.bump_back() else { @@ -503,7 +503,7 @@ impl<'a> SimpleTokenizer<'a> { token } - pub(crate) fn skip_trivia(self) -> impl Iterator + DoubleEndedIterator + 'a { + pub fn skip_trivia(self) -> impl Iterator + DoubleEndedIterator + 'a { self.filter(|t| !t.kind().is_trivia()) } } @@ -539,7 +539,7 @@ mod tests { use insta::assert_debug_snapshot; use ruff_text_size::{TextLen, TextRange, TextSize}; - use crate::trivia::{lines_after, lines_before, SimpleTokenizer, Token}; + use crate::tokenizer::{lines_after, lines_before, SimpleTokenizer, Token}; struct TokenizationTestCase { source: &'static str, From c577045f2e280796591b1afcf867322d6db58062 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 18 Jul 2023 23:05:55 +0200 Subject: [PATCH 3/9] perf(formatter): Use memchar for faster back tokenization (#5823) --- .../ruff_python_whitespace/src/tokenizer.rs | 52 +++++++++++-------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/crates/ruff_python_whitespace/src/tokenizer.rs b/crates/ruff_python_whitespace/src/tokenizer.rs index c8aa15dbb76c02..cb56ba5a839f4d 100644 --- a/crates/ruff_python_whitespace/src/tokenizer.rs +++ b/crates/ruff_python_whitespace/src/tokenizer.rs @@ -1,3 +1,4 @@ +use memchr::memrchr3_iter; use ruff_text_size::{TextLen, TextRange, TextSize}; use unic_ucd_ident::{is_xid_continue, is_xid_start}; @@ -419,36 +420,45 @@ impl<'a> SimpleTokenizer<'a> { // For all other tokens, test if the character isn't part of a comment. c => { - let mut comment_offset = None; - // Skip the test whether there's a preceding comment if it has been performed before. - if !self.back_line_has_no_comment { - for (back_index, c) in self.cursor.chars().rev().enumerate() { - match c { - '#' => { - // Potentially a comment - comment_offset = Some(back_index + 1); - } - '\r' | '\n' | '\\' => { - break; - } - c => { - if !is_python_whitespace(c) - && TokenKind::from_non_trivia_char(c) == TokenKind::Other - { - comment_offset = None; - } - } + let comment_offset = if self.back_line_has_no_comment { + None + } else { + let bytes = self.cursor.chars().as_str().as_bytes(); + let mut line_start = 0; + let mut last_comment_offset = None; + + // Find the start of the line, or any potential comments. + for index in memrchr3_iter(b'\n', b'\r', b'#', bytes) { + if bytes[index] == b'#' { + // Potentially a comment, but not guaranteed + last_comment_offset = Some(index); + } else { + line_start = index + 1; + break; } } - } + + // Verify if this is indeed a comment. Doing this only when we've found a comment is significantly + // faster because comments are rare. + last_comment_offset.filter(|last_comment_offset| { + let before_comment = + &self.cursor.chars().as_str()[line_start..*last_comment_offset]; + + before_comment.chars().all(|c| { + is_python_whitespace(c) + || TokenKind::from_non_trivia_char(c) != TokenKind::Other + }) + }) + }; // From here on it is guaranteed that this line has no other comment. self.back_line_has_no_comment = true; if let Some(comment_offset) = comment_offset { + let comment_length = self.cursor.chars().as_str().len() - comment_offset; // It is a comment, bump all tokens - for _ in 0..comment_offset { + for _ in 0..comment_length { self.cursor.bump_back().unwrap(); } From a93254f026b8b3473386f7e7cf96a1b779a07305 Mon Sep 17 00:00:00 2001 From: Zanie Blue Date: Tue, 18 Jul 2023 16:25:49 -0500 Subject: [PATCH 4/9] Implement `unparse` for type aliases and parameters (#5869) Part of https://github.com/astral-sh/ruff/issues/5062 --- .../src/source_code/generator.rs | 98 ++++++++++++++++++- 1 file changed, 95 insertions(+), 3 deletions(-) diff --git a/crates/ruff_python_ast/src/source_code/generator.rs b/crates/ruff_python_ast/src/source_code/generator.rs index 412b897eedd767..5efc6fe0c07a14 100644 --- a/crates/ruff_python_ast/src/source_code/generator.rs +++ b/crates/ruff_python_ast/src/source_code/generator.rs @@ -6,7 +6,8 @@ use std::ops::Deref; use rustpython_literal::escape::{AsciiEscape, Escape, UnicodeEscape}; use rustpython_parser::ast::{ self, Alias, Arg, Arguments, BoolOp, CmpOp, Comprehension, Constant, ConversionFlag, - ExceptHandler, Expr, Identifier, MatchCase, Operator, Pattern, Stmt, Suite, WithItem, + ExceptHandler, Expr, Identifier, MatchCase, Operator, Pattern, Stmt, Suite, TypeParam, + TypeParamParamSpec, TypeParamTypeVar, TypeParamTypeVarTuple, WithItem, }; use ruff_python_whitespace::LineEnding; @@ -207,6 +208,7 @@ impl<'a> Generator<'a> { body, returns, decorator_list, + type_params, .. }) => { self.newlines(if self.indent_depth == 0 { 2 } else { 1 }); @@ -219,6 +221,7 @@ impl<'a> Generator<'a> { statement!({ self.p("def "); self.p_id(name); + self.unparse_type_params(type_params); self.p("("); self.unparse_args(args); self.p(")"); @@ -239,6 +242,7 @@ impl<'a> Generator<'a> { body, returns, decorator_list, + type_params, .. }) => { self.newlines(if self.indent_depth == 0 { 2 } else { 1 }); @@ -251,6 +255,7 @@ impl<'a> Generator<'a> { statement!({ self.p("async def "); self.p_id(name); + self.unparse_type_params(type_params); self.p("("); self.unparse_args(args); self.p(")"); @@ -271,8 +276,8 @@ impl<'a> Generator<'a> { keywords, body, decorator_list, + type_params, range: _, - type_params: _, }) => { self.newlines(if self.indent_depth == 0 { 2 } else { 1 }); for decorator in decorator_list { @@ -284,6 +289,7 @@ impl<'a> Generator<'a> { statement!({ self.p("class "); self.p_id(name); + self.unparse_type_params(type_params); let mut first = true; for base in bases { self.p_if(first, "("); @@ -525,6 +531,18 @@ impl<'a> Generator<'a> { self.indent_depth = self.indent_depth.saturating_sub(1); } } + Stmt::TypeAlias(ast::StmtTypeAlias { + name, + range: _range, + type_params, + value, + }) => { + self.p("type "); + self.unparse_expr(name, precedence::MAX); + self.unparse_type_params(type_params); + self.p(" = "); + self.unparse_expr(value, precedence::ASSIGN); + } Stmt::Raise(ast::StmtRaise { exc, cause, @@ -702,7 +720,6 @@ impl<'a> Generator<'a> { self.p("continue"); }); } - Stmt::TypeAlias(_) => todo!(), } } @@ -830,6 +847,38 @@ impl<'a> Generator<'a> { self.body(&ast.body); } + fn unparse_type_params(&mut self, type_params: &Vec) { + if !type_params.is_empty() { + self.p("["); + let mut first = true; + for type_param in type_params { + self.p_delim(&mut first, ", "); + self.unparse_type_param(type_param); + } + self.p("]"); + } + } + + pub(crate) fn unparse_type_param(&mut self, ast: &TypeParam) { + match ast { + TypeParam::TypeVar(TypeParamTypeVar { name, bound, .. }) => { + self.p_id(name); + if let Some(expr) = bound { + self.p(": "); + self.unparse_expr(expr, precedence::MAX); + } + } + TypeParam::TypeVarTuple(TypeParamTypeVarTuple { name, .. }) => { + self.p("*"); + self.p_id(name); + } + TypeParam::ParamSpec(TypeParamParamSpec { name, .. }) => { + self.p("**"); + self.p_id(name); + } + } + } + pub(crate) fn unparse_expr(&mut self, ast: &Expr, level: u8) { macro_rules! opprec { ($opty:ident, $x:expr, $enu:path, $($var:ident($op:literal, $prec:ident)),*$(,)?) => { @@ -1510,6 +1559,26 @@ mod tests { ); assert_round_trip!( r#"class Foo(Bar, object): + pass"# + ); + assert_round_trip!( + r#"class Foo[T]: + pass"# + ); + assert_round_trip!( + r#"class Foo[T](Bar): + pass"# + ); + assert_round_trip!( + r#"class Foo[*Ts]: + pass"# + ); + assert_round_trip!( + r#"class Foo[**P]: + pass"# + ); + assert_round_trip!( + r#"class Foo[T, U, *Ts, **P]: pass"# ); assert_round_trip!( @@ -1541,6 +1610,22 @@ mod tests { ); assert_round_trip!( r#"def test(a, b=4, /, c=8, d=9): + pass"# + ); + assert_round_trip!( + r#"def test[T](): + pass"# + ); + assert_round_trip!( + r#"def test[*Ts](): + pass"# + ); + assert_round_trip!( + r#"def test[**P](): + pass"# + ); + assert_round_trip!( + r#"def test[T, U, *Ts, **P](): pass"# ); assert_round_trip!( @@ -1596,6 +1681,13 @@ class Foo: def f(): pass"# ); + + // Type aliases + assert_round_trip!(r#"type Foo = int | str"#); + assert_round_trip!(r#"type Foo[T] = list[T]"#); + assert_round_trip!(r#"type Foo[*Ts] = ..."#); + assert_round_trip!(r#"type Foo[**P] = ..."#); + assert_round_trip!(r#"type Foo[T, U, *Ts, **P] = ..."#); } #[test] From 2d505e2b0443a46b1e5a3016821f22a8c7307913 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Tue, 18 Jul 2023 18:58:31 -0400 Subject: [PATCH 5/9] Remove suite body tracking from `SemanticModel` (#5848) ## Summary The `SemanticModel` currently stores the "body" of a given `Suite`, along with the current statement index. This is used to support "next sibling" queries, but we only use this in exactly one place -- the rule that simplifies constructs like this to `any` or `all`: ```python for x in y: if x == 0: return True return False ``` Instead of tracking the state, we can just do a (slightly more expensive) traversal, by finding the node within its parent and returning the next node in the body. Note that we'll only have to do this extremely rarely -- namely, for functions that contain something like: ```python for x in y: if x == 0: return True ``` --- crates/ruff/src/checkers/ast/mod.rs | 17 +- .../rules/reimplemented_builtin.rs | 446 +++++++++--------- crates/ruff_python_ast/src/lib.rs | 1 + crates/ruff_python_ast/src/traversal.rs | 113 +++++ crates/ruff_python_semantic/src/model.rs | 11 - 5 files changed, 339 insertions(+), 249 deletions(-) create mode 100644 crates/ruff_python_ast/src/traversal.rs diff --git a/crates/ruff/src/checkers/ast/mod.rs b/crates/ruff/src/checkers/ast/mod.rs index c1bf7fb8a5b675..d979952140c270 100644 --- a/crates/ruff/src/checkers/ast/mod.rs +++ b/crates/ruff/src/checkers/ast/mod.rs @@ -1404,11 +1404,7 @@ where } if stmt.is_for_stmt() { if self.enabled(Rule::ReimplementedBuiltin) { - flake8_simplify::rules::convert_for_loop_to_any_all( - self, - stmt, - self.semantic.sibling_stmt(), - ); + flake8_simplify::rules::convert_for_loop_to_any_all(self, stmt); } if self.enabled(Rule::InDictKeys) { flake8_simplify::rules::key_in_dict_for(self, target, iter); @@ -4237,21 +4233,10 @@ where flake8_pie::rules::no_unnecessary_pass(self, body); } - // Step 2: Binding - let prev_body = self.semantic.body; - let prev_body_index = self.semantic.body_index; - self.semantic.body = body; - self.semantic.body_index = 0; - // Step 3: Traversal for stmt in body { self.visit_stmt(stmt); - self.semantic.body_index += 1; } - - // Step 4: Clean-up - self.semantic.body = prev_body; - self.semantic.body_index = prev_body_index; } } diff --git a/crates/ruff/src/rules/flake8_simplify/rules/reimplemented_builtin.rs b/crates/ruff/src/rules/flake8_simplify/rules/reimplemented_builtin.rs index 4a30d94859b9a8..953e77b8439ad1 100644 --- a/crates/ruff/src/rules/flake8_simplify/rules/reimplemented_builtin.rs +++ b/crates/ruff/src/rules/flake8_simplify/rules/reimplemented_builtin.rs @@ -1,4 +1,4 @@ -use ruff_text_size::{TextRange, TextSize}; +use ruff_text_size::TextRange; use rustpython_parser::ast::{ self, CmpOp, Comprehension, Constant, Expr, ExprContext, Ranged, Stmt, UnaryOp, }; @@ -7,10 +7,11 @@ use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::helpers::any_over_expr; use ruff_python_ast::source_code::Generator; +use ruff_python_ast::traversal; use crate::checkers::ast::Checker; use crate::line_width::LineWidth; -use crate::registry::{AsRule, Rule}; +use crate::registry::AsRule; /// ## What it does /// Checks for `for` loops that can be replaced with a builtin function, like @@ -38,7 +39,7 @@ use crate::registry::{AsRule, Rule}; /// - [Python documentation: `all`](https://docs.python.org/3/library/functions.html#all) #[violation] pub struct ReimplementedBuiltin { - repl: String, + replacement: String, } impl Violation for ReimplementedBuiltin { @@ -46,200 +47,222 @@ impl Violation for ReimplementedBuiltin { #[derive_message_formats] fn message(&self) -> String { - let ReimplementedBuiltin { repl } = self; - format!("Use `{repl}` instead of `for` loop") + let ReimplementedBuiltin { replacement } = self; + format!("Use `{replacement}` instead of `for` loop") } fn autofix_title(&self) -> Option { - let ReimplementedBuiltin { repl } = self; - Some(format!("Replace with `{repl}`")) + let ReimplementedBuiltin { replacement } = self; + Some(format!("Replace with `{replacement}`")) } } /// SIM110, SIM111 -pub(crate) fn convert_for_loop_to_any_all( - checker: &mut Checker, - stmt: &Stmt, - sibling: Option<&Stmt>, -) { - // There are two cases to consider: +pub(crate) fn convert_for_loop_to_any_all(checker: &mut Checker, stmt: &Stmt) { + if !checker.semantic().scope().kind.is_any_function() { + return; + } + + // The `for` loop itself must consist of an `if` with a `return`. + let Some(loop_) = match_loop(stmt) else { + return; + }; + + // Afterwards, there are two cases to consider: // - `for` loop with an `else: return True` or `else: return False`. - // - `for` loop followed by `return True` or `return False` - if let Some(loop_info) = return_values_for_else(stmt) - .or_else(|| sibling.and_then(|sibling| return_values_for_siblings(stmt, sibling))) - { - // Check if loop_info.target, loop_info.iter, or loop_info.test contains `await`. - if contains_await(loop_info.target) - || contains_await(loop_info.iter) - || contains_await(loop_info.test) - { - return; - } - if loop_info.return_value && !loop_info.next_return_value { - if checker.enabled(Rule::ReimplementedBuiltin) { - let contents = return_stmt( - "any", - loop_info.test, - loop_info.target, - loop_info.iter, - checker.generator(), - ); + // - `for` loop followed by `return True` or `return False`. + let Some(terminal) = match_else_return(stmt).or_else(|| { + let parent = checker.semantic().stmt_parent()?; + let suite = traversal::suite(stmt, parent)?; + let sibling = traversal::next_sibling(stmt, suite)?; + match_sibling_return(stmt, sibling) + }) else { + return; + }; - // Don't flag if the resulting expression would exceed the maximum line length. - let line_start = checker.locator.line_start(stmt.start()); - if LineWidth::new(checker.settings.tab_size) - .add_str(&checker.locator.contents()[TextRange::new(line_start, stmt.start())]) - .add_str(&contents) - > checker.settings.line_length - { - return; - } + // Check if any of the expressions contain an `await` expression. + if contains_await(loop_.target) || contains_await(loop_.iter) || contains_await(loop_.test) { + return; + } - let mut diagnostic = Diagnostic::new( - ReimplementedBuiltin { - repl: contents.clone(), - }, - TextRange::new(stmt.start(), loop_info.terminal), - ); - if checker.patch(diagnostic.kind.rule()) && checker.semantic().is_builtin("any") { - diagnostic.set_fix(Fix::suggested(Edit::replacement( - contents, - stmt.start(), - loop_info.terminal, - ))); - } - checker.diagnostics.push(diagnostic); + match (loop_.return_value, terminal.return_value) { + // Replace with `any`. + (true, false) => { + let contents = return_stmt( + "any", + loop_.test, + loop_.target, + loop_.iter, + checker.generator(), + ); + + // Don't flag if the resulting expression would exceed the maximum line length. + let line_start = checker.locator.line_start(stmt.start()); + if LineWidth::new(checker.settings.tab_size) + .add_str(&checker.locator.contents()[TextRange::new(line_start, stmt.start())]) + .add_str(&contents) + > checker.settings.line_length + { + return; } - } - if !loop_info.return_value && loop_info.next_return_value { - if checker.enabled(Rule::ReimplementedBuiltin) { - // Invert the condition. - let test = { - if let Expr::UnaryOp(ast::ExprUnaryOp { - op: UnaryOp::Not, - operand, - range: _, - }) = &loop_info.test - { - *operand.clone() - } else if let Expr::Compare(ast::ExprCompare { - left, - ops, - comparators, - range: _, - }) = &loop_info.test - { - if let ([op], [comparator]) = (ops.as_slice(), comparators.as_slice()) { - let op = match op { - CmpOp::Eq => CmpOp::NotEq, - CmpOp::NotEq => CmpOp::Eq, - CmpOp::Lt => CmpOp::GtE, - CmpOp::LtE => CmpOp::Gt, - CmpOp::Gt => CmpOp::LtE, - CmpOp::GtE => CmpOp::Lt, - CmpOp::Is => CmpOp::IsNot, - CmpOp::IsNot => CmpOp::Is, - CmpOp::In => CmpOp::NotIn, - CmpOp::NotIn => CmpOp::In, - }; - let node = ast::ExprCompare { - left: left.clone(), - ops: vec![op], - comparators: vec![comparator.clone()], - range: TextRange::default(), - }; - node.into() - } else { - let node = ast::ExprUnaryOp { - op: UnaryOp::Not, - operand: Box::new(loop_info.test.clone()), - range: TextRange::default(), - }; - node.into() - } + let mut diagnostic = Diagnostic::new( + ReimplementedBuiltin { + replacement: contents.to_string(), + }, + TextRange::new(stmt.start(), terminal.stmt.end()), + ); + if checker.patch(diagnostic.kind.rule()) && checker.semantic().is_builtin("any") { + diagnostic.set_fix(Fix::suggested(Edit::replacement( + contents, + stmt.start(), + terminal.stmt.end(), + ))); + } + checker.diagnostics.push(diagnostic); + } + // Replace with `all`. + (false, true) => { + // Invert the condition. + let test = { + if let Expr::UnaryOp(ast::ExprUnaryOp { + op: UnaryOp::Not, + operand, + range: _, + }) = &loop_.test + { + *operand.clone() + } else if let Expr::Compare(ast::ExprCompare { + left, + ops, + comparators, + range: _, + }) = &loop_.test + { + if let ([op], [comparator]) = (ops.as_slice(), comparators.as_slice()) { + let op = match op { + CmpOp::Eq => CmpOp::NotEq, + CmpOp::NotEq => CmpOp::Eq, + CmpOp::Lt => CmpOp::GtE, + CmpOp::LtE => CmpOp::Gt, + CmpOp::Gt => CmpOp::LtE, + CmpOp::GtE => CmpOp::Lt, + CmpOp::Is => CmpOp::IsNot, + CmpOp::IsNot => CmpOp::Is, + CmpOp::In => CmpOp::NotIn, + CmpOp::NotIn => CmpOp::In, + }; + let node = ast::ExprCompare { + left: left.clone(), + ops: vec![op], + comparators: vec![comparator.clone()], + range: TextRange::default(), + }; + node.into() } else { let node = ast::ExprUnaryOp { op: UnaryOp::Not, - operand: Box::new(loop_info.test.clone()), + operand: Box::new(loop_.test.clone()), range: TextRange::default(), }; node.into() } - }; - let contents = return_stmt( - "all", - &test, - loop_info.target, - loop_info.iter, - checker.generator(), - ); - - // Don't flag if the resulting expression would exceed the maximum line length. - let line_start = checker.locator.line_start(stmt.start()); - if LineWidth::new(checker.settings.tab_size) - .add_str(&checker.locator.contents()[TextRange::new(line_start, stmt.start())]) - .add_str(&contents) - > checker.settings.line_length - { - return; + } else { + let node = ast::ExprUnaryOp { + op: UnaryOp::Not, + operand: Box::new(loop_.test.clone()), + range: TextRange::default(), + }; + node.into() } + }; + let contents = return_stmt("all", &test, loop_.target, loop_.iter, checker.generator()); - let mut diagnostic = Diagnostic::new( - ReimplementedBuiltin { - repl: contents.clone(), - }, - TextRange::new(stmt.start(), loop_info.terminal), - ); - if checker.patch(diagnostic.kind.rule()) && checker.semantic().is_builtin("all") { - diagnostic.set_fix(Fix::suggested(Edit::replacement( - contents, - stmt.start(), - loop_info.terminal, - ))); - } - checker.diagnostics.push(diagnostic); + // Don't flag if the resulting expression would exceed the maximum line length. + let line_start = checker.locator.line_start(stmt.start()); + if LineWidth::new(checker.settings.tab_size) + .add_str(&checker.locator.contents()[TextRange::new(line_start, stmt.start())]) + .add_str(&contents) + > checker.settings.line_length + { + return; + } + + let mut diagnostic = Diagnostic::new( + ReimplementedBuiltin { + replacement: contents.to_string(), + }, + TextRange::new(stmt.start(), terminal.stmt.end()), + ); + if checker.patch(diagnostic.kind.rule()) && checker.semantic().is_builtin("all") { + diagnostic.set_fix(Fix::suggested(Edit::replacement( + contents, + stmt.start(), + terminal.stmt.end(), + ))); } + checker.diagnostics.push(diagnostic); } + _ => {} } } +/// Represents a `for` loop with a conditional `return`, like: +/// ```python +/// for x in y: +/// if x == 0: +/// return True +/// ``` +#[derive(Debug)] struct Loop<'a> { + /// The `return` value of the loop. return_value: bool, - next_return_value: bool, + /// The test condition in the loop. test: &'a Expr, + /// The target of the loop. target: &'a Expr, + /// The iterator of the loop. iter: &'a Expr, - terminal: TextSize, } -/// Extract the returned boolean values a `Stmt::For` with an `else` body. -fn return_values_for_else(stmt: &Stmt) -> Option { +/// Represents a `return` statement following a `for` loop, like: +/// ```python +/// for x in y: +/// if x == 0: +/// return True +/// return False +/// ``` +/// +/// Or: +/// ```python +/// for x in y: +/// if x == 0: +/// return True +/// else: +/// return False +/// ``` +#[derive(Debug)] +struct Terminal<'a> { + return_value: bool, + stmt: &'a Stmt, +} + +fn match_loop(stmt: &Stmt) -> Option { let Stmt::For(ast::StmtFor { - body, - target, - iter, - orelse, - .. + body, target, iter, .. }) = stmt else { return None; }; - // The loop itself should contain a single `if` statement, with an `else` - // containing a single `return True` or `return False`. - if body.len() != 1 { - return None; - } - if orelse.len() != 1 { - return None; - } - let Stmt::If(ast::StmtIf { + // The loop itself should contain a single `if` statement, with a single `return` statement in + // the body. + let [Stmt::If(ast::StmtIf { body: nested_body, test: nested_test, elif_else_clauses: nested_elif_else_clauses, range: _, - }) = &body[0] + })] = body.as_slice() else { return None; }; @@ -263,17 +286,37 @@ fn return_values_for_else(stmt: &Stmt) -> Option { return None; }; + Some(Loop { + return_value: *value, + test: nested_test, + target, + iter, + }) +} + +/// If a `Stmt::For` contains an `else` with a single boolean `return`, return the [`Terminal`] +/// representing that `return`. +/// +/// For example, matches the `return` in: +/// ```python +/// for x in y: +/// if x == 0: +/// return True +/// return False +/// ``` +fn match_else_return(stmt: &Stmt) -> Option { + let Stmt::For(ast::StmtFor { orelse, .. }) = stmt else { + return None; + }; + // The `else` block has to contain a single `return True` or `return False`. - let Stmt::Return(ast::StmtReturn { - value: next_value, + let [Stmt::Return(ast::StmtReturn { + value: Some(next_value), range: _, - }) = &orelse[0] + })] = orelse.as_slice() else { return None; }; - let Some(next_value) = next_value else { - return None; - }; let Expr::Constant(ast::ExprConstant { value: Constant::Bool(next_value), .. @@ -282,78 +325,41 @@ fn return_values_for_else(stmt: &Stmt) -> Option { return None; }; - Some(Loop { - return_value: *value, - next_return_value: *next_value, - test: nested_test, - target, - iter, - terminal: stmt.end(), + Some(Terminal { + return_value: *next_value, + stmt, }) } -/// Extract the returned boolean values from subsequent `Stmt::For` and -/// `Stmt::Return` statements, or `None`. -fn return_values_for_siblings<'a>(stmt: &'a Stmt, sibling: &'a Stmt) -> Option> { - let Stmt::For(ast::StmtFor { - body, - target, - iter, - orelse, - .. - }) = stmt - else { +/// If a `Stmt::For` is followed by a boolean `return`, return the [`Terminal`] representing that +/// `return`. +/// +/// For example, matches the `return` in: +/// ```python +/// for x in y: +/// if x == 0: +/// return True +/// else: +/// return False +/// ``` +fn match_sibling_return<'a>(stmt: &'a Stmt, sibling: &'a Stmt) -> Option> { + let Stmt::For(ast::StmtFor { orelse, .. }) = stmt else { return None; }; - // The loop itself should contain a single `if` statement, with a single `return - // True` or `return False`. - if body.len() != 1 { - return None; - } + // The loop itself shouldn't have an `else` block. if !orelse.is_empty() { return None; } - let Stmt::If(ast::StmtIf { - body: nested_body, - test: nested_test, - elif_else_clauses: nested_elif_else_clauses, - range: _, - }) = &body[0] - else { - return None; - }; - if nested_body.len() != 1 { - return None; - } - if !nested_elif_else_clauses.is_empty() { - return None; - } - let Stmt::Return(ast::StmtReturn { value, range: _ }) = &nested_body[0] else { - return None; - }; - let Some(value) = value else { - return None; - }; - let Expr::Constant(ast::ExprConstant { - value: Constant::Bool(value), - .. - }) = value.as_ref() - else { - return None; - }; // The next statement has to be a `return True` or `return False`. let Stmt::Return(ast::StmtReturn { - value: next_value, + value: Some(next_value), range: _, }) = &sibling else { return None; }; - let Some(next_value) = next_value else { - return None; - }; let Expr::Constant(ast::ExprConstant { value: Constant::Bool(next_value), .. @@ -362,13 +368,9 @@ fn return_values_for_siblings<'a>(stmt: &'a Stmt, sibling: &'a Stmt) -> Option(stmt: &'a Stmt, parent: &'a Stmt) -> Option<&'a Suite> { + match parent { + Stmt::FunctionDef(ast::StmtFunctionDef { body, .. }) => Some(body), + Stmt::AsyncFunctionDef(ast::StmtAsyncFunctionDef { body, .. }) => Some(body), + Stmt::ClassDef(ast::StmtClassDef { body, .. }) => Some(body), + Stmt::For(ast::StmtFor { body, orelse, .. }) => { + if body.contains(stmt) { + Some(body) + } else if orelse.contains(stmt) { + Some(orelse) + } else { + None + } + } + Stmt::AsyncFor(ast::StmtAsyncFor { body, orelse, .. }) => { + if body.contains(stmt) { + Some(body) + } else if orelse.contains(stmt) { + Some(orelse) + } else { + None + } + } + Stmt::While(ast::StmtWhile { body, orelse, .. }) => { + if body.contains(stmt) { + Some(body) + } else if orelse.contains(stmt) { + Some(orelse) + } else { + None + } + } + Stmt::If(ast::StmtIf { + body, + elif_else_clauses, + .. + }) => { + if body.contains(stmt) { + Some(body) + } else { + elif_else_clauses + .iter() + .map(|elif_else_clause| &elif_else_clause.body) + .find(|body| body.contains(stmt)) + } + } + Stmt::With(ast::StmtWith { body, .. }) => Some(body), + Stmt::AsyncWith(ast::StmtAsyncWith { body, .. }) => Some(body), + Stmt::Match(ast::StmtMatch { cases, .. }) => cases + .iter() + .map(|case| &case.body) + .find(|body| body.contains(stmt)), + Stmt::Try(ast::StmtTry { + body, + handlers, + orelse, + finalbody, + .. + }) => { + if body.contains(stmt) { + Some(body) + } else if orelse.contains(stmt) { + Some(orelse) + } else if finalbody.contains(stmt) { + Some(finalbody) + } else { + handlers + .iter() + .filter_map(ExceptHandler::as_except_handler) + .map(|handler| &handler.body) + .find(|body| body.contains(stmt)) + } + } + Stmt::TryStar(ast::StmtTryStar { + body, + handlers, + orelse, + finalbody, + .. + }) => { + if body.contains(stmt) { + Some(body) + } else if orelse.contains(stmt) { + Some(orelse) + } else if finalbody.contains(stmt) { + Some(finalbody) + } else { + handlers + .iter() + .filter_map(ExceptHandler::as_except_handler) + .map(|handler| &handler.body) + .find(|body| body.contains(stmt)) + } + } + _ => None, + } +} + +/// Given a [`Stmt`] and its containing [`Suite`], return the next [`Stmt`] in the [`Suite`]. +pub fn next_sibling<'a>(stmt: &'a Stmt, suite: &'a Suite) -> Option<&'a Stmt> { + let mut iter = suite.iter(); + while let Some(sibling) = iter.next() { + if sibling == stmt { + return iter.next(); + } + } + None +} diff --git a/crates/ruff_python_semantic/src/model.rs b/crates/ruff_python_semantic/src/model.rs index 53138dbf5d5c9f..0106dfcfc0d89a 100644 --- a/crates/ruff_python_semantic/src/model.rs +++ b/crates/ruff_python_semantic/src/model.rs @@ -108,10 +108,6 @@ pub struct SemanticModel<'a> { /// by way of the `global x` statement. rebinding_scopes: HashMap, BuildNoHashHasher>, - /// Body iteration; used to peek at siblings. - pub body: &'a [Stmt], - pub body_index: usize, - /// Flags for the semantic model. pub flags: SemanticModelFlags, @@ -137,8 +133,6 @@ impl<'a> SemanticModel<'a> { shadowed_bindings: IntMap::default(), delayed_annotations: IntMap::default(), rebinding_scopes: IntMap::default(), - body: &[], - body_index: 0, flags: SemanticModelFlags::new(path), handled_exceptions: Vec::default(), } @@ -757,11 +751,6 @@ impl<'a> SemanticModel<'a> { self.exprs.iter().rev().skip(1) } - /// Return the `Stmt` that immediately follows the current `Stmt`, if any. - pub fn sibling_stmt(&self) -> Option<&'a Stmt> { - self.body.get(self.body_index + 1) - } - /// Returns a reference to the global scope pub fn global_scope(&self) -> &Scope<'a> { self.scopes.global() From 7ffcd93afd95ad8962da9eb84eacea83767f4f8b Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Tue, 18 Jul 2023 20:43:12 -0400 Subject: [PATCH 6/9] Move unused deletion tracking to deferred analysis (#5852) ## Summary This PR moves the "unused exception" rule out of the visitor and into a deferred check. When we can base rules solely on the semantic model, we probably should, as it greatly simplifies the `Checker` itself. --- crates/ruff/src/checkers/ast/mod.rs | 65 +++++++++++++--------- crates/ruff/src/renamer.rs | 1 + crates/ruff/src/rules/pyflakes/fixes.rs | 9 +-- crates/ruff/src/rules/pyflakes/mod.rs | 2 +- crates/ruff_python_semantic/src/binding.rs | 18 +++++- 5 files changed, 62 insertions(+), 33 deletions(-) diff --git a/crates/ruff/src/checkers/ast/mod.rs b/crates/ruff/src/checkers/ast/mod.rs index d979952140c270..95ada177cc393d 100644 --- a/crates/ruff/src/checkers/ast/mod.rs +++ b/crates/ruff/src/checkers/ast/mod.rs @@ -4057,7 +4057,7 @@ where } // Step 2: Binding - let bindings = match except_handler { + let binding = match except_handler { ExceptHandler::ExceptHandler(ast::ExceptHandlerExceptHandler { type_: _, name, @@ -4066,17 +4066,17 @@ where }) => { if let Some(name) = name { // Store the existing binding, if any. - let existing_id = self.semantic.lookup_symbol(name.as_str()); + let binding_id = self.semantic.lookup_symbol(name.as_str()); // Add the bound exception name to the scope. - let binding_id = self.add_binding( + self.add_binding( name.as_str(), name.range(), - BindingKind::Assignment, + BindingKind::BoundException, BindingFlags::empty(), ); - Some((name, existing_id, binding_id)) + Some((name, binding_id)) } else { None } @@ -4087,30 +4087,11 @@ where walk_except_handler(self, except_handler); // Step 4: Clean-up - if let Some((name, existing_id, binding_id)) = bindings { - // If the exception name wasn't used in the scope, emit a diagnostic. - if !self.semantic.is_used(binding_id) { - if self.enabled(Rule::UnusedVariable) { - let mut diagnostic = Diagnostic::new( - pyflakes::rules::UnusedVariable { - name: name.to_string(), - }, - name.range(), - ); - if self.patch(Rule::UnusedVariable) { - diagnostic.try_set_fix(|| { - pyflakes::fixes::remove_exception_handler_assignment(name, self.locator) - .map(Fix::automatic) - }); - } - self.diagnostics.push(diagnostic); - } - } - + if let Some((name, binding_id)) = binding { self.add_binding( name.as_str(), name.range(), - BindingKind::UnboundException(existing_id), + BindingKind::UnboundException(binding_id), BindingFlags::empty(), ); } @@ -4797,6 +4778,37 @@ impl<'a> Checker<'a> { } } + /// Run any lint rules that operate over a single [`Binding`]. + fn check_bindings(&mut self) { + if !self.any_enabled(&[Rule::UnusedVariable]) { + return; + } + + for binding in self.semantic.bindings.iter() { + // F841 + if self.enabled(Rule::UnusedVariable) { + if binding.kind.is_bound_exception() && !binding.is_used() { + let mut diagnostic = Diagnostic::new( + pyflakes::rules::UnusedVariable { + name: binding.name(self.locator).to_string(), + }, + binding.range, + ); + if self.patch(Rule::UnusedVariable) { + diagnostic.try_set_fix(|| { + pyflakes::fixes::remove_exception_handler_assignment( + binding, + self.locator, + ) + .map(Fix::automatic) + }); + } + self.diagnostics.push(diagnostic); + } + } + } + } + fn check_deferred_scopes(&mut self) { if !self.any_enabled(&[ Rule::GlobalVariableNotAssigned, @@ -5475,6 +5487,7 @@ pub(crate) fn check_ast( checker.semantic.scope_id = ScopeId::global(); checker.deferred.scopes.push(ScopeId::global()); checker.check_deferred_scopes(); + checker.check_bindings(); checker.diagnostics } diff --git a/crates/ruff/src/renamer.rs b/crates/ruff/src/renamer.rs index 14aec66ef7ad9c..9274018a798452 100644 --- a/crates/ruff/src/renamer.rs +++ b/crates/ruff/src/renamer.rs @@ -245,6 +245,7 @@ impl Renamer { | BindingKind::NamedExprAssignment | BindingKind::UnpackedAssignment | BindingKind::Assignment + | BindingKind::BoundException | BindingKind::LoopVar | BindingKind::Global | BindingKind::Nonlocal(_) diff --git a/crates/ruff/src/rules/pyflakes/fixes.rs b/crates/ruff/src/rules/pyflakes/fixes.rs index c6cdee0f5a3219..bad191b555e0b4 100644 --- a/crates/ruff/src/rules/pyflakes/fixes.rs +++ b/crates/ruff/src/rules/pyflakes/fixes.rs @@ -1,9 +1,10 @@ use anyhow::{Context, Ok, Result}; use ruff_text_size::TextRange; -use rustpython_parser::ast::{Expr, Identifier, Ranged}; +use rustpython_parser::ast::{Expr, Ranged}; use ruff_diagnostics::Edit; use ruff_python_ast::source_code::{Locator, Stylist}; +use ruff_python_semantic::Binding; use ruff_python_whitespace::{SimpleTokenizer, TokenKind}; use crate::autofix::codemods::CodegenStylist; @@ -90,12 +91,12 @@ pub(crate) fn remove_unused_positional_arguments_from_format_call( /// Generate a [`Edit`] to remove the binding from an exception handler. pub(crate) fn remove_exception_handler_assignment( - bound_exception: &Identifier, + bound_exception: &Binding, locator: &Locator, ) -> Result { // Lex backwards, to the token just before the `as`. let mut tokenizer = - SimpleTokenizer::up_to(bound_exception.start(), locator.contents()).skip_trivia(); + SimpleTokenizer::up_to(bound_exception.range.start(), locator.contents()).skip_trivia(); // Eat the `as` token. let preceding = tokenizer @@ -109,7 +110,7 @@ pub(crate) fn remove_exception_handler_assignment( .context("expected the exception name to be preceded by a token")?; // Lex forwards, to the `:` token. - let following = SimpleTokenizer::starts_at(bound_exception.end(), locator.contents()) + let following = SimpleTokenizer::starts_at(bound_exception.range.end(), locator.contents()) .next() .context("expected the exception name to be followed by a colon")?; debug_assert!(matches!(following.kind, TokenKind::Colon)); diff --git a/crates/ruff/src/rules/pyflakes/mod.rs b/crates/ruff/src/rules/pyflakes/mod.rs index 1f13edf9734575..885b4baf6b1df0 100644 --- a/crates/ruff/src/rules/pyflakes/mod.rs +++ b/crates/ruff/src/rules/pyflakes/mod.rs @@ -2115,7 +2115,7 @@ mod tests { try: pass except Exception as fu: pass "#, - &[Rule::UnusedVariable, Rule::RedefinedWhileUnused], + &[Rule::RedefinedWhileUnused, Rule::UnusedVariable], ); } diff --git a/crates/ruff_python_semantic/src/binding.rs b/crates/ruff_python_semantic/src/binding.rs index 862be2b617021b..f66da2a80a8012 100644 --- a/crates/ruff_python_semantic/src/binding.rs +++ b/crates/ruff_python_semantic/src/binding.rs @@ -5,6 +5,7 @@ use ruff_text_size::TextRange; use rustpython_parser::ast::Ranged; use ruff_index::{newtype_index, IndexSlice, IndexVec}; +use ruff_python_ast::source_code::Locator; use crate::context::ExecutionContext; use crate::model::SemanticModel; @@ -163,6 +164,11 @@ impl<'a> Binding<'a> { } } + /// Returns the name of the binding (e.g., `x` in `x = 1`). + pub fn name<'b>(&self, locator: &'b Locator) -> &'b str { + locator.slice(self.range) + } + /// Returns the range of the binding's parent. pub fn parent_range(&self, semantic: &SemanticModel) -> Option { self.source @@ -417,7 +423,16 @@ pub enum BindingKind<'a> { /// ``` Deletion, - /// A binding to unbind the local variable, like `x` in: + /// A binding to bind an exception to a local variable, like `x` in: + /// ```python + /// try: + /// ... + /// except Exception as x: + /// ... + /// ``` + BoundException, + + /// A binding to unbind a bound local exception, like `x` in: /// ```python /// try: /// ... @@ -428,7 +443,6 @@ pub enum BindingKind<'a> { /// After the `except` block, `x` is unbound, despite the lack /// of an explicit `del` statement. /// - /// /// Stores the ID of the binding that was shadowed in the enclosing /// scope, if any. UnboundException(Option), From 626d8dc2cc6ae46d465cb8a236330700032b856d Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Tue, 18 Jul 2023 20:49:13 -0400 Subject: [PATCH 7/9] Use `.as_ref()` in lieu of `&**` (#5874) I find this less opaque (and often more succinct). --- crates/ruff/src/checkers/ast/mod.rs | 10 +++------- crates/ruff/src/rules/flake8_annotations/helpers.rs | 2 +- .../src/rules/pylint/rules/redefined_loop_name.rs | 2 +- crates/ruff_python_ast/src/all.rs | 6 +++--- crates/ruff_python_ast/src/comparable.rs | 12 ++++++------ crates/ruff_python_ast/src/source_code/generator.rs | 2 +- .../src/analyze/type_inference.rs | 4 ++-- 7 files changed, 17 insertions(+), 21 deletions(-) diff --git a/crates/ruff/src/checkers/ast/mod.rs b/crates/ruff/src/checkers/ast/mod.rs index 95ada177cc393d..c8077d7ab98615 100644 --- a/crates/ruff/src/checkers/ast/mod.rs +++ b/crates/ruff/src/checkers/ast/mod.rs @@ -411,7 +411,7 @@ where stmt, name, decorator_list, - returns.as_ref().map(|expr| &**expr), + returns.as_ref().map(AsRef::as_ref), args, stmt.is_async_function_def_stmt(), ); @@ -470,18 +470,14 @@ where Rule::SuperfluousElseContinue, Rule::SuperfluousElseBreak, ]) { - flake8_return::rules::function( - self, - body, - returns.as_ref().map(|expr| &**expr), - ); + flake8_return::rules::function(self, body, returns.as_ref().map(AsRef::as_ref)); } if self.enabled(Rule::UselessReturn) { pylint::rules::useless_return( self, stmt, body, - returns.as_ref().map(|expr| &**expr), + returns.as_ref().map(AsRef::as_ref), ); } if self.enabled(Rule::ComplexStructure) { diff --git a/crates/ruff/src/rules/flake8_annotations/helpers.rs b/crates/ruff/src/rules/flake8_annotations/helpers.rs index c3db27fa2fec27..cf1139507269f7 100644 --- a/crates/ruff/src/rules/flake8_annotations/helpers.rs +++ b/crates/ruff/src/rules/flake8_annotations/helpers.rs @@ -26,7 +26,7 @@ pub(super) fn match_function_def( }) => ( name, args, - returns.as_ref().map(|expr| &**expr), + returns.as_ref().map(AsRef::as_ref), body, decorator_list, ), diff --git a/crates/ruff/src/rules/pylint/rules/redefined_loop_name.rs b/crates/ruff/src/rules/pylint/rules/redefined_loop_name.rs index c0536daee125e0..ec9fd9e28d2997 100644 --- a/crates/ruff/src/rules/pylint/rules/redefined_loop_name.rs +++ b/crates/ruff/src/rules/pylint/rules/redefined_loop_name.rs @@ -274,7 +274,7 @@ fn assignment_targets_from_expr<'a>( ctx: ExprContext::Store, value, range: _, - }) => Box::new(iter::once(&**value)), + }) => Box::new(iter::once(value.as_ref())), Expr::Name(ast::ExprName { ctx: ExprContext::Store, id, diff --git a/crates/ruff_python_ast/src/all.rs b/crates/ruff_python_ast/src/all.rs index 09244135be5acc..903a56d7684992 100644 --- a/crates/ruff_python_ast/src/all.rs +++ b/crates/ruff_python_ast/src/all.rs @@ -65,7 +65,7 @@ where }) => { // Allow `tuple()` and `list()` calls. if keywords.is_empty() && args.len() <= 1 { - if let Expr::Name(ast::ExprName { id, .. }) = &**func { + if let Expr::Name(ast::ExprName { id, .. }) = func.as_ref() { let id = id.as_str(); if id == "tuple" || id == "list" { if is_builtin(id) { @@ -106,7 +106,7 @@ where Stmt::AugAssign(ast::StmtAugAssign { value, .. }) => Some(value), _ => None, } { - if let Expr::BinOp(ast::ExprBinOp { left, right, .. }) = &**value { + if let Expr::BinOp(ast::ExprBinOp { left, right, .. }) = value.as_ref() { let mut current_left = left; let mut current_right = right; loop { @@ -119,7 +119,7 @@ where // Process the left side, which can be a "real" value or the "rest" of the // binary operation. - if let Expr::BinOp(ast::ExprBinOp { left, right, .. }) = &**current_left { + if let Expr::BinOp(ast::ExprBinOp { left, right, .. }) = current_left.as_ref() { current_left = left; current_right = right; } else { diff --git a/crates/ruff_python_ast/src/comparable.rs b/crates/ruff_python_ast/src/comparable.rs index af78faf2a4d59e..ceb2a1c48bf4ee 100644 --- a/crates/ruff_python_ast/src/comparable.rs +++ b/crates/ruff_python_ast/src/comparable.rs @@ -274,7 +274,7 @@ impl<'a> From<&'a ast::Pattern> for ComparablePattern<'a> { impl<'a> From<&'a Box> for Box> { fn from(pattern: &'a Box) -> Self { - Box::new((&**pattern).into()) + Box::new((pattern.as_ref()).into()) } } @@ -362,13 +362,13 @@ impl<'a> From<&'a ast::Arguments> for ComparableArguments<'a> { impl<'a> From<&'a Box> for ComparableArguments<'a> { fn from(arguments: &'a Box) -> Self { - (&**arguments).into() + (arguments.as_ref()).into() } } impl<'a> From<&'a Box> for ComparableArg<'a> { fn from(arg: &'a Box) -> Self { - (&**arg).into() + (arg.as_ref()).into() } } @@ -685,13 +685,13 @@ pub enum ComparableExpr<'a> { impl<'a> From<&'a Box> for Box> { fn from(expr: &'a Box) -> Self { - Box::new((&**expr).into()) + Box::new((expr.as_ref()).into()) } } impl<'a> From<&'a Box> for ComparableExpr<'a> { fn from(expr: &'a Box) -> Self { - (&**expr).into() + (expr.as_ref()).into() } } @@ -737,7 +737,7 @@ impl<'a> From<&'a ast::Expr> for ComparableExpr<'a> { body, range: _range, }) => Self::Lambda(ExprLambda { - args: (&**args).into(), + args: (args.as_ref()).into(), body: body.into(), }), ast::Expr::IfExp(ast::ExprIfExp { diff --git a/crates/ruff_python_ast/src/source_code/generator.rs b/crates/ruff_python_ast/src/source_code/generator.rs index 5efc6fe0c07a14..972238903ff07a 100644 --- a/crates/ruff_python_ast/src/source_code/generator.rs +++ b/crates/ruff_python_ast/src/source_code/generator.rs @@ -1156,7 +1156,7 @@ impl<'a> Generator<'a> { range: _range, })], [], - ) = (&**args, &**keywords) + ) = (args.as_slice(), keywords.as_slice()) { // Ensure that a single generator doesn't get double-parenthesized. self.unparse_expr(elt, precedence::COMMA); diff --git a/crates/ruff_python_semantic/src/analyze/type_inference.rs b/crates/ruff_python_semantic/src/analyze/type_inference.rs index 6c7b0159e7bc3a..ff8e0fd60dc739 100644 --- a/crates/ruff_python_semantic/src/analyze/type_inference.rs +++ b/crates/ruff_python_semantic/src/analyze/type_inference.rs @@ -44,8 +44,8 @@ pub enum PythonType { impl From<&Expr> for PythonType { fn from(expr: &Expr) -> Self { match expr { - Expr::NamedExpr(ast::ExprNamedExpr { value, .. }) => (&**value).into(), - Expr::UnaryOp(ast::ExprUnaryOp { operand, .. }) => (&**operand).into(), + Expr::NamedExpr(ast::ExprNamedExpr { value, .. }) => (value.as_ref()).into(), + Expr::UnaryOp(ast::ExprUnaryOp { operand, .. }) => (operand.as_ref()).into(), Expr::Dict(_) => PythonType::Dict, Expr::DictComp(_) => PythonType::Dict, Expr::Set(_) => PythonType::Set, From 1181d25e5a5192ef2374b272f4fb8e24aa5e568f Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Tue, 18 Jul 2023 20:59:02 -0400 Subject: [PATCH 8/9] Move a few more candidate rules to the deferred `Binding`-only pass (#5853) ## Summary No behavior change, but this is in theory more efficient, since we can just iterate over the flat `Binding` vector rather than having to iterate over binding chains via the `Scope`. --- crates/ruff/src/checkers/ast/mod.rs | 46 +++++++------- .../rules/unconventional_import_alias.rs | 60 +++++++++---------- .../unaliased_collections_abc_set_import.rs | 48 +++++++-------- crates/ruff_python_semantic/src/binding.rs | 2 + crates/ruff_python_semantic/src/model.rs | 3 + 5 files changed, 82 insertions(+), 77 deletions(-) diff --git a/crates/ruff/src/checkers/ast/mod.rs b/crates/ruff/src/checkers/ast/mod.rs index c8077d7ab98615..a628befeb7b561 100644 --- a/crates/ruff/src/checkers/ast/mod.rs +++ b/crates/ruff/src/checkers/ast/mod.rs @@ -4776,7 +4776,11 @@ impl<'a> Checker<'a> { /// Run any lint rules that operate over a single [`Binding`]. fn check_bindings(&mut self) { - if !self.any_enabled(&[Rule::UnusedVariable]) { + if !self.any_enabled(&[ + Rule::UnusedVariable, + Rule::UnconventionalImportAlias, + Rule::UnaliasedCollectionsAbcSetImport, + ]) { return; } @@ -4802,6 +4806,27 @@ impl<'a> Checker<'a> { self.diagnostics.push(diagnostic); } } + + if self.enabled(Rule::UnconventionalImportAlias) { + if let Some(diagnostic) = + flake8_import_conventions::rules::unconventional_import_alias( + self, + binding, + &self.settings.flake8_import_conventions.aliases, + ) + { + self.diagnostics.push(diagnostic); + } + } + if self.is_stub { + if self.enabled(Rule::UnaliasedCollectionsAbcSetImport) { + if let Some(diagnostic) = + flake8_pyi::rules::unaliased_collections_abc_set_import(self, binding) + { + self.diagnostics.push(diagnostic); + } + } + } } } @@ -4814,8 +4839,6 @@ impl<'a> Checker<'a> { Rule::TypingOnlyFirstPartyImport, Rule::TypingOnlyStandardLibraryImport, Rule::TypingOnlyThirdPartyImport, - Rule::UnaliasedCollectionsAbcSetImport, - Rule::UnconventionalImportAlias, Rule::UndefinedExport, Rule::UndefinedLocalWithImportStarUsage, Rule::UndefinedLocalWithImportStarUsage, @@ -5115,23 +5138,6 @@ impl<'a> Checker<'a> { if self.enabled(Rule::UnusedImport) { pyflakes::rules::unused_import(self, scope, &mut diagnostics); } - if self.enabled(Rule::UnconventionalImportAlias) { - flake8_import_conventions::rules::unconventional_import_alias( - self, - scope, - &mut diagnostics, - &self.settings.flake8_import_conventions.aliases, - ); - } - if self.is_stub { - if self.enabled(Rule::UnaliasedCollectionsAbcSetImport) { - flake8_pyi::rules::unaliased_collections_abc_set_import( - self, - scope, - &mut diagnostics, - ); - } - } } self.diagnostics.extend(diagnostics); } diff --git a/crates/ruff/src/rules/flake8_import_conventions/rules/unconventional_import_alias.rs b/crates/ruff/src/rules/flake8_import_conventions/rules/unconventional_import_alias.rs index dc5876dac31795..4ee7dcbb0d6b3e 100644 --- a/crates/ruff/src/rules/flake8_import_conventions/rules/unconventional_import_alias.rs +++ b/crates/ruff/src/rules/flake8_import_conventions/rules/unconventional_import_alias.rs @@ -2,7 +2,7 @@ use rustc_hash::FxHashMap; use ruff_diagnostics::{AutofixKind, Diagnostic, Fix, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_semantic::Scope; +use ruff_python_semantic::Binding; use crate::checkers::ast::Checker; use crate::registry::AsRule; @@ -53,42 +53,38 @@ impl Violation for UnconventionalImportAlias { /// ICN001 pub(crate) fn unconventional_import_alias( checker: &Checker, - scope: &Scope, - diagnostics: &mut Vec, + binding: &Binding, conventions: &FxHashMap, ) -> Option { - for (name, binding_id) in scope.all_bindings() { - let binding = checker.semantic().binding(binding_id); + let Some(qualified_name) = binding.qualified_name() else { + return None; + }; - let Some(qualified_name) = binding.qualified_name() else { - continue; - }; + let Some(expected_alias) = conventions.get(qualified_name) else { + return None; + }; - let Some(expected_alias) = conventions.get(qualified_name) else { - continue; - }; - - if binding.is_alias() && name == expected_alias { - continue; - } + let name = binding.name(checker.locator); + if binding.is_alias() && name == expected_alias { + return None; + } - let mut diagnostic = Diagnostic::new( - UnconventionalImportAlias { - name: qualified_name.to_string(), - asname: expected_alias.to_string(), - }, - binding.range, - ); - if checker.patch(diagnostic.kind.rule()) { - if checker.semantic().is_available(expected_alias) { - diagnostic.try_set_fix(|| { - let (edit, rest) = - Renamer::rename(name, expected_alias, scope, checker.semantic())?; - Ok(Fix::suggested_edits(edit, rest)) - }); - } + let mut diagnostic = Diagnostic::new( + UnconventionalImportAlias { + name: qualified_name.to_string(), + asname: expected_alias.to_string(), + }, + binding.range, + ); + if checker.patch(diagnostic.kind.rule()) { + if checker.semantic().is_available(expected_alias) { + diagnostic.try_set_fix(|| { + let scope = &checker.semantic().scopes[binding.scope]; + let (edit, rest) = + Renamer::rename(name, expected_alias, scope, checker.semantic())?; + Ok(Fix::suggested_edits(edit, rest)) + }); } - diagnostics.push(diagnostic); } - None + Some(diagnostic) } diff --git a/crates/ruff/src/rules/flake8_pyi/rules/unaliased_collections_abc_set_import.rs b/crates/ruff/src/rules/flake8_pyi/rules/unaliased_collections_abc_set_import.rs index 5fb0a3f5f602c9..6ebd42e3a97cd6 100644 --- a/crates/ruff/src/rules/flake8_pyi/rules/unaliased_collections_abc_set_import.rs +++ b/crates/ruff/src/rules/flake8_pyi/rules/unaliased_collections_abc_set_import.rs @@ -1,6 +1,6 @@ use ruff_diagnostics::{AutofixKind, Diagnostic, Fix, Violation}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_semantic::{BindingKind, FromImport, Scope}; +use ruff_python_semantic::{Binding, BindingKind, FromImport}; use crate::checkers::ast::Checker; use crate::registry::AsRule; @@ -48,31 +48,29 @@ impl Violation for UnaliasedCollectionsAbcSetImport { /// PYI025 pub(crate) fn unaliased_collections_abc_set_import( checker: &Checker, - scope: &Scope, - diagnostics: &mut Vec, -) { - for (name, binding_id) in scope.all_bindings() { - let binding = checker.semantic().binding(binding_id); - let BindingKind::FromImport(FromImport { qualified_name }) = &binding.kind else { - continue; - }; - if qualified_name.as_str() != "collections.abc.Set" { - continue; - } - if name == "AbstractSet" { - continue; - } + binding: &Binding, +) -> Option { + let BindingKind::FromImport(FromImport { qualified_name }) = &binding.kind else { + return None; + }; + if qualified_name.as_str() != "collections.abc.Set" { + return None; + } + + let name = binding.name(checker.locator); + if name == "AbstractSet" { + return None; + } - let mut diagnostic = Diagnostic::new(UnaliasedCollectionsAbcSetImport, binding.range); - if checker.patch(diagnostic.kind.rule()) { - if checker.semantic().is_available("AbstractSet") { - diagnostic.try_set_fix(|| { - let (edit, rest) = - Renamer::rename(name, "AbstractSet", scope, checker.semantic())?; - Ok(Fix::suggested_edits(edit, rest)) - }); - } + let mut diagnostic = Diagnostic::new(UnaliasedCollectionsAbcSetImport, binding.range); + if checker.patch(diagnostic.kind.rule()) { + if checker.semantic().is_available("AbstractSet") { + diagnostic.try_set_fix(|| { + let scope = &checker.semantic().scopes[binding.scope]; + let (edit, rest) = Renamer::rename(name, "AbstractSet", scope, checker.semantic())?; + Ok(Fix::suggested_edits(edit, rest)) + }); } - diagnostics.push(diagnostic); } + Some(diagnostic) } diff --git a/crates/ruff_python_semantic/src/binding.rs b/crates/ruff_python_semantic/src/binding.rs index f66da2a80a8012..45002b87006681 100644 --- a/crates/ruff_python_semantic/src/binding.rs +++ b/crates/ruff_python_semantic/src/binding.rs @@ -17,6 +17,8 @@ use crate::ScopeId; pub struct Binding<'a> { pub kind: BindingKind<'a>, pub range: TextRange, + /// The [`ScopeId`] of the scope in which the [`Binding`] was defined. + pub scope: ScopeId, /// The context in which the [`Binding`] was created. pub context: ExecutionContext, /// The statement in which the [`Binding`] was defined. diff --git a/crates/ruff_python_semantic/src/model.rs b/crates/ruff_python_semantic/src/model.rs index 0106dfcfc0d89a..d1aba8460dec17 100644 --- a/crates/ruff_python_semantic/src/model.rs +++ b/crates/ruff_python_semantic/src/model.rs @@ -189,6 +189,7 @@ impl<'a> SemanticModel<'a> { self.bindings.push(Binding { range: TextRange::default(), kind: BindingKind::Builtin, + scope: ScopeId::global(), references: Vec::new(), flags: BindingFlags::empty(), source: None, @@ -209,6 +210,7 @@ impl<'a> SemanticModel<'a> { kind, flags, references: Vec::new(), + scope: self.scope_id, source: self.stmt_id, context: self.execution_context(), exceptions: self.exceptions(), @@ -795,6 +797,7 @@ impl<'a> SemanticModel<'a> { kind: BindingKind::Assignment, range: *range, references: Vec::new(), + scope: self.scope_id, source: self.stmt_id, context: self.execution_context(), exceptions: self.exceptions(), From 7e6b472c5b1256a26bc589ca78acfe0b02713024 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Wed, 19 Jul 2023 09:29:35 +0530 Subject: [PATCH 9/9] Make `lint_only` aware of the source kind (#5876) --- crates/ruff/src/linter.rs | 3 ++- crates/ruff_benchmark/benches/linter.rs | 1 + crates/ruff_cli/src/diagnostics.rs | 20 ++++++++++++++++++-- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/crates/ruff/src/linter.rs b/crates/ruff/src/linter.rs index bc13cc67b209b8..9a42d37a3fe5b0 100644 --- a/crates/ruff/src/linter.rs +++ b/crates/ruff/src/linter.rs @@ -321,6 +321,7 @@ pub fn lint_only( package: Option<&Path>, settings: &Settings, noqa: flags::Noqa, + source_kind: Option<&SourceKind>, ) -> LinterResult<(Vec, Option)> { // Tokenize once. let tokens: Vec = ruff_rustpython::tokenize(contents); @@ -353,7 +354,7 @@ pub fn lint_only( &directives, settings, noqa, - None, + source_kind, ); result.map(|(diagnostics, imports)| { diff --git a/crates/ruff_benchmark/benches/linter.rs b/crates/ruff_benchmark/benches/linter.rs index 98ea5c1bd33271..7abaa4fdafa251 100644 --- a/crates/ruff_benchmark/benches/linter.rs +++ b/crates/ruff_benchmark/benches/linter.rs @@ -63,6 +63,7 @@ fn benchmark_linter(mut group: BenchmarkGroup, settings: &Settings) { None, settings, flags::Noqa::Enabled, + None, ); // Assert that file contains no parse errors diff --git a/crates/ruff_cli/src/diagnostics.rs b/crates/ruff_cli/src/diagnostics.rs index f994b7a055e05b..1d058b9b98ff14 100644 --- a/crates/ruff_cli/src/diagnostics.rs +++ b/crates/ruff_cli/src/diagnostics.rs @@ -204,12 +204,26 @@ pub(crate) fn lint_path( (result, fixed) } else { // If we fail to autofix, lint the original source code. - let result = lint_only(&contents, path, package, &settings.lib, noqa); + let result = lint_only( + &contents, + path, + package, + &settings.lib, + noqa, + Some(&source_kind), + ); let fixed = FxHashMap::default(); (result, fixed) } } else { - let result = lint_only(&contents, path, package, &settings.lib, noqa); + let result = lint_only( + &contents, + path, + package, + &settings.lib, + noqa, + Some(&source_kind), + ); let fixed = FxHashMap::default(); (result, fixed) }; @@ -316,6 +330,7 @@ pub(crate) fn lint_stdin( package, settings, noqa, + Some(&source_kind), ); let fixed = FxHashMap::default(); @@ -333,6 +348,7 @@ pub(crate) fn lint_stdin( package, settings, noqa, + Some(&source_kind), ); let fixed = FxHashMap::default(); (result, fixed)