From 2c84f911c4d57f3b436458f4d0e34fe10655cf76 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Mon, 30 Oct 2023 09:07:14 +0900 Subject: [PATCH] Preserve trailing statement semicolons when using `fmt: skip` (#8273) --- .../fixtures/ruff/fmt_skip/trailing_semi.py | 10 ++++++ .../src/statement/mod.rs | 26 +++++++++++++- crates/ruff_python_formatter/src/verbatim.rs | 12 ++++++- .../format@fmt_skip__trailing_semi.py.snap | 34 +++++++++++++++++++ crates/ruff_python_trivia/src/tokenizer.rs | 6 +++- 5 files changed, 85 insertions(+), 3 deletions(-) create mode 100644 crates/ruff_python_formatter/resources/test/fixtures/ruff/fmt_skip/trailing_semi.py create mode 100644 crates/ruff_python_formatter/tests/snapshots/format@fmt_skip__trailing_semi.py.snap diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/fmt_skip/trailing_semi.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/fmt_skip/trailing_semi.py new file mode 100644 index 0000000000000..f50c08cf9f7f6 --- /dev/null +++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/fmt_skip/trailing_semi.py @@ -0,0 +1,10 @@ +x = 1; # fmt: skip + +x = 1 ; # fmt: skip + +x = 1 \ + ; # fmt: skip + +x = 1 # ; # fmt: skip + +_; #unrelated semicolon diff --git a/crates/ruff_python_formatter/src/statement/mod.rs b/crates/ruff_python_formatter/src/statement/mod.rs index 7bc0a4c27ff12..0e822acdaa452 100644 --- a/crates/ruff_python_formatter/src/statement/mod.rs +++ b/crates/ruff_python_formatter/src/statement/mod.rs @@ -1,5 +1,7 @@ use ruff_formatter::{FormatOwnedWithRule, FormatRefWithRule}; -use ruff_python_ast::Stmt; +use ruff_python_ast::{AnyNodeRef, Stmt}; +use ruff_python_trivia::{SimpleToken, SimpleTokenKind, SimpleTokenizer}; +use ruff_text_size::{Ranged, TextRange}; use crate::prelude::*; @@ -81,3 +83,25 @@ impl<'ast> IntoFormat> for Stmt { FormatOwnedWithRule::new(self, FormatStmt) } } + +/// Returns the range of the semicolon terminating the statement or `None` if the statement +/// isn't 
terminated by a semicolon. +pub(super) fn trailing_semicolon(node: AnyNodeRef, source: &str) -> Option<TextRange> { + debug_assert!(node.is_statement()); + + let tokenizer = SimpleTokenizer::starts_at(node.end(), source); + + let next_token = tokenizer + .take_while(|token| !token.kind().is_comment()) + .find(|token| !token.kind().is_trivia()); + + if let Some(SimpleToken { + kind: SimpleTokenKind::Semi, + range, + }) = next_token + { + Some(range) + } else { + None + } +} diff --git a/crates/ruff_python_formatter/src/verbatim.rs b/crates/ruff_python_formatter/src/verbatim.rs index 4e2a532ce0f07..00f8e149609ec 100644 --- a/crates/ruff_python_formatter/src/verbatim.rs +++ b/crates/ruff_python_formatter/src/verbatim.rs @@ -17,6 +17,7 @@ use crate::comments::{leading_comments, trailing_comments, SourceComment}; use crate::prelude::*; use crate::statement::clause::ClauseHeader; use crate::statement::suite::SuiteChildStatement; +use crate::statement::trailing_semicolon; /// Disables formatting for all statements between the `first_suppressed` that has a leading `fmt: off` comment /// and the first trailing or leading `fmt: on` comment. The statements are formatted as they appear in the source code. @@ -902,6 +903,15 @@ impl Format> for FormatSuppressedNode<'_> { } } + // Some statements may end with a semicolon. 
Preserve the semicolon + let semicolon_range = self + .node + .is_statement() + .then(|| trailing_semicolon(self.node, f.context().source())) + .flatten(); + let verbatim_range = semicolon_range.map_or(self.node.range(), |semicolon| { + TextRange::new(self.node.start(), semicolon.end()) + }); comments.mark_verbatim_node_comments_formatted(self.node); // Write the outer comments and format the node as verbatim @@ -909,7 +919,7 @@ impl Format> for FormatSuppressedNode<'_> { f, [ leading_comments(node_comments.leading), - verbatim_text(self.node), + verbatim_text(verbatim_range), trailing_comments(node_comments.trailing) ] ) diff --git a/crates/ruff_python_formatter/tests/snapshots/format@fmt_skip__trailing_semi.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@fmt_skip__trailing_semi.py.snap new file mode 100644 index 0000000000000..79c46bf7fce7d --- /dev/null +++ b/crates/ruff_python_formatter/tests/snapshots/format@fmt_skip__trailing_semi.py.snap @@ -0,0 +1,34 @@ +--- +source: crates/ruff_python_formatter/tests/fixtures.rs +input_file: crates/ruff_python_formatter/resources/test/fixtures/ruff/fmt_skip/trailing_semi.py +--- +## Input +```py +x = 1; # fmt: skip + +x = 1 ; # fmt: skip + +x = 1 \ + ; # fmt: skip + +x = 1 # ; # fmt: skip + +_; #unrelated semicolon +``` + +## Output +```py +x = 1; # fmt: skip + +x = 1 ; # fmt: skip + +x = 1 \ + ; # fmt: skip + +x = 1 # ; # fmt: skip + +_ # unrelated semicolon +``` + + + diff --git a/crates/ruff_python_trivia/src/tokenizer.rs b/crates/ruff_python_trivia/src/tokenizer.rs index 7f6835edfe0c6..f19b30ed4cfd3 100644 --- a/crates/ruff_python_trivia/src/tokenizer.rs +++ b/crates/ruff_python_trivia/src/tokenizer.rs @@ -473,7 +473,7 @@ pub enum SimpleTokenKind { } impl SimpleTokenKind { - const fn is_trivia(self) -> bool { + pub const fn is_trivia(self) -> bool { matches!( self, SimpleTokenKind::Whitespace @@ -482,6 +482,10 @@ impl SimpleTokenKind { | SimpleTokenKind::Continuation ) } + + pub const fn is_comment(self) 
-> bool { + matches!(self, SimpleTokenKind::Comment) + } } /// Simple zero allocation tokenizer handling most tokens.