diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/fstring.options.json b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/fstring.options.json new file mode 100644 index 00000000000000..e3c32249eeffb6 --- /dev/null +++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/fstring.options.json @@ -0,0 +1,8 @@ +[ + { + "preview": "enabled" + }, + { + "preview": "disabled" + } +] diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/fstring.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/fstring.py index 017d243f1f08ea..4ea3243046b08b 100644 --- a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/fstring.py +++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/fstring.py @@ -62,3 +62,109 @@ x = f'''a{""}b''' y = f'''c{1}d"""e''' z = f'''a{""}b''' f'''c{1}d"""e''' + +# F-String formatting test cases (Preview) + +# Expression which does not exceed the line length limit +x = f"{a}" +x = f"{ + a = }" +x = f"{ # comment + a }" +x = f"{ # comment + a = }" + +# Remove the parentheses as adding them doesn't make them fit within the line length limit. +# This is +aaaaaaaaaaa = ( + f"asaaaaaaaaaaaaaaaa { aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc + dddddddd } cccccccccc" +) +x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" } ccccccccccccccc" +x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" = } ccccccccccccccc" +x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { # comment + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" } ccccccccccccccc" +x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { # comment + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" = } ccccccccccccccc" + +# Multiple larger expressions which exceed the line length limit. Here, we need to decide +# whether to split at the first or second expression. 
This should work similarly to the +# assignment statement formatting where we split from left to right. +x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb } cccccccccccccccccccc { ddddddddddddddd } eeeeeeeeeeeeee" + +# But, in this case, we would split at the first expression because there's already a +# comment which splits it. +x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb # comment + } cccccccccccccccccccc { ddddddddddddddd } eeeeeeeeeeeeee" + +# Here, the expression part itself starts with a curly brace so we need to add an extra +# space between the opening curly brace and the expression. +x = f"{ {'x': 1, 'y': 2} }" +x = f"{ {'x': 1, 'y': 2} = }" +x = f"{ # comment + {'x': 1, 'y': 2} }" +x = f"{ # comment + {'x': 1, 'y': 2} = }" + +# But, in this case, we would split the expression itself because it exceeds the line +# length limit so we need not add the extra space. +x = f"{ {'aaaaaaaaaaaaaaaaaaaaaaaaa', 'bbbbbbbbbbbbbbbbbbbbbbbbbbbb', 'cccccccccccccccccccccccccc'} }" + +# Comments + +# comment 0 +f" +{ # comment 1 + # comment 2 + foo # comment 3 + # comment 4 +}" # comment 5 + +# Conversion flags +# +# This is not valid Python code because of the additional whitespace between the `!` +# and conversion type. But, our parser isn't strict about this. This should probably be +# removed once we have a strict parser. +x = f"aaaaaaaaa { x ! r }" + +# Even in the case of debug expressions, we only need to preserve the whitespace within +# the expression part of the replacement field. +x = f"aaaaaaaaa { x = ! r }" + +# Combine conversion flags with format specifiers +x = f"{x = ! s + :>0 + + }" +# Well, this is new. There can be a comment after the format specifier but only if it's +# on its own line. Refer to https://github.com/astral-sh/ruff/pull/7787 for more details. +# We'll format it as trailing comments. 
+x = f"{x !s + :>0 + # comment + }" + +x = f""" +{ # dangling comment 1 + x = :.0{y # dangling comment 2 + }f}""" + +# Here, the debug expression is in a nested f-string so we should start preserving +# whitespaces from that point onwards. This means we should format the outer f-string. +x = f"""{"foo " + # comment 1 + f"{ x = + + }" # comment 2 + } + """ + +# Mix of various features. +f"{ # dangling comment 1 + foo # after foo + :>{ + x # after x + } + # dangling comment 2 + # dangling comment 3 +} woah {x}" + +x = f"{f"{f'{x}'}"}" diff --git a/crates/ruff_python_formatter/src/comments/placement.rs b/crates/ruff_python_formatter/src/comments/placement.rs index 2d958ebee98f33..d8835ef011dc65 100644 --- a/crates/ruff_python_formatter/src/comments/placement.rs +++ b/crates/ruff_python_formatter/src/comments/placement.rs @@ -289,6 +289,18 @@ fn handle_enclosed_comment<'a>( } } AnyNodeRef::FString(fstring) => CommentPlacement::dangling(fstring, comment), + AnyNodeRef::FStringExpressionElement(_) => { + if matches!( + comment.preceding_node(), + Some( + AnyNodeRef::FStringExpressionElement(_) | AnyNodeRef::FStringLiteralElement(_) + ) + ) { + CommentPlacement::dangling(comment.enclosing_node(), comment) + } else { + handle_bracketed_end_of_line_comment(comment, locator) + } + } AnyNodeRef::ExprList(_) | AnyNodeRef::ExprSet(_) | AnyNodeRef::ExprListComp(_) diff --git a/crates/ruff_python_formatter/src/expression/expr_f_string.rs b/crates/ruff_python_formatter/src/expression/expr_f_string.rs index dcbb85520e9d1b..356131d42d722e 100644 --- a/crates/ruff_python_formatter/src/expression/expr_f_string.rs +++ b/crates/ruff_python_formatter/src/expression/expr_f_string.rs @@ -48,6 +48,24 @@ impl NeedsParentheses for ExprFString { ) -> OptionalParentheses { if self.value.is_implicit_concatenated() { OptionalParentheses::Multiline + // TODO(dhruvmanila): Ideally what we want here is a new variant which + // is something like: + // - If the expression fits by just adding the 
parentheses, then add them and + // avoid breaking the f-string expression. So, + // ``` + // xxxxxxxxx = ( + // f"aaaaaaaaaaaa { xxxxxxx + yyyyyyyy } bbbbbbbbbbbbb" + // ) + // ``` + // - But, if the expression is too long to fit even with parentheses, then + // don't add the parentheses and instead break the expression at `soft_line_break`. + // ``` + // xxxxxxxxx = f"aaaaaaaaaaaa { + // xxxxxxxxx + yyyyyyyyyy + // } bbbbbbbbbbbbb" + // ``` + // This isn't completely decided yet, refer to the relevant discussion: + // https://github.com/astral-sh/ruff/discussions/9785 } else if AnyString::FString(self).is_multiline(context.source()) { OptionalParentheses::Never } else { diff --git a/crates/ruff_python_formatter/src/options.rs b/crates/ruff_python_formatter/src/options.rs index 8deaf926e41125..7b74c7b0d5a3cc 100644 --- a/crates/ruff_python_formatter/src/options.rs +++ b/crates/ruff_python_formatter/src/options.rs @@ -466,3 +466,12 @@ pub enum PythonVersion { Py311, Py312, } + +impl PythonVersion { + /// Return `true` if the current version supports [PEP 701]. + /// + /// [PEP 701]: https://peps.python.org/pep-0701/ + pub fn supports_pep_701(self) -> bool { + self >= Self::Py312 + } +} diff --git a/crates/ruff_python_formatter/src/other/f_string.rs b/crates/ruff_python_formatter/src/other/f_string.rs index eb5458c1c83247..be6c633ca0c9c7 100644 --- a/crates/ruff_python_formatter/src/other/f_string.rs +++ b/crates/ruff_python_formatter/src/other/f_string.rs @@ -1,9 +1,15 @@ +use ruff_formatter::write; use ruff_python_ast::FString; use ruff_text_size::Ranged; use crate::prelude::*; use crate::preview::is_hex_codes_in_unicode_sequences_enabled; -use crate::string::{Quoting, StringNormalizer}; +use crate::preview::is_pep_701_enabled; +use crate::string::{ + choose_quotes, Quoting, StringNormalizer, StringPart, StringPrefix, StringQuotes, +}; + +use super::f_string_element::FormatFStringElement; /// Formats an f-string which is part of a larger f-string expression. 
/// @@ -25,27 +31,85 @@ impl<'a> FormatFString<'a> { impl Format> for FormatFString<'_> { fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> { let locator = f.context().locator(); + let comments = f.context().comments().clone(); + + if !is_pep_701_enabled(f.context()) { + let result = StringNormalizer::from_source(self.value.range(), &locator) + .normalize( + self.quoting, + &locator, + f.options().quote_style(), + f.context().docstring(), + is_hex_codes_in_unicode_sequences_enabled(f.context()), + ) + .fmt(f); + self.value.elements.iter().for_each(|value| { + comments.mark_verbatim_node_comments_formatted(value.into()); + }); + return result; + } + + let string = StringPart::from_source(self.value.range(), &locator); + + // TODO(dhruvmanila): We could always use the same quotes for Python 3.12. + // But, care needs to be taken for when formatting inside a docstring. + let quotes = choose_quotes( + &string, + &locator, + self.quoting, + f.options().quote_style(), + f.context().docstring(), + ); + + let is_multiline = + memchr::memchr2(b'\n', b'\r', locator.slice(self.value).as_bytes()).is_some(); + let context = FStringContext::new(string.prefix(), quotes, is_multiline); + + // Starting prefix and quote + write!(f, [string.prefix(), quotes])?; + + format_with(|f| { + f.join() + .entries( + self.value + .elements + .iter() + .map(|element| FormatFStringElement::new(element, context)), + ) + .finish() + }) + .fmt(f)?; + + // Ending quote + quotes.fmt(f) + } +} + +#[derive(Clone, Copy, Debug)] +pub(crate) struct FStringContext { + prefix: StringPrefix, + quotes: StringQuotes, + is_multiline: bool, +} + +impl FStringContext { + const fn new(prefix: StringPrefix, quotes: StringQuotes, is_multiline: bool) -> Self { + Self { + prefix, + quotes, + is_multiline, + } + } + + pub(crate) const fn quotes(self) -> StringQuotes { + self.quotes + } + + pub(crate) const fn prefix(self) -> StringPrefix { + self.prefix + } - let result = 
StringNormalizer::from_source(self.value.range(), &locator) - .normalize( - self.quoting, - &locator, - f.options().quote_style(), - f.context().docstring(), - is_hex_codes_in_unicode_sequences_enabled(f.context()), - ) - .fmt(f); - - // TODO(dhruvmanila): With PEP 701, comments can be inside f-strings. - // This is to mark all of those comments as formatted but we need to - // figure out how to handle them. Note that this needs to be done only - // after the f-string is formatted, so only for all the non-formatted - // comments. - let comments = f.context().comments(); - self.value.elements.iter().for_each(|value| { - comments.mark_verbatim_node_comments_formatted(value.into()); - }); - - result + pub(crate) const fn should_remove_soft_line_breaks(self) -> bool { + !self.is_multiline } } diff --git a/crates/ruff_python_formatter/src/other/f_string_element.rs b/crates/ruff_python_formatter/src/other/f_string_element.rs new file mode 100644 index 00000000000000..7f59fda57eb37e --- /dev/null +++ b/crates/ruff_python_formatter/src/other/f_string_element.rs @@ -0,0 +1,222 @@ +use std::borrow::Cow; + +use ruff_formatter::{write, RemoveSoftLinesBuffer}; +use ruff_python_ast::{ + ConversionFlag, Expr, FStringElement, FStringExpressionElement, FStringLiteralElement, +}; +use ruff_text_size::Ranged; + +use crate::comments::{dangling_open_parenthesis_comments, trailing_comments}; +use crate::context::{NodeLevel, WithNodeLevel}; +use crate::prelude::*; +use crate::preview::is_hex_codes_in_unicode_sequences_enabled; +use crate::string::normalize_string; +use crate::verbatim::suppressed_node; + +use super::f_string::FStringContext; + +/// Formats an f-string element which is either a literal or a formatted expression. +/// +/// This delegates the actual formatting to the appropriate formatter. 
+pub(crate) struct FormatFStringElement<'a> { + element: &'a FStringElement, + context: FStringContext, +} + +impl<'a> FormatFStringElement<'a> { + pub(crate) fn new(element: &'a FStringElement, context: FStringContext) -> Self { + Self { element, context } + } +} + +impl Format> for FormatFStringElement<'_> { + fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> { + match self.element { + FStringElement::Literal(string_literal) => { + FormatFStringLiteralElement::new(string_literal, self.context).fmt(f) + } + FStringElement::Expression(expression) => { + FormatFStringExpressionElement::new(expression, self.context).fmt(f) + } + } + } +} + +pub(crate) struct FormatFStringLiteralElement<'a> { + element: &'a FStringLiteralElement, + context: FStringContext, +} + +impl<'a> FormatFStringLiteralElement<'a> { + pub(crate) fn new(element: &'a FStringLiteralElement, context: FStringContext) -> Self { + Self { element, context } + } +} + +impl Format> for FormatFStringLiteralElement<'_> { + fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> { + let literal_content = f.context().locator().slice(self.element.range()); + let normalized = normalize_string( + literal_content, + self.context.quotes(), + self.context.prefix(), + is_hex_codes_in_unicode_sequences_enabled(f.context()), + ); + match &normalized { + Cow::Borrowed(_) => source_text_slice(self.element.range()).fmt(f), + Cow::Owned(normalized) => text(normalized).fmt(f), + } + } +} + +pub(crate) struct FormatFStringExpressionElement<'a> { + element: &'a FStringExpressionElement, + context: FStringContext, +} + +impl<'a> FormatFStringExpressionElement<'a> { + pub(crate) fn new(element: &'a FStringExpressionElement, context: FStringContext) -> Self { + Self { element, context } + } +} + +impl Format> for FormatFStringExpressionElement<'_> { + fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> { + let FStringExpressionElement { + expression, + debug_text, + conversion, + format_spec, + .. 
+ } = self.element; + + let comments = f.context().comments().clone(); + + if let Some(debug_text) = debug_text { + token("{").fmt(f)?; + + // If debug text is present in a f-string, we'll mark all of the comments + // in this f-string as formatted. + comments.mark_verbatim_node_comments_formatted(self.element.into()); + + write!( + f, + [ + text(&debug_text.leading), + suppressed_node(&**expression), + text(&debug_text.trailing), + ] + )?; + + // Even if debug text is present, any whitespace between the + // conversion flag and the format spec doesn't need to be preserved. + match conversion { + ConversionFlag::Str => text("!s").fmt(f)?, + ConversionFlag::Ascii => text("!a").fmt(f)?, + ConversionFlag::Repr => text("!r").fmt(f)?, + ConversionFlag::None => (), + } + + if let Some(format_spec) = format_spec.as_deref() { + write!(f, [token(":"), suppressed_node(format_spec)])?; + } + + token("}").fmt(f) + } else { + let dangling_item_comments = comments.dangling(self.element); + let (dangling_open_parentheses_comments, trailing_format_spec_comments) = + dangling_item_comments.split_at( + dangling_item_comments + .partition_point(|comment| comment.start() < expression.start()), + ); + + let item = format_with(|f| { + let line_break_or_space = match expression.as_ref() { + // If an expression starts with a `{`, we need to add a space before the + // curly brace to avoid turning it into a literal curly with `{{`. + // + // For example, + // ```python + // f"{ {'x': 1, 'y': 2} }" + // # ^ ^ + // ``` + // + // We need to preserve the space highlighted by `^`. + Expr::Dict(_) | Expr::DictComp(_) | Expr::Set(_) | Expr::SetComp(_) => { + Some(soft_line_break_or_space()) + } + _ => None, + }; + + write!(f, [line_break_or_space, expression.format()])?; + + // Conversion comes first, then the format spec. 
+ match conversion { + ConversionFlag::Str => text("!s").fmt(f)?, + ConversionFlag::Ascii => text("!a").fmt(f)?, + ConversionFlag::Repr => text("!r").fmt(f)?, + ConversionFlag::None => (), + } + + if let Some(format_spec) = format_spec.as_deref() { + let elements = + format_with(|f| { + f.join() + .entries(format_spec.elements.iter().map(|element| { + FormatFStringElement::new(element, self.context) + })) + .finish() + }); + write!( + f, + [ + token(":"), + elements, + trailing_comments(trailing_format_spec_comments) + ] + )?; + } + + line_break_or_space.fmt(f) + }); + + let indented = format_with(|f| { + let mut buffer = RemoveSoftLinesBuffer::new(f); + + if dangling_open_parentheses_comments.is_empty() { + if self.context.should_remove_soft_line_breaks() { + write!(buffer, [&soft_block_indent(&item)]) + } else { + write!(f, [&soft_block_indent(&item)]) + } + } else { + if self.context.should_remove_soft_line_breaks() { + write!( + buffer, + [ + dangling_open_parenthesis_comments( + dangling_open_parentheses_comments + ), + group(&item) + ] + ) + } else { + write!( + f, + [ + dangling_open_parenthesis_comments( + dangling_open_parentheses_comments + ), + soft_block_indent(&item), + ] + ) + } + } + }); + + let mut f = WithNodeLevel::new(NodeLevel::ParenthesizedExpression, f); + + write!(f, [token("{"), indented, token("}")]) + } + } +} diff --git a/crates/ruff_python_formatter/src/other/mod.rs b/crates/ruff_python_formatter/src/other/mod.rs index d07339f717cbf5..2aace837913c4f 100644 --- a/crates/ruff_python_formatter/src/other/mod.rs +++ b/crates/ruff_python_formatter/src/other/mod.rs @@ -7,6 +7,7 @@ pub(crate) mod decorator; pub(crate) mod elif_else_clause; pub(crate) mod except_handler_except_handler; pub(crate) mod f_string; +pub(crate) mod f_string_element; pub(crate) mod f_string_part; pub(crate) mod identifier; pub(crate) mod keyword; diff --git a/crates/ruff_python_formatter/src/preview.rs b/crates/ruff_python_formatter/src/preview.rs index 
712a7da4170881..f4a63921549c17 100644 --- a/crates/ruff_python_formatter/src/preview.rs +++ b/crates/ruff_python_formatter/src/preview.rs @@ -81,3 +81,8 @@ pub(crate) const fn is_multiline_string_handling_enabled(context: &PyFormatConte pub(crate) const fn is_format_module_docstring_enabled(context: &PyFormatContext) -> bool { context.is_preview() } + +/// Returns `true` if the [`PEP 701`](https://github.com/astral-sh/ruff/issues/7594) preview style is enabled. +pub(crate) const fn is_pep_701_enabled(context: &PyFormatContext) -> bool { + context.is_preview() +}