Preview minimal f-string formatting

astral-sh · Feb 12, 2024 · 4216d2e · 4216d2e
1 parent 8627f40
commit 4216d2e
Show file tree

Hide file tree

Showing 10 changed files with 1,217 additions and 23 deletions.
diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/fstring.options.json b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/fstring.options.json
@@ -0,0 +1,8 @@
+[
+  {
+    "preview": "enabled"
+  },
+  {
+    "preview": "disabled"
+  }
+]
diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/fstring.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/fstring.py
@@ -62,3 +62,138 @@
 x = f'''a{""}b'''
 y = f'''c{1}d"""e'''
 z = f'''a{""}b''' f'''c{1}d"""e'''
+
+# F-String formatting test cases (Preview)
+
+# Simple expression with a mix of debug expression and comments.
+x = f"{a}"
+x = f"{
+    a = }"
+x = f"{ # comment
+    a }"
+x = f"{   # comment
+    a = }"
+
+# Remove the parentheses as adding them doesn't make them fit within the line length limit.
+# This is similar to how we format it before f-string formatting.
+aaaaaaaaaaa = (
+    f"asaaaaaaaaaaaaaaaa { aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc + dddddddd } cccccccccc"
+)
+# Here, we would use the best fit layout to put the f-string indented on the next line
+# similar to the next example.
+aaaaaaaaaaa = f"asaaaaaaaaaaaaaaaa { aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc } cccccccccc"
+aaaaaaaaaaa = (
+    f"asaaaaaaaaaaaaaaaa { aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc } cccccccccc"
+)
+
+# This should never add the optional parentheses because even after adding them, the
+# f-string exceeds the line length limit.
+x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" } ccccccccccccccc"
+x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" = } ccccccccccccccc"
+x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { # comment
+                                             "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" } ccccccccccccccc"
+x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { # comment
+                                             "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" = } ccccccccccccccc"
+
+# Multiple larger expressions which exceed the line length limit. Here, we need to decide
+# whether to split at the first or second expression. This should work similarly to the
+# assignment statement formatting where we split from right to left in preview mode.
+x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb } cccccccccccccccccccc { ddddddddddddddd } eeeeeeeeeeeeee"
+
+# The above example won't split but when we start introducing line breaks:
+x = f"aaaaaaaaaaaa {
+        bbbbbbbbbbbbbb } cccccccccccccccccccc { ddddddddddddddd } eeeeeeeeeeeeee"
+x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb
+                    } cccccccccccccccccccc { ddddddddddddddd } eeeeeeeeeeeeee"
+x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb } cccccccccccccccccccc {
+        ddddddddddddddd } eeeeeeeeeeeeee"
+x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb } cccccccccccccccccccc { ddddddddddddddd
+                                                            } eeeeeeeeeeeeee"
+
+# But, in case comments are present, we would split at the expression containing the
+# comments:
+x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb # comment
+                    } cccccccccccccccccccc { ddddddddddddddd } eeeeeeeeeeeeee"
+x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb
+                    } cccccccccccccccccccc { # comment
+                                            ddddddddddddddd } eeeeeeeeeeeeee"
+
+# Here, the expression part itself starts with a curly brace so we need to add an extra
+# space between the opening curly brace and the expression.
+x = f"{ {'x': 1, 'y': 2} }"
+# Although the extra space isn't required before the ending curly brace, we add it for
+# consistency.
+x = f"{ {'x': 1, 'y': 2}}"
+x = f"{ {'x': 1, 'y': 2} = }"
+x = f"{  # comment
+    {'x': 1, 'y': 2} }"
+x = f"{    # comment
+    {'x': 1, 'y': 2} = }"
+
+# But, in this case, we would split the expression itself because it exceeds the line
+# length limit so we need not add the extra space.
+xxxxxxx = f"{
+    {'aaaaaaaaaaaaaaaaaaa', 'bbbbbbbbbbbbbbbbbbbbbb', 'ccccccccccccccccccccc'}
+}"
+# And, split the expression itself because it exceeds the line length.
+xxxxxxx = f"{
+    {'aaaaaaaaaaaaaaaaaaaaaaaaa', 'bbbbbbbbbbbbbbbbbbbbbbbbbbb', 'cccccccccccccccccccccccccc'}
+}"
+
+# Comments
+
+# No comments should be dropped!
+f"{ # comment 1
+    # comment 2
+    foo # comment 3
+    # comment 4
+}"  # comment 5
+# comment 6
+
+# Conversion flags
+#
+# This is not valid Python code because of the additional whitespace between the `!`
+# and conversion type. But, our parser isn't strict about this. This should probably be
+# removed once we have a strict parser.
+x = f"aaaaaaaaa { x !  r }"
+
+# Even in the case of debug expressions, we only need to preserve the whitespace within
+# the expression part of the replacement field.
+x = f"aaaaaaaaa { x   = !  r  }"
+
+# Combine conversion flags with format specifiers
+x = f"{x   =   !  s
+         :>0
+
+         }"
+# This is interesting. There can be a comment after the format specifier but only if it's
+# on its own line. Refer to https://github.com/astral-sh/ruff/pull/7787 for more details.
+# We'll format it as trailing comments.
+x = f"{x  !s
+         :>0
+         # comment
+         }"
+
+x = f"""
+{              # dangling comment 1
+ x =   :.0{y # dangling comment 2
+           }f}"""
+
+# Here, the debug expression is in a nested f-string so we should start preserving
+# whitespaces from that point onwards. This means we should format the outer f-string.
+x = f"""{"foo " +    # comment 1
+    f"{   x =
+
+       }"    # comment 2
+ }
+        """
+
+# Mix of various features.
+f"{  # dangling comment 1
+    foo # after foo
+   :>{
+          x # after x
+          }
+    # dangling comment 2
+    # dangling comment 3
+} woah {x}"
diff --git a/crates/ruff_python_formatter/src/comments/placement.rs b/crates/ruff_python_formatter/src/comments/placement.rs
@@ -289,6 +289,18 @@ fn handle_enclosed_comment<'a>(
             }
         }
         AnyNodeRef::FString(fstring) => CommentPlacement::dangling(fstring, comment),
+        AnyNodeRef::FStringExpressionElement(_) => {
+            if matches!(
+                comment.preceding_node(),
+                Some(
+                    AnyNodeRef::FStringExpressionElement(_) | AnyNodeRef::FStringLiteralElement(_)
+                )
+            ) {
+                CommentPlacement::dangling(comment.enclosing_node(), comment)
+            } else {
+                handle_bracketed_end_of_line_comment(comment, locator)
+            }
+        }
         AnyNodeRef::ExprList(_)
         | AnyNodeRef::ExprSet(_)
         | AnyNodeRef::ExprListComp(_)

diff --git a/crates/ruff_python_formatter/src/expression/expr_f_string.rs b/crates/ruff_python_formatter/src/expression/expr_f_string.rs
@@ -48,6 +48,24 @@ impl NeedsParentheses for ExprFString {
     ) -> OptionalParentheses {
         if self.value.is_implicit_concatenated() {
             OptionalParentheses::Multiline
+        // TODO(dhruvmanila): Ideally what we want here is a new variant which
+        // is something like:
+        // - If the expression fits by just adding the parentheses, then add them and
+        //   avoid breaking the f-string expression. So,
+        //   ```
+        //   xxxxxxxxx = (
+        //       f"aaaaaaaaaaaa { xxxxxxx + yyyyyyyy } bbbbbbbbbbbbb"
+        //   )
+        //   ```
+        // - But, if the expression is too long to fit even with parentheses, then
+        //   don't add the parentheses and instead break the expression at `soft_line_break`.
+        //   ```
+        //   xxxxxxxxx = f"aaaaaaaaaaaa {
+        //       xxxxxxxxx + yyyyyyyyyy
+        //   } bbbbbbbbbbbbb"
+        //   ```
+        // This isn't completely decided yet, refer to the relevant discussion:
+        // https://github.com/astral-sh/ruff/discussions/9785
         } else if AnyString::FString(self).is_multiline(context.source()) {
             OptionalParentheses::Never
         } else {

diff --git a/crates/ruff_python_formatter/src/options.rs b/crates/ruff_python_formatter/src/options.rs
@@ -466,3 +466,12 @@ pub enum PythonVersion {
     Py311,
     Py312,
 }
+
+impl PythonVersion {
+    /// Return `true` if the current version supports [PEP 701].
+    ///
+    /// [PEP 701]: https://peps.python.org/pep-0701/
+    pub fn supports_pep_701(self) -> bool {
+        self >= Self::Py312
+    }
+}
diff --git a/crates/ruff_python_formatter/src/other/f_string.rs b/crates/ruff_python_formatter/src/other/f_string.rs
@@ -1,9 +1,15 @@
+use ruff_formatter::write;
 use ruff_python_ast::FString;
 use ruff_text_size::Ranged;
 
 use crate::prelude::*;
 use crate::preview::is_hex_codes_in_unicode_sequences_enabled;
-use crate::string::{Quoting, StringNormalizer};
+use crate::preview::is_pep_701_enabled;
+use crate::string::{
+    choose_quotes, Quoting, StringNormalizer, StringPart, StringPrefix, StringQuotes,
+};
+
+use super::f_string_element::FormatFStringElement;
 
 /// Formats an f-string which is part of a larger f-string expression.
 ///
@@ -25,27 +31,85 @@ impl<'a> FormatFString<'a> {
 impl Format<PyFormatContext<'_>> for FormatFString<'_> {
     fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
         let locator = f.context().locator();
+        let comments = f.context().comments().clone();
+
+        if !is_pep_701_enabled(f.context()) {
+            let result = StringNormalizer::from_source(self.value.range(), &locator)
+                .normalize(
+                    self.quoting,
+                    &locator,
+                    f.options().quote_style(),
+                    f.context().docstring(),
+                    is_hex_codes_in_unicode_sequences_enabled(f.context()),
+                )
+                .fmt(f);
+            self.value.elements.iter().for_each(|value| {
+                comments.mark_verbatim_node_comments_formatted(value.into());
+            });
+            return result;
+        }
+
+        let string = StringPart::from_source(self.value.range(), &locator);
+
+        // TODO(dhruvmanila): We could always use the same quotes for Python 3.12.
+        // But, care needs to be taken for when formatting inside a docstring.
+        let quotes = choose_quotes(
+            &string,
+            &locator,
+            self.quoting,
+            f.options().quote_style(),
+            f.context().docstring(),
+        );
+
+        let is_multiline =
+            memchr::memchr2(b'\n', b'\r', locator.slice(self.value).as_bytes()).is_some();
+        let context = FStringContext::new(string.prefix(), quotes, is_multiline);
+
+        // Starting prefix and quote
+        write!(f, [string.prefix(), quotes])?;
+
+        format_with(|f| {
+            f.join()
+                .entries(
+                    self.value
+                        .elements
+                        .iter()
+                        .map(|element| FormatFStringElement::new(element, context)),
+                )
+                .finish()
+        })
+        .fmt(f)?;
+
+        // Ending quote
+        quotes.fmt(f)
+    }
+}
+
+#[derive(Clone, Copy, Debug)]
+pub(crate) struct FStringContext {
+    prefix: StringPrefix,
+    quotes: StringQuotes,
+    is_multiline: bool,
+}
+
+impl FStringContext {
+    const fn new(prefix: StringPrefix, quotes: StringQuotes, is_multiline: bool) -> Self {
+        Self {
+            prefix,
+            quotes,
+            is_multiline,
+        }
+    }
+
+    pub(crate) const fn quotes(self) -> StringQuotes {
+        self.quotes
+    }
+
+    pub(crate) const fn prefix(self) -> StringPrefix {
+        self.prefix
+    }
 
-        let result = StringNormalizer::from_source(self.value.range(), &locator)
-            .normalize(
-                self.quoting,
-                &locator,
-                f.options().quote_style(),
-                f.context().docstring(),
-                is_hex_codes_in_unicode_sequences_enabled(f.context()),
-            )
-            .fmt(f);
-
-        // TODO(dhruvmanila): With PEP 701, comments can be inside f-strings.
-        // This is to mark all of those comments as formatted but we need to
-        // figure out how to handle them. Note that this needs to be done only
-        // after the f-string is formatted, so only for all the non-formatted
-        // comments.
-        let comments = f.context().comments();
-        self.value.elements.iter().for_each(|value| {
-            comments.mark_verbatim_node_comments_formatted(value.into());
-        });
-
-        result
+    pub(crate) const fn should_remove_soft_line_breaks(self) -> bool {
+        !self.is_multiline
     }
 }