Skip to content

Commit

Permalink
Preview minimal f-string formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
dhruvmanila committed Feb 12, 2024
1 parent 8627f40 commit 4216d2e
Show file tree
Hide file tree
Showing 10 changed files with 1,217 additions and 23 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[
{
"preview": "enabled"
},
{
"preview": "disabled"
}
]
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,138 @@
x = f'''a{""}b'''
y = f'''c{1}d"""e'''
z = f'''a{""}b''' f'''c{1}d"""e'''

# F-String formatting test cases (Preview)

# Simple expression with a mix of debug expression and comments.
x = f"{a}"
x = f"{
a = }"
x = f"{ # comment
a }"
x = f"{ # comment
a = }"

# Remove the parentheses as adding them doesn't make them fit within the line length limit.
# This is similar to how we format it before f-string formatting.
aaaaaaaaaaa = (
f"asaaaaaaaaaaaaaaaa { aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc + dddddddd } cccccccccc"
)
# Here, we would use the best fit layout to put the f-string indented on the next line
# similar to the next example.
aaaaaaaaaaa = f"asaaaaaaaaaaaaaaaa { aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc } cccccccccc"
aaaaaaaaaaa = (
f"asaaaaaaaaaaaaaaaa { aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc } cccccccccc"
)

# This should never add the optional parentheses because even after adding them, the
# f-string exceeds the line length limit.
x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" } ccccccccccccccc"
x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" = } ccccccccccccccc"
x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { # comment
"bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" } ccccccccccccccc"
x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { # comment
"bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" = } ccccccccccccccc"

# Multiple larger expressions which exceed the line length limit. Here, we need to decide
# whether to split at the first or second expression. This should work similarly to the
# assignment statement formatting where we split from right to left in preview mode.
x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb } cccccccccccccccccccc { ddddddddddddddd } eeeeeeeeeeeeee"

# The above example won't split but when we start introducing line breaks:
x = f"aaaaaaaaaaaa {
bbbbbbbbbbbbbb } cccccccccccccccccccc { ddddddddddddddd } eeeeeeeeeeeeee"
x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb
} cccccccccccccccccccc { ddddddddddddddd } eeeeeeeeeeeeee"
x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb } cccccccccccccccccccc {
ddddddddddddddd } eeeeeeeeeeeeee"
x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb } cccccccccccccccccccc { ddddddddddddddd
} eeeeeeeeeeeeee"

# But, in case comments are present, we would split at the expression containing the
# comments:
x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb # comment
} cccccccccccccccccccc { ddddddddddddddd } eeeeeeeeeeeeee"
x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb
} cccccccccccccccccccc { # comment
ddddddddddddddd } eeeeeeeeeeeeee"

# Here, the expression part itself starts with a curly brace so we need to add an extra
# space between the opening curly brace and the expression.
x = f"{ {'x': 1, 'y': 2} }"
# Although the extra space isn't required before the ending curly brace, we add it for
# consistency.
x = f"{ {'x': 1, 'y': 2}}"
x = f"{ {'x': 1, 'y': 2} = }"
x = f"{ # comment
{'x': 1, 'y': 2} }"
x = f"{ # comment
{'x': 1, 'y': 2} = }"

# But, in this case, we would split the expression itself because it exceeds the line
# length limit so we need not add the extra space.
xxxxxxx = f"{
{'aaaaaaaaaaaaaaaaaaa', 'bbbbbbbbbbbbbbbbbbbbbb', 'ccccccccccccccccccccc'}
}"
# And, split the expression itself because it exceeds the line length.
xxxxxxx = f"{
{'aaaaaaaaaaaaaaaaaaaaaaaaa', 'bbbbbbbbbbbbbbbbbbbbbbbbbbb', 'cccccccccccccccccccccccccc'}
}"

# Comments

# No comments should be dropped!
f"{ # comment 1
# comment 2
foo # comment 3
# comment 4
}" # comment 5
# comment 6

# Conversion flags
#
# This is not a valid Python code because of the additional whitespace between the `!`
# and conversion type. But, our parser isn't strict about this. This should probably be
# removed once we have a strict parser.
x = f"aaaaaaaaa { x ! r }"

# Even in the case of debug expressions, we only need to preserve the whitespace within
# the expression part of the replacement field.
x = f"aaaaaaaaa { x = ! r }"

# Combine conversion flags with format specifiers
x = f"{x = ! s
:>0
}"
# This is interesting. There can be a comment after the format specifier but only if it's
# on its own line. Refer to https://github.com/astral-sh/ruff/pull/7787 for more details.
# We'll format it as trailing comments.
x = f"{x !s
:>0
# comment
}"

x = f"""
{ # dangling comment 1
x = :.0{y # dangling comment 2
}f}"""

# Here, the debug expression is in a nested f-string so we should start preserving
# whitespaces from that point onwards. This means we should format the outer f-string.
x = f"""{"foo " + # comment 1
f"{ x =
}" # comment 2
}
"""

# Mix of various features.
f"{ # dangling comment 1
foo # after foo
:>{
x # after x
}
# dangling comment 2
# dangling comment 3
} woah {x}"
12 changes: 12 additions & 0 deletions crates/ruff_python_formatter/src/comments/placement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,18 @@ fn handle_enclosed_comment<'a>(
}
}
AnyNodeRef::FString(fstring) => CommentPlacement::dangling(fstring, comment),
AnyNodeRef::FStringExpressionElement(_) => {
if matches!(
comment.preceding_node(),
Some(
AnyNodeRef::FStringExpressionElement(_) | AnyNodeRef::FStringLiteralElement(_)
)
) {
CommentPlacement::dangling(comment.enclosing_node(), comment)
} else {
handle_bracketed_end_of_line_comment(comment, locator)
}
}
AnyNodeRef::ExprList(_)
| AnyNodeRef::ExprSet(_)
| AnyNodeRef::ExprListComp(_)
Expand Down
18 changes: 18 additions & 0 deletions crates/ruff_python_formatter/src/expression/expr_f_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,24 @@ impl NeedsParentheses for ExprFString {
) -> OptionalParentheses {
if self.value.is_implicit_concatenated() {
OptionalParentheses::Multiline
// TODO(dhruvmanila): Ideally what we want here is a new variant which
// is something like:
// - If the expression fits by just adding the parentheses, then add them and
// avoid breaking the f-string expression. So,
// ```
// xxxxxxxxx = (
// f"aaaaaaaaaaaa { xxxxxxx + yyyyyyyy } bbbbbbbbbbbbb"
// )
// ```
// - But, if the expression is too long to fit even with parentheses, then
// don't add the parentheses and instead break the expression at `soft_line_break`.
// ```
// xxxxxxxxx = f"aaaaaaaaaaaa {
// xxxxxxxxx + yyyyyyyyyy
// } bbbbbbbbbbbbb"
// ```
// This isn't completely decided yet, refer to the relevant discussion:
// https://github.com/astral-sh/ruff/discussions/9785
} else if AnyString::FString(self).is_multiline(context.source()) {
OptionalParentheses::Never
} else {
Expand Down
9 changes: 9 additions & 0 deletions crates/ruff_python_formatter/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -466,3 +466,12 @@ pub enum PythonVersion {
Py311,
Py312,
}

impl PythonVersion {
    /// Reports whether this target version implements [PEP 701], i.e. whether it is
    /// Python 3.12 or any later variant of the enum.
    ///
    /// [PEP 701]: https://peps.python.org/pep-0701/
    pub fn supports_pep_701(self) -> bool {
        // The oldest target version for which this check passes.
        let minimum = Self::Py312;
        minimum <= self
    }
}
108 changes: 86 additions & 22 deletions crates/ruff_python_formatter/src/other/f_string.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
use ruff_formatter::write;
use ruff_python_ast::FString;
use ruff_text_size::Ranged;

use crate::prelude::*;
use crate::preview::is_hex_codes_in_unicode_sequences_enabled;
use crate::string::{Quoting, StringNormalizer};
use crate::preview::is_pep_701_enabled;
use crate::string::{
choose_quotes, Quoting, StringNormalizer, StringPart, StringPrefix, StringQuotes,
};

use super::f_string_element::FormatFStringElement;

/// Formats an f-string which is part of a larger f-string expression.
///
Expand All @@ -25,27 +31,85 @@ impl<'a> FormatFString<'a> {
impl Format<PyFormatContext<'_>> for FormatFString<'_> {
    /// Writes a single f-string part to the formatter.
    ///
    /// When PEP 701 preview formatting is disabled, the f-string is emitted through
    /// [`StringNormalizer`] and every comment attached to its elements is marked as
    /// formatted afterwards. When it is enabled, the prefix and the chosen quotes are
    /// written first, then each element is formatted through [`FormatFStringElement`]
    /// with a shared [`FStringContext`], and finally the closing quotes are written.
    ///
    /// # Errors
    ///
    /// Propagates any error returned while writing the prefix, the quotes, or one of
    /// the elements.
    fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
        let locator = f.context().locator();

        if !is_pep_701_enabled(f.context()) {
            let result = StringNormalizer::from_source(self.value.range(), &locator)
                .normalize(
                    self.quoting,
                    &locator,
                    f.options().quote_style(),
                    f.context().docstring(),
                    is_hex_codes_in_unicode_sequences_enabled(f.context()),
                )
                .fmt(f);

            // With PEP 701, comments can be inside f-strings. They are marked as
            // formatted here, and only after the f-string itself has been formatted.
            // Borrowing the comments at this point (rather than before formatting)
            // means no clone is needed to satisfy the borrow checker.
            let comments = f.context().comments();
            for element in self.value.elements.iter() {
                comments.mark_verbatim_node_comments_formatted(element.into());
            }
            return result;
        }

        let string = StringPart::from_source(self.value.range(), &locator);

        // TODO(dhruvmanila): We could always use the same quotes for Python 3.12.
        // But, care needs to be taken for when formatting inside a docstring.
        let quotes = choose_quotes(
            &string,
            &locator,
            self.quoting,
            f.options().quote_style(),
            f.context().docstring(),
        );

        // The f-string counts as multiline when its source text contains a `\n` or `\r`.
        let is_multiline =
            memchr::memchr2(b'\n', b'\r', locator.slice(self.value).as_bytes()).is_some();
        let context = FStringContext::new(string.prefix(), quotes, is_multiline);

        // Starting prefix and quote
        write!(f, [string.prefix(), quotes])?;

        // Every element receives the same (`Copy`) context.
        f.join()
            .entries(
                self.value
                    .elements
                    .iter()
                    .map(|element| FormatFStringElement::new(element, context)),
            )
            .finish()?;

        // Ending quote
        quotes.fmt(f)
    }
}

/// State computed once per f-string and passed by value to each
/// `FormatFStringElement` while the f-string's elements are formatted.
#[derive(Clone, Copy, Debug)]
pub(crate) struct FStringContext {
/// Prefix of the f-string, as reported by `StringPart::prefix`.
prefix: StringPrefix,
/// Quotes selected for the f-string by `choose_quotes`; written both before and
/// after the elements.
quotes: StringQuotes,
/// `true` when the f-string's source text contains a `\n` or `\r`.
is_multiline: bool,
}

impl FStringContext {
/// Builds a context from the f-string's prefix, the quotes chosen for it, and the
/// flag telling whether the f-string is multiline. The arguments are stored as-is.
const fn new(prefix: StringPrefix, quotes: StringQuotes, is_multiline: bool) -> Self {
Self {
prefix,
quotes,
is_multiline,
}
}

/// Returns the quotes stored in this context.
pub(crate) const fn quotes(self) -> StringQuotes {
self.quotes
}

/// Returns the string prefix stored in this context.
pub(crate) const fn prefix(self) -> StringPrefix {
self.prefix
}

let result = StringNormalizer::from_source(self.value.range(), &locator)
.normalize(
self.quoting,
&locator,
f.options().quote_style(),
f.context().docstring(),
is_hex_codes_in_unicode_sequences_enabled(f.context()),
)
.fmt(f);

// TODO(dhruvmanila): With PEP 701, comments can be inside f-strings.
// This is to mark all of those comments as formatted but we need to
// figure out how to handle them. Note that this needs to be done only
// after the f-string is formatted, so only for all the non-formatted
// comments.
let comments = f.context().comments();
self.value.elements.iter().for_each(|value| {
comments.mark_verbatim_node_comments_formatted(value.into());
});

result
/// Returns `true` when this context was not flagged as multiline.
///
/// NOTE(review): presumably consulted by the element formatters to decide whether
/// soft line breaks inside replacement fields must be suppressed — confirm against
/// the callers in `f_string_element.rs`.
pub(crate) const fn should_remove_soft_line_breaks(self) -> bool {
!self.is_multiline
}
}
Loading

0 comments on commit 4216d2e

Please sign in to comment.