Format single string part

astral-sh · Jun 22, 2023 · 0a3ed21 · 0a3ed21
1 parent 52dc57e
commit 0a3ed21
Show file tree

Hide file tree

Showing 21 changed files with 574 additions and 319 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/crates/ruff_python_formatter/Cargo.toml b/crates/ruff_python_formatter/Cargo.toml
@@ -17,6 +17,7 @@ ruff_python_ast = { path = "../ruff_python_ast" }
 ruff_text_size = { workspace = true }
 
 anyhow = { workspace = true }
+bitflags = { workspace = true }
 clap = { workspace = true }
 countme = "3.0.1"
 is-macro = { workspace = true }

diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/string.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/string.py
@@ -0,0 +1,29 @@
+"' test"
+'" test'
+
+"\" test"
+'\' test'
+
+# Prefer single quotes for string with more double quotes
+"' \" \" '' \" \" '"
+
+# Prefer double quotes for string with more single quotes
+'\' " " \'\' " " \''
+
+# Prefer double quotes for string with equal amount of single and double quotes
+'" \' " " \'\''
+"' \" '' \" \" '"
+
+
+u"Test"
+U"Test"
+
+r"Test"
+R"Test"
+
+'This string will not include \
+backslashes or newline characters.'
+
+if True:
+    'This string will not include \
+        backslashes or newline characters.'
diff --git a/crates/ruff_python_formatter/src/expression/expr_constant.rs b/crates/ruff_python_formatter/src/expression/expr_constant.rs
@@ -2,15 +2,11 @@ use crate::comments::Comments;
 use crate::expression::parentheses::{
     default_expression_needs_parentheses, NeedsParentheses, Parentheses, Parenthesize,
 };
+use crate::expression::string::FormatString;
 use crate::prelude::*;
-use crate::trivia::SimpleTokenizer;
 use crate::{not_yet_implemented_custom_text, verbatim_text, FormatNodeRule};
-use ruff_formatter::{write, FormatContext, FormatError};
-use ruff_python_ast::str::{is_implicit_concatenation, leading_quote};
-use ruff_text_size::TextRange;
-use rustpython_parser::ast::{Constant, ExprConstant, Ranged};
-use rustpython_parser::lexer::{lex_starts_at, Lexer};
-use rustpython_parser::{Mode, Tok};
+use ruff_formatter::write;
+use rustpython_parser::ast::{Constant, ExprConstant};
 
 #[derive(Default)]
 pub struct FormatExprConstant;
@@ -33,7 +29,7 @@ impl FormatNodeRule<ExprConstant> for FormatExprConstant {
             Constant::Int(_) | Constant::Float(_) | Constant::Complex { .. } => {
                 write!(f, [verbatim_text(item)])
             }
-            Constant::Str(_) => FormatString { constant: item }.fmt(f),
+            Constant::Str(_) => FormatString::new(item).fmt(f),
             Constant::Bytes(_) => {
                 not_yet_implemented_custom_text(r#"b"NOT_YET_IMPLEMENTED_BYTE_STRING""#).fmt(f)
             }
@@ -73,22 +69,3 @@ impl NeedsParentheses for ExprConstant {
         }
     }
 }
-
-struct FormatString<'a> {
-    constant: &'a ExprConstant,
-}
-
-impl Format<PyFormatContext<'_>> for FormatString<'_> {
-    fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
-        let constant = self.constant;
-        debug_assert!(constant.value.is_str());
-
-        let string_content = f.context().locator().slice(constant.range());
-
-        if is_implicit_concatenation(string_content) {
-            not_yet_implemented_custom_text(r#""NOT_YET_IMPLEMENTED_STRING""#).fmt(f)
-        } else {
-            source_text_slice(constant.range(), ContainsNewlines::Detect).fmt(f)
-        }
-    }
-}
diff --git a/crates/ruff_python_formatter/src/expression/mod.rs b/crates/ruff_python_formatter/src/expression/mod.rs
@@ -37,6 +37,7 @@ pub(crate) mod expr_unary_op;
 pub(crate) mod expr_yield;
 pub(crate) mod expr_yield_from;
 pub(crate) mod parentheses;
+mod string;
 
 #[derive(Default)]
 pub struct FormatExpr {

diff --git a/crates/ruff_python_formatter/src/expression/string.rs b/crates/ruff_python_formatter/src/expression/string.rs
@@ -0,0 +1,249 @@
+use crate::prelude::*;
+use crate::{not_yet_implemented_custom_text, QuoteStyle};
+use bitflags::bitflags;
+use ruff_formatter::{write, FormatError};
+use ruff_python_ast::str::is_implicit_concatenation;
+use ruff_text_size::{TextLen, TextRange, TextSize};
+use rustpython_parser::ast::{ExprConstant, Ranged};
+use std::borrow::Cow;
+
+/// Formats a string constant, identified by the range it occupies in the source text.
+pub(super) struct FormatString {
+    string_range: TextRange,
+}
+
+impl FormatString {
+    /// Creates the formatter for `constant`.
+    ///
+    /// `constant` is expected to hold a string value; this is only checked by a debug
+    /// assertion.
+    pub(super) fn new(constant: &ExprConstant) -> Self {
+        debug_assert!(constant.value.is_str());
+
+        let string_range = constant.range();
+        Self { string_range }
+    }
+}
+
+impl Format<PyFormatContext<'_>> for FormatString {
+    /// Formats the string as a single part, or writes a placeholder when the source text
+    /// is an implicit concatenation of several string literals.
+    fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
+        let source = f.context().locator().slice(self.string_range);
+
+        if !is_implicit_concatenation(source) {
+            return FormatStringPart::new(self.string_range).fmt(f);
+        }
+
+        // Implicit concatenation is not formatted yet.
+        not_yet_implemented_custom_text(r#""NOT_YET_IMPLEMENTED" "IMPLICIT_CONCATENATION""#)
+            .fmt(f)
+    }
+}
+
+/// Formats one string literal (prefix, quotes and content) located at `part_range`.
+struct FormatStringPart {
+    part_range: TextRange,
+}
+
+impl FormatStringPart {
+    /// Creates the formatter for the literal that spans `range` in the source text.
+    const fn new(range: TextRange) -> Self {
+        FormatStringPart { part_range: range }
+    }
+}
+
+impl Format<PyFormatContext<'_>> for FormatStringPart {
+    /// Writes one string literal: its prefix, the preferred quotes and the literal's content.
+    ///
+    /// For regular strings the quote style that needs fewer escapes is chosen and the quote
+    /// escapes in the content are normalized to match it.
+    ///
+    /// Raw strings are written with their content untouched: a backslash is part of a raw
+    /// string's value, so removing or inserting one would change the string. Their quotes
+    /// are only switched to double quotes when the content contains no double quote.
+    ///
+    /// Returns `FormatError::SyntaxError` if the text after the prefix does not start with
+    /// a quote character.
+    fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
+        let string_content = f.context().locator().slice(self.part_range);
+
+        let prefix = StringPrefix::parse(string_content);
+        let after_prefix = &string_content[usize::from(prefix.text_len())..];
+
+        let quotes = StringQuotes::parse(after_prefix).ok_or(FormatError::SyntaxError)?;
+        // Range of the content between the quotes, relative to the start of this part.
+        let relative_raw_content_range = TextRange::new(
+            prefix.text_len() + quotes.text_len(),
+            string_content.text_len() - quotes.text_len(),
+        );
+        // The same range, relative to the start of the source text.
+        let raw_content_range = relative_raw_content_range + self.part_range.start();
+
+        let raw_content = &string_content[relative_raw_content_range];
+
+        let is_raw =
+            prefix.contains(StringPrefix::RAW) || prefix.contains(StringPrefix::RAW_UPPER);
+
+        let preferred_quote = if is_raw {
+            // Quotes inside a raw string cannot be escaped without changing its value, so
+            // double quotes are only usable if the content has none.
+            if raw_content.contains(QuoteStyle::Double.as_char()) {
+                quotes.style
+            } else {
+                QuoteStyle::Double
+            }
+        } else {
+            preferred_quotes(raw_content)
+        };
+
+        let output_quotes = StringQuotes {
+            style: preferred_quote,
+            triple: quotes.triple,
+        };
+
+        write!(f, [prefix, output_quotes])?;
+
+        if is_raw {
+            // Never rewrite escapes in raw strings; emit the content verbatim.
+            source_text_slice(raw_content_range, ContainsNewlines::Detect).fmt(f)?;
+        } else {
+            match normalize_quotes(raw_content, preferred_quote) {
+                // Nothing had to change: reuse the source text.
+                Cow::Borrowed(_) => {
+                    source_text_slice(raw_content_range, ContainsNewlines::Detect).fmt(f)?;
+                }
+                Cow::Owned(normalized) => {
+                    dynamic_text(&normalized, Some(raw_content_range.start())).fmt(f)?;
+                }
+            }
+        }
+
+        output_quotes.fmt(f)
+    }
+}
+
+bitflags! {
+    /// Set of prefix characters that precede the opening quote of a string literal.
+    #[derive(Copy, Clone, Debug)]
+    struct StringPrefix: u8 {
+        /// `u"test"` or `U"test"`
+        const UNICODE   = 0b0000_0001;
+        /// `r"test"`
+        const RAW       = 0b0000_0010;
+        /// `R"test"`
+        const RAW_UPPER = 0b0000_0100;
+        /// `b"test"` or `B"test"`
+        const BYTE      = 0b0000_1000;
+        /// `f"test"` or `F"test"`
+        const F_STRING  = 0b0001_0000;
+    }
+}
+
+impl StringPrefix {
+    /// Collects the prefix flags from the leading characters of `input`, stopping at the
+    /// first character that is not a prefix character.
+    fn parse(input: &str) -> StringPrefix {
+        input
+            .chars()
+            .map_while(|c| match c {
+                'u' | 'U' => Some(StringPrefix::UNICODE),
+                'f' | 'F' => Some(StringPrefix::F_STRING),
+                'b' | 'B' => Some(StringPrefix::BYTE),
+                'r' => Some(StringPrefix::RAW),
+                'R' => Some(StringPrefix::RAW_UPPER),
+                _ => None,
+            })
+            .fold(StringPrefix::empty(), |prefix, flag| prefix | flag)
+    }
+
+    /// Length of the prefix, computed as the number of flags that are set.
+    ///
+    /// This equals the length of the parsed prefix text only when every flag was set by
+    /// exactly one character.
+    const fn text_len(self) -> TextSize {
+        let flag_count = self.bits().count_ones();
+        TextSize::new(flag_count)
+    }
+}
+
+impl Format<PyFormatContext<'_>> for StringPrefix {
+    /// Writes the prefix in a fixed order: the raw marker first, then `b`, then `f`.
+    /// The unicode flag is never written.
+    fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
+        // The lowercase marker takes precedence if both raw flags are set.
+        let raw_marker = if self.contains(StringPrefix::RAW) {
+            Some("r")
+        } else if self.contains(StringPrefix::RAW_UPPER) {
+            Some("R")
+        } else {
+            None
+        };
+
+        if let Some(marker) = raw_marker {
+            text(marker).fmt(f)?;
+        }
+
+        if self.contains(StringPrefix::BYTE) {
+            text("b").fmt(f)?;
+        }
+
+        if self.contains(StringPrefix::F_STRING) {
+            text("f").fmt(f)?;
+        }
+
+        // `UNICODE` is deliberately dropped from the output.
+        Ok(())
+    }
+}
+
+/// Detects the preferred quotes for `input`. The preferred quote style is the one that
+/// requires fewer escape sequences.
+///
+/// Quotes are counted whether or not they are escaped in `input`. Double quotes are
+/// preferred unless `input` contains strictly more double quotes than single quotes.
+fn preferred_quotes(input: &str) -> QuoteStyle {
+    let mut single_count = 0u32;
+    let mut double_count = 0u32;
+    let mut remaining = input.chars();
+
+    while let Some(current) = remaining.next() {
+        // For an escape sequence, inspect the escaped character instead of the backslash.
+        let inspected = if current == '\\' {
+            match remaining.next() {
+                Some(escaped) => escaped,
+                // A trailing backslash leaves nothing to count.
+                None => break,
+            }
+        } else {
+            current
+        };
+
+        match QuoteStyle::try_from(inspected) {
+            Ok(QuoteStyle::Single) => single_count += 1,
+            Ok(QuoteStyle::Double) => double_count += 1,
+            Err(()) => {}
+        }
+    }
+
+    if double_count > single_count {
+        QuoteStyle::Single
+    } else {
+        QuoteStyle::Double
+    }
+}
+
+/// The quotes that delimit a string literal.
+struct StringQuotes {
+    /// Whether the literal uses three quote characters on each side.
+    triple: bool,
+    /// Which quote character is used.
+    style: QuoteStyle,
+}
+
+impl StringQuotes {
+    /// Parses the opening quotes at the start of `input`.
+    ///
+    /// Returns `None` if `input` is empty or does not start with a quote character.
+    fn parse(input: &str) -> Option<StringQuotes> {
+        let quote_char = input.chars().next()?;
+        let style = QuoteStyle::try_from(quote_char).ok()?;
+
+        // Triple quoted if the two characters after the first quote repeat it.
+        let mut following = input.chars().skip(1);
+        let triple = matches!(
+            (following.next(), following.next()),
+            (Some(second), Some(third)) if second == quote_char && third == quote_char
+        );
+
+        Some(StringQuotes { triple, style })
+    }
+
+    /// Length of the quotes on one side of the literal: 3 if triple quoted, 1 otherwise.
+    const fn text_len(&self) -> TextSize {
+        TextSize::new(if self.triple { 3 } else { 1 })
+    }
+}
+
+impl Format<PyFormatContext<'_>> for StringQuotes {
+    /// Writes the quote characters for one side of the literal.
+    fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
+        let quotes = match self.style {
+            QuoteStyle::Single => {
+                if self.triple {
+                    "'''"
+                } else {
+                    "'"
+                }
+            }
+            QuoteStyle::Double => {
+                if self.triple {
+                    "\"\"\""
+                } else {
+                    "\""
+                }
+            }
+        };
+
+        text(quotes).fmt(f)
+    }
+}
+
+/// Rewrites the quote escapes in `input` so that the content can be enclosed in `style`
+/// quotes: escapes in front of the opposite quote are removed and unescaped preferred
+/// quotes get a backslash added.
+///
+/// Returns `Cow::Borrowed(input)` when no change was necessary, otherwise the rewritten
+/// content as `Cow::Owned`.
+fn normalize_quotes(input: &str, style: QuoteStyle) -> Cow<str> {
+    let mut output = String::new();
+
+    let mut chars = input.char_indices();
+
+    let preferred_quote = style.as_char();
+    let opposite_quote = style.opposite().as_char();
+    // Byte offset up to which `input` has already been copied to (or skipped in) `output`.
+    let mut last_index = 0;
+
+    while let Some((index, c)) = chars.next() {
+        // Note: the `chars.next()` in the condition runs for every backslash, so the
+        // escaped character is always consumed here. That keeps escaped backslashes and
+        // already escaped preferred quotes out of the `else if` branch below.
+        if c == '\\'
+            && chars
+                .next()
+                .map_or(false, |(_, next)| next == opposite_quote)
+        {
+            // Remove the escape
+            output.push_str(&input[last_index..index]);
+            last_index = index + '\\'.len_utf8();
+        } else if c == preferred_quote {
+            // Escape the quote
+            output.push_str(&input[last_index..index]);
+            output.push('\\');
+            output.push(c);
+            last_index = index + preferred_quote.len_utf8();
+        }
+    }
+
+    // Every change moves `last_index` past 0, so 0 means the input is already normalized.
+    if last_index == 0 {
+        Cow::Borrowed(input)
+    } else {
+        // Copy whatever follows the last change.
+        output.push_str(&input[last_index..]);
+        Cow::Owned(output)
+    }
+}
diff --git a/crates/ruff_python_formatter/src/lib.rs b/crates/ruff_python_formatter/src/lib.rs
@@ -226,6 +226,41 @@ impl Format<PyFormatContext<'_>> for VerbatimText {
     }
 }
 
+/// The quote character used to delimit a string literal.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub enum QuoteStyle {
+    /// `'`
+    Single,
+    /// `"`
+    Double,
+}
+
+impl QuoteStyle {
+    /// Returns the quote character of this style.
+    pub const fn as_char(self) -> char {
+        match self {
+            Self::Single => '\'',
+            Self::Double => '"',
+        }
+    }
+
+    /// Returns the other quote style.
+    #[must_use]
+    pub const fn opposite(self) -> QuoteStyle {
+        match self {
+            Self::Single => Self::Double,
+            Self::Double => Self::Single,
+        }
+    }
+}
+
+impl TryFrom<char> for QuoteStyle {
+    type Error = ();
+
+    /// Converts a quote character to its style; any other character is an error.
+    fn try_from(value: char) -> std::result::Result<Self, Self::Error> {
+        if value == '\'' {
+            Ok(Self::Single)
+        } else if value == '"' {
+            Ok(Self::Double)
+        } else {
+            Err(())
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use anyhow::Result;