Skip to content

Commit

Permalink
Format single string part
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaReiser committed Jun 22, 2023
1 parent 52dc57e commit 0a3ed21
Show file tree
Hide file tree
Showing 21 changed files with 574 additions and 319 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/ruff_python_formatter/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ ruff_python_ast = { path = "../ruff_python_ast" }
ruff_text_size = { workspace = true }

anyhow = { workspace = true }
bitflags = { workspace = true }
clap = { workspace = true }
countme = "3.0.1"
is-macro = { workspace = true }
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"' test"
'" test'

"\" test"
'\' test'

# Prefer single quotes for string with more double quotes
"' \" \" '' \" \" '"

# Prefer double quotes for string with more single quotes
'\' " " \'\' " " \''

# Prefer double quotes for string with equal amount of single and double quotes
'" \' " " \'\''
"' \" '' \" \" '"


u"Test"
U"Test"

r"Test"
R"Test"

'This string will not include \
backslashes or newline characters.'

if True:
'This string will not include \
backslashes or newline characters.'
31 changes: 4 additions & 27 deletions crates/ruff_python_formatter/src/expression/expr_constant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,11 @@ use crate::comments::Comments;
use crate::expression::parentheses::{
default_expression_needs_parentheses, NeedsParentheses, Parentheses, Parenthesize,
};
use crate::expression::string::FormatString;
use crate::prelude::*;
use crate::trivia::SimpleTokenizer;
use crate::{not_yet_implemented_custom_text, verbatim_text, FormatNodeRule};
use ruff_formatter::{write, FormatContext, FormatError};
use ruff_python_ast::str::{is_implicit_concatenation, leading_quote};
use ruff_text_size::TextRange;
use rustpython_parser::ast::{Constant, ExprConstant, Ranged};
use rustpython_parser::lexer::{lex_starts_at, Lexer};
use rustpython_parser::{Mode, Tok};
use ruff_formatter::write;
use rustpython_parser::ast::{Constant, ExprConstant};

/// Formatting rule for [`ExprConstant`] nodes; it carries no state of its own.
#[derive(Default)]
pub struct FormatExprConstant;
Expand All @@ -33,7 +29,7 @@ impl FormatNodeRule<ExprConstant> for FormatExprConstant {
Constant::Int(_) | Constant::Float(_) | Constant::Complex { .. } => {
write!(f, [verbatim_text(item)])
}
Constant::Str(_) => FormatString { constant: item }.fmt(f),
Constant::Str(_) => FormatString::new(item).fmt(f),
Constant::Bytes(_) => {
not_yet_implemented_custom_text(r#"b"NOT_YET_IMPLEMENTED_BYTE_STRING""#).fmt(f)
}
Expand Down Expand Up @@ -73,22 +69,3 @@ impl NeedsParentheses for ExprConstant {
}
}
}

/// Formats a string constant by borrowing the [`ExprConstant`] that holds it.
struct FormatString<'a> {
    constant: &'a ExprConstant,
}

impl Format<PyFormatContext<'_>> for FormatString<'_> {
    /// Writes the string exactly as it appears in the source. Implicitly
    /// concatenated strings are not supported yet and are replaced by a
    /// placeholder text instead.
    fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
        debug_assert!(self.constant.value.is_str());

        let range = self.constant.range();
        let source = f.context().locator().slice(range);

        if !is_implicit_concatenation(source) {
            return source_text_slice(range, ContainsNewlines::Detect).fmt(f);
        }

        not_yet_implemented_custom_text(r#""NOT_YET_IMPLEMENTED_STRING""#).fmt(f)
    }
}
1 change: 1 addition & 0 deletions crates/ruff_python_formatter/src/expression/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ pub(crate) mod expr_unary_op;
pub(crate) mod expr_yield;
pub(crate) mod expr_yield_from;
pub(crate) mod parentheses;
mod string;

#[derive(Default)]
pub struct FormatExpr {
Expand Down
249 changes: 249 additions & 0 deletions crates/ruff_python_formatter/src/expression/string.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
use crate::prelude::*;
use crate::{not_yet_implemented_custom_text, QuoteStyle};
use bitflags::bitflags;
use ruff_formatter::{write, FormatError};
use ruff_python_ast::str::is_implicit_concatenation;
use ruff_text_size::{TextLen, TextRange, TextSize};
use rustpython_parser::ast::{ExprConstant, Ranged};
use std::borrow::Cow;

/// Formats a string constant expression, identified by its range in the source.
pub(super) struct FormatString {
    string_range: TextRange,
}

impl FormatString {
    /// Records the source range of `constant`.
    ///
    /// `constant` is expected to hold a string value; this is only verified in
    /// debug builds.
    pub(super) fn new(constant: &ExprConstant) -> Self {
        debug_assert!(constant.value.is_str());

        let string_range = constant.range();
        Self { string_range }
    }
}

impl Format<PyFormatContext<'_>> for FormatString {
    /// Formats the string as a single part, or writes a placeholder when the
    /// source text is an implicit concatenation of several literals.
    fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
        let range = self.string_range;
        let is_concatenated = is_implicit_concatenation(f.context().locator().slice(range));

        if is_concatenated {
            return not_yet_implemented_custom_text(
                r#""NOT_YET_IMPLEMENTED" "IMPLICIT_CONCATENATION""#,
            )
            .fmt(f);
        }

        FormatStringPart::new(range).fmt(f)
    }
}

/// Formats one string literal (prefix, quotes and content) located at
/// `part_range` in the source.
struct FormatStringPart {
    part_range: TextRange,
}

impl FormatStringPart {
    /// Creates a formatter for the literal covering `range`.
    const fn new(range: TextRange) -> Self {
        FormatStringPart { part_range: range }
    }
}

impl Format<PyFormatContext<'_>> for FormatStringPart {
    /// Writes the string literal with a normalized prefix and the preferred quote style.
    ///
    /// For regular strings the content is re-escaped to fit the chosen quotes.
    /// Raw strings (`r`/`R` prefix) are handled separately: a backslash in a raw
    /// string is part of the string's value, so escapes can neither be added nor
    /// removed. Their content is always written verbatim and the quote style is
    /// only changed when that is possible without touching the content.
    ///
    /// # Errors
    ///
    /// Returns [`FormatError::SyntaxError`] if no quote character follows the prefix.
    fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
        let string_content = f.context().locator().slice(self.part_range);

        let prefix = StringPrefix::parse(string_content);
        let after_prefix = &string_content[usize::from(prefix.text_len())..];

        let quotes = StringQuotes::parse(after_prefix).ok_or(FormatError::SyntaxError)?;

        // Range of the text between the quotes, first relative to the literal and
        // then translated to an absolute source range.
        let relative_raw_content_range = TextRange::new(
            prefix.text_len() + quotes.text_len(),
            string_content.text_len() - quotes.text_len(),
        );
        let raw_content_range = relative_raw_content_range + self.part_range.start();

        let raw_content = &string_content[relative_raw_content_range];
        let preferred_quote = preferred_quotes(raw_content);

        let is_raw =
            prefix.contains(StringPrefix::RAW) || prefix.contains(StringPrefix::RAW_UPPER);

        if is_raw {
            // Switching quotes is only safe if the new quote character never occurs
            // unescaped in the content: escaping it would insert a backslash into
            // the string's value. Otherwise keep the quotes the author used.
            let style = if contains_unescaped_quote(raw_content, preferred_quote.as_char()) {
                quotes.style
            } else {
                preferred_quote
            };

            let raw_quotes = StringQuotes {
                style,
                triple: quotes.triple,
            };

            write!(f, [prefix, raw_quotes])?;
            source_text_slice(raw_content_range, ContainsNewlines::Detect).fmt(f)?;
            return raw_quotes.fmt(f);
        }

        let preferred_quotes = StringQuotes {
            style: preferred_quote,
            triple: quotes.triple,
        };

        write!(f, [prefix, preferred_quotes])?;

        let normalized = normalize_quotes(raw_content, preferred_quote);

        match normalized {
            // Nothing had to be re-escaped: reuse the source text as is.
            Cow::Borrowed(_) => {
                source_text_slice(raw_content_range, ContainsNewlines::Detect).fmt(f)?;
            }
            Cow::Owned(normalized) => {
                dynamic_text(&normalized, Some(raw_content_range.start())).fmt(f)?;
            }
        }

        preferred_quotes.fmt(f)
    }
}

/// Returns `true` if `input` contains `quote` at a position where it is not
/// preceded by an escaping backslash.
///
/// A backslash always shields the character that follows it, which also makes
/// `\\` followed by a quote count as an unescaped quote.
fn contains_unescaped_quote(input: &str, quote: char) -> bool {
    let mut chars = input.chars();

    while let Some(c) = chars.next() {
        if c == '\\' {
            // Skip whatever the backslash escapes.
            chars.next();
        } else if c == quote {
            return true;
        }
    }

    false
}

bitflags! {
/// Set of prefix kinds found in front of a string literal's opening quote.
#[derive(Copy, Clone, Debug)]
struct StringPrefix: u8 {
/// `u"test"` or `U"test"`
const UNICODE = 0b0000_0001;
/// `r"test"`
const RAW = 0b0000_0010;
/// `R"test"`
const RAW_UPPER = 0b0000_0100;
/// `b"test"` or `B"test"`
const BYTE = 0b0000_1000;
/// `f"test"` or `F"test"`
const F_STRING = 0b0001_0000;
}
}

impl StringPrefix {
    /// Collects the prefix flags from the leading characters of `input`.
    ///
    /// Scanning stops at the first character that is not a prefix letter.
    fn parse(input: &str) -> StringPrefix {
        input
            .chars()
            .map_while(|c| match c {
                'u' | 'U' => Some(StringPrefix::UNICODE),
                'f' | 'F' => Some(StringPrefix::F_STRING),
                'b' | 'B' => Some(StringPrefix::BYTE),
                'r' => Some(StringPrefix::RAW),
                'R' => Some(StringPrefix::RAW_UPPER),
                _ => None,
            })
            .fold(StringPrefix::empty(), |prefix, flag| prefix | flag)
    }

    /// Length of the prefix, computed as the number of flags that are set.
    ///
    /// NOTE(review): this equals the length in the source only if every prefix
    /// letter maps to a distinct flag, i.e. no kind is repeated.
    const fn text_len(self) -> TextSize {
        let flag_count = self.bits().count_ones();
        TextSize::new(flag_count)
    }
}

impl Format<PyFormatContext<'_>> for StringPrefix {
    /// Writes the prefix in a fixed order: raw marker first (keeping its case),
    /// then `b`, then `f`. The unicode prefix is intentionally not written.
    fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
        let raw_marker = if self.contains(StringPrefix::RAW) {
            Some("r")
        } else if self.contains(StringPrefix::RAW_UPPER) {
            Some("R")
        } else {
            None
        };

        if let Some(marker) = raw_marker {
            text(marker).fmt(f)?;
        }

        if self.contains(StringPrefix::BYTE) {
            text("b").fmt(f)?;
        }

        if self.contains(StringPrefix::F_STRING) {
            text("f").fmt(f)?;
        }

        Ok(())
    }
}

/// Picks the quote style for `input` that needs the fewest escape sequences.
///
/// Every single and double quote in `input` is counted, whether or not it is
/// currently escaped with a backslash. Single quotes are chosen only when there
/// are strictly more double quotes than single quotes; ties go to double quotes.
fn preferred_quotes(input: &str) -> QuoteStyle {
    let (mut singles, mut doubles) = (0u32, 0u32);
    let mut remaining = input.chars();

    loop {
        let candidate = match remaining.next() {
            None => break,
            // For an escape sequence, classify the character after the backslash.
            Some('\\') => match remaining.next() {
                Some(escaped) => escaped,
                None => break,
            },
            Some(other) => other,
        };

        if let Ok(style) = QuoteStyle::try_from(candidate) {
            match style {
                QuoteStyle::Single => singles += 1,
                QuoteStyle::Double => doubles += 1,
            }
        }
    }

    if doubles > singles {
        QuoteStyle::Single
    } else {
        QuoteStyle::Double
    }
}

/// The quotes delimiting a string literal: which character is used and whether
/// it is tripled.
struct StringQuotes {
    triple: bool,
    style: QuoteStyle,
}

impl StringQuotes {
    /// Reads the opening quotes from the start of `input`.
    ///
    /// Returns `None` when `input` is empty or does not start with a quote
    /// character. The quotes count as triple when the two characters after the
    /// first one repeat it.
    fn parse(input: &str) -> Option<StringQuotes> {
        let mut chars = input.chars();

        let opening = chars.next()?;
        let style = QuoteStyle::try_from(opening).ok()?;

        let triple = matches!(
            (chars.next(), chars.next()),
            (Some(second), Some(third)) if second == opening && third == opening
        );

        Some(StringQuotes { triple, style })
    }

    /// Number of quote characters on one side of the literal: 3 for triple
    /// quotes, 1 otherwise.
    const fn text_len(&self) -> TextSize {
        TextSize::new(if self.triple { 3 } else { 1 })
    }
}

impl Format<PyFormatContext<'_>> for StringQuotes {
    /// Writes one delimiter: the quote character once, or three times for
    /// triple-quoted strings.
    fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
        let delimiter = match self.style {
            QuoteStyle::Single => {
                if self.triple {
                    "'''"
                } else {
                    "'"
                }
            }
            QuoteStyle::Double => {
                if self.triple {
                    "\"\"\""
                } else {
                    "\""
                }
            }
        };

        text(delimiter).fmt(f)
    }
}

/// Rewrites the escapes in `input` so that it can be enclosed in `style` quotes.
///
/// * A backslash in front of the opposite quote is dropped, because that quote
///   no longer needs escaping.
/// * An unescaped occurrence of the `style` quote gets a backslash in front.
/// * Any other escape sequence is left untouched.
///
/// Returns [`Cow::Borrowed`] when nothing had to change.
fn normalize_quotes(input: &str, style: QuoteStyle) -> Cow<str> {
    let quote_to_escape = style.as_char();
    let quote_to_unescape = style.opposite().as_char();

    let mut rewritten = String::new();
    // Byte offset up to which `input` has already been copied into `rewritten`.
    let mut copied_up_to = 0;
    let mut iter = input.char_indices();

    while let Some((position, current)) = iter.next() {
        match current {
            '\\' => {
                // The character after a backslash is always consumed so that it
                // is never mistaken for an unescaped quote.
                if let Some((_, escaped)) = iter.next() {
                    if escaped == quote_to_unescape {
                        rewritten.push_str(&input[copied_up_to..position]);
                        copied_up_to = position + '\\'.len_utf8();
                    }
                }
            }
            _ if current == quote_to_escape => {
                rewritten.push_str(&input[copied_up_to..position]);
                rewritten.push('\\');
                rewritten.push(current);
                copied_up_to = position + quote_to_escape.len_utf8();
            }
            _ => {}
        }
    }

    // Every rewrite moves `copied_up_to` past offset 0, so 0 means "unchanged".
    if copied_up_to == 0 {
        return Cow::Borrowed(input);
    }

    rewritten.push_str(&input[copied_up_to..]);
    Cow::Owned(rewritten)
}
35 changes: 35 additions & 0 deletions crates/ruff_python_formatter/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,41 @@ impl Format<PyFormatContext<'_>> for VerbatimText {
}
}

/// The two quote characters that can delimit a string literal.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum QuoteStyle {
    /// `'`
    Single,
    /// `"`
    Double,
}

impl QuoteStyle {
    /// Returns the quote character for this style.
    pub const fn as_char(self) -> char {
        match self {
            Self::Single => '\'',
            Self::Double => '"',
        }
    }

    /// Returns the other quote style.
    #[must_use]
    pub const fn opposite(self) -> QuoteStyle {
        if matches!(self, Self::Single) {
            Self::Double
        } else {
            Self::Single
        }
    }
}

impl TryFrom<char> for QuoteStyle {
    type Error = ();

    /// Maps `'` to [`QuoteStyle::Single`] and `"` to [`QuoteStyle::Double`];
    /// every other character yields `Err(())`.
    fn try_from(value: char) -> std::result::Result<Self, Self::Error> {
        if value == '\'' {
            Ok(QuoteStyle::Single)
        } else if value == '"' {
            Ok(QuoteStyle::Double)
        } else {
            Err(())
        }
    }
}

#[cfg(test)]
mod tests {
use anyhow::Result;
Expand Down
Loading

0 comments on commit 0a3ed21

Please sign in to comment.