Skip to content

Commit

Permalink
split string module (#9987)
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaReiser authored Feb 14, 2024
1 parent bb8d203 commit fe79798
Show file tree
Hide file tree
Showing 4 changed files with 847 additions and 813 deletions.
212 changes: 212 additions & 0 deletions crates/ruff_python_formatter/src/string/any.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
use std::iter::FusedIterator;

use memchr::memchr2;

use ruff_python_ast::{
self as ast, AnyNodeRef, Expr, ExprBytesLiteral, ExprFString, ExprStringLiteral, ExpressionRef,
StringLiteral,
};
use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextLen, TextRange};

use crate::expression::expr_f_string::f_string_quoting;
use crate::other::f_string::FormatFString;
use crate::other::string_literal::{FormatStringLiteral, StringLiteralKind};
use crate::prelude::*;
use crate::string::{Quoting, StringPrefix, StringQuotes};

/// Represents any kind of string expression. This could be either a string,
/// bytes or f-string.
#[derive(Copy, Clone, Debug)]
pub(crate) enum AnyString<'a> {
String(&'a ExprStringLiteral),
Bytes(&'a ExprBytesLiteral),
FString(&'a ExprFString),
}

impl<'a> AnyString<'a> {
/// Creates a new [`AnyString`] from the given [`Expr`].
///
/// Returns `None` if the expression is not either a string, bytes or f-string.
pub(crate) fn from_expression(expression: &'a Expr) -> Option<AnyString<'a>> {
match expression {
Expr::StringLiteral(string) => Some(AnyString::String(string)),
Expr::BytesLiteral(bytes) => Some(AnyString::Bytes(bytes)),
Expr::FString(fstring) => Some(AnyString::FString(fstring)),
_ => None,
}
}

/// Returns `true` if the string is implicitly concatenated.
pub(crate) fn is_implicit_concatenated(self) -> bool {
match self {
Self::String(ExprStringLiteral { value, .. }) => value.is_implicit_concatenated(),
Self::Bytes(ExprBytesLiteral { value, .. }) => value.is_implicit_concatenated(),
Self::FString(ExprFString { value, .. }) => value.is_implicit_concatenated(),
}
}

/// Returns the quoting to be used for this string.
pub(super) fn quoting(self, locator: &Locator<'_>) -> Quoting {
match self {
Self::String(_) | Self::Bytes(_) => Quoting::CanChange,
Self::FString(f_string) => f_string_quoting(f_string, locator),
}
}

/// Returns a vector of all the [`AnyStringPart`] of this string.
pub(super) fn parts(self, quoting: Quoting) -> AnyStringPartsIter<'a> {
match self {
Self::String(ExprStringLiteral { value, .. }) => {
AnyStringPartsIter::String(value.iter())
}
Self::Bytes(ExprBytesLiteral { value, .. }) => AnyStringPartsIter::Bytes(value.iter()),
Self::FString(ExprFString { value, .. }) => {
AnyStringPartsIter::FString(value.iter(), quoting)
}
}
}

pub(crate) fn is_multiline(self, source: &str) -> bool {
match self {
AnyString::String(_) | AnyString::Bytes(_) => {
let contents = &source[self.range()];
let prefix = StringPrefix::parse(contents);
let quotes = StringQuotes::parse(
&contents[TextRange::new(prefix.text_len(), contents.text_len())],
);

quotes.is_some_and(StringQuotes::is_triple)
&& memchr2(b'\n', b'\r', contents.as_bytes()).is_some()
}
AnyString::FString(fstring) => {
memchr2(b'\n', b'\r', source[fstring.range].as_bytes()).is_some()
}
}
}
}

impl Ranged for AnyString<'_> {
fn range(&self) -> TextRange {
match self {
Self::String(expr) => expr.range(),
Self::Bytes(expr) => expr.range(),
Self::FString(expr) => expr.range(),
}
}
}

impl<'a> From<&AnyString<'a>> for AnyNodeRef<'a> {
fn from(value: &AnyString<'a>) -> Self {
match value {
AnyString::String(expr) => AnyNodeRef::ExprStringLiteral(expr),
AnyString::Bytes(expr) => AnyNodeRef::ExprBytesLiteral(expr),
AnyString::FString(expr) => AnyNodeRef::ExprFString(expr),
}
}
}

impl<'a> From<AnyString<'a>> for AnyNodeRef<'a> {
fn from(value: AnyString<'a>) -> Self {
AnyNodeRef::from(&value)
}
}

impl<'a> From<&AnyString<'a>> for ExpressionRef<'a> {
fn from(value: &AnyString<'a>) -> Self {
match value {
AnyString::String(expr) => ExpressionRef::StringLiteral(expr),
AnyString::Bytes(expr) => ExpressionRef::BytesLiteral(expr),
AnyString::FString(expr) => ExpressionRef::FString(expr),
}
}
}

pub(super) enum AnyStringPartsIter<'a> {
String(std::slice::Iter<'a, StringLiteral>),
Bytes(std::slice::Iter<'a, ast::BytesLiteral>),
FString(std::slice::Iter<'a, ast::FStringPart>, Quoting),
}

impl<'a> Iterator for AnyStringPartsIter<'a> {
type Item = AnyStringPart<'a>;

fn next(&mut self) -> Option<Self::Item> {
let part = match self {
Self::String(inner) => {
let part = inner.next()?;
AnyStringPart::String {
part,
layout: StringLiteralKind::String,
}
}
Self::Bytes(inner) => AnyStringPart::Bytes(inner.next()?),
Self::FString(inner, quoting) => {
let part = inner.next()?;
match part {
ast::FStringPart::Literal(string_literal) => AnyStringPart::String {
part: string_literal,
layout: StringLiteralKind::InImplicitlyConcatenatedFString(*quoting),
},
ast::FStringPart::FString(f_string) => AnyStringPart::FString {
part: f_string,
quoting: *quoting,
},
}
}
};

Some(part)
}
}

impl FusedIterator for AnyStringPartsIter<'_> {}

/// Represents any kind of string which is part of an implicitly concatenated
/// string. This could be either a string, bytes or f-string.
///
/// This is constructed from the [`AnyString::parts`] method on [`AnyString`].
#[derive(Clone, Debug)]
pub(super) enum AnyStringPart<'a> {
String {
part: &'a ast::StringLiteral,
layout: StringLiteralKind,
},
Bytes(&'a ast::BytesLiteral),
FString {
part: &'a ast::FString,
quoting: Quoting,
},
}

impl<'a> From<&AnyStringPart<'a>> for AnyNodeRef<'a> {
fn from(value: &AnyStringPart<'a>) -> Self {
match value {
AnyStringPart::String { part, .. } => AnyNodeRef::StringLiteral(part),
AnyStringPart::Bytes(part) => AnyNodeRef::BytesLiteral(part),
AnyStringPart::FString { part, .. } => AnyNodeRef::FString(part),
}
}
}

impl Ranged for AnyStringPart<'_> {
fn range(&self) -> TextRange {
match self {
Self::String { part, .. } => part.range(),
Self::Bytes(part) => part.range(),
Self::FString { part, .. } => part.range(),
}
}
}

impl Format<PyFormatContext<'_>> for AnyStringPart<'_> {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
match self {
AnyStringPart::String { part, layout } => {
FormatStringLiteral::new(part, *layout).fmt(f)
}
AnyStringPart::Bytes(bytes_literal) => bytes_literal.format().fmt(f),
AnyStringPart::FString { part, quoting } => FormatFString::new(part, *quoting).fmt(f),
}
}
}
14 changes: 7 additions & 7 deletions crates/ruff_python_formatter/src/string/docstring.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ use super::{NormalizedString, QuoteChar};
/// `indent-width * spaces` to tabs because doing so could break ASCII art and other docstrings
/// that use spaces for alignment.
pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> FormatResult<()> {
let docstring = &normalized.text;
let docstring = &normalized.text();

// Black doesn't change the indentation of docstrings that contain an escaped newline
if contains_unescaped_newline(docstring) {
Expand All @@ -125,7 +125,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
let mut lines = docstring.split('\n').peekable();

// Start the string
write!(f, [normalized.prefix, normalized.quotes])?;
write!(f, [normalized.prefix(), normalized.quotes()])?;
// We track where in the source docstring we are (in source code byte offsets)
let mut offset = normalized.start();

Expand All @@ -141,7 +141,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form

// Edge case: The first line is `""" "content`, so we need to insert chaperone space that keep
// inner quotes and closing quotes from getting to close to avoid `""""content`
if trim_both.starts_with(normalized.quotes.quote_char.as_char()) {
if trim_both.starts_with(normalized.quotes().quote_char.as_char()) {
space().fmt(f)?;
}

Expand All @@ -168,7 +168,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
{
space().fmt(f)?;
}
normalized.quotes.fmt(f)?;
normalized.quotes().fmt(f)?;
return Ok(());
}

Expand All @@ -194,7 +194,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
offset,
stripped_indentation,
already_normalized,
quote_char: normalized.quotes.quote_char,
quote_char: normalized.quotes().quote_char,
code_example: CodeExample::default(),
}
.add_iter(lines)?;
Expand All @@ -207,7 +207,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
space().fmt(f)?;
}

write!(f, [normalized.quotes])
write!(f, [normalized.quotes()])
}

fn contains_unescaped_newline(haystack: &str) -> bool {
Expand Down Expand Up @@ -1569,7 +1569,7 @@ fn docstring_format_source(
/// that avoids `content""""` and `content\"""`. This does only applies to un-escaped backslashes,
/// so `content\\ """` doesn't need a space while `content\\\ """` does.
fn needs_chaperone_space(normalized: &NormalizedString, trim_end: &str) -> bool {
trim_end.ends_with(normalized.quotes.quote_char.as_char())
trim_end.ends_with(normalized.quotes().quote_char.as_char())
|| trim_end.chars().rev().take_while(|c| *c == '\\').count() % 2 == 1
}

Expand Down
Loading

0 comments on commit fe79798

Please sign in to comment.