Skip to content

Commit

Permalink
[internal] ComparableExpr (f)strings and bytes made invariant under…
Browse files Browse the repository at this point in the history
… concatenation (#13301)
  • Loading branch information
dylwil3 committed Sep 25, 2024
1 parent ca0ae0a commit f27a8b8
Show file tree
Hide file tree
Showing 3 changed files with 174 additions and 39 deletions.
149 changes: 110 additions & 39 deletions crates/ruff_python_ast/src/comparable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
//! an implicit concatenation of string literals, as these expressions are considered to
//! have the same shape in that they evaluate to the same value.

use std::borrow::Cow;

use crate as ast;

#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)]
Expand Down Expand Up @@ -511,7 +513,7 @@ impl<'a> From<&'a ast::ExceptHandler> for ComparableExceptHandler<'a> {

#[derive(Debug, PartialEq, Eq, Hash)]
pub enum ComparableFStringElement<'a> {
Literal(&'a str),
Literal(Cow<'a, str>),
FStringExpressionElement(FStringExpressionElement<'a>),
}

Expand All @@ -527,23 +529,34 @@ impl<'a> From<&'a ast::FStringElement> for ComparableFStringElement<'a> {
fn from(fstring_element: &'a ast::FStringElement) -> Self {
match fstring_element {
ast::FStringElement::Literal(ast::FStringLiteralElement { value, .. }) => {
Self::Literal(value)
}
ast::FStringElement::Expression(formatted_value) => {
Self::FStringExpressionElement(FStringExpressionElement {
expression: (&formatted_value.expression).into(),
debug_text: formatted_value.debug_text.as_ref(),
conversion: formatted_value.conversion,
format_spec: formatted_value
.format_spec
.as_ref()
.map(|spec| spec.elements.iter().map(Into::into).collect()),
})
Self::Literal(value.as_ref().into())
}
ast::FStringElement::Expression(formatted_value) => formatted_value.into(),
}
}
}

impl<'a> From<&'a ast::FStringExpressionElement> for ComparableFStringElement<'a> {
fn from(fstring_expression_element: &'a ast::FStringExpressionElement) -> Self {
let ast::FStringExpressionElement {
expression,
debug_text,
conversion,
format_spec,
range: _,
} = fstring_expression_element;

Self::FStringExpressionElement(FStringExpressionElement {
expression: (expression).into(),
debug_text: debug_text.as_ref(),
conversion: *conversion,
format_spec: format_spec
.as_ref()
.map(|spec| spec.elements.iter().map(Into::into).collect()),
})
}
}

#[derive(Debug, PartialEq, Eq, Hash)]
pub struct ComparableElifElseClause<'a> {
test: Option<ComparableExpr<'a>>,
Expand Down Expand Up @@ -597,28 +610,82 @@ impl<'a> From<ast::LiteralExpressionRef<'a>> for ComparableLiteral<'a> {

#[derive(Debug, PartialEq, Eq, Hash)]
pub struct ComparableFString<'a> {
elements: Vec<ComparableFStringElement<'a>>,
}
elements: Box<[ComparableFStringElement<'a>]>,
}

impl<'a> From<&'a ast::FStringValue> for ComparableFString<'a> {
// The approach below is somewhat complicated, so it may
// require some justification.
//
// Suppose given an f-string of the form
// `f"{foo!r} one" " and two " f" and three {bar!s}"`
// This decomposes as:
// - An `FStringPart::FString`, `f"{foo!r} one"` with elements
// - `FStringElement::Expression` encoding `{foo!r}`
// - `FStringElement::Literal` encoding " one"
// - An `FStringPart::Literal` capturing `" and two "`
// - An `FStringPart::FString`, `f" and three {bar!s}"` with elements
// - `FStringElement::Literal` encoding " and three "
// - `FStringElement::Expression` encoding `{bar!s}`
//
// We would like to extract from this a vector of (comparable) f-string
// _elements_ which alternate between expression elements and literal
// elements. In order to do so, we need to concatenate adjacent string
// literals. String literals may be separated for two reasons: either
// they appear in adjacent string literal parts, or else a string literal
// part is adjacent to a string literal _element_ inside of an f-string part.
fn from(value: &'a ast::FStringValue) -> Self {
#[derive(Default)]
struct Collector<'a> {
elements: Vec<ComparableFStringElement<'a>>,
}

impl<'a> From<&'a ast::FString> for ComparableFString<'a> {
fn from(fstring: &'a ast::FString) -> Self {
Self {
elements: fstring.elements.iter().map(Into::into).collect(),
impl<'a> Collector<'a> {
// The logic for concatenating adjacent string literals
// occurs here, implicitly: when we encounter a sequence
// of string literals, the first gets pushed to the
// `elements` vector, while subsequent strings
// are concatenated onto this top string.
fn push_literal(&mut self, literal: &'a str) {
if let Some(ComparableFStringElement::Literal(existing_literal)) =
self.elements.last_mut()
{
existing_literal.to_mut().push_str(literal);
} else {
self.elements
.push(ComparableFStringElement::Literal(literal.into()));
}
}

fn push_expression(&mut self, expression: &'a ast::FStringExpressionElement) {
self.elements.push(expression.into());
}
}
}
}

#[derive(Debug, PartialEq, Eq, Hash)]
pub enum ComparableFStringPart<'a> {
Literal(ComparableStringLiteral<'a>),
FString(ComparableFString<'a>),
}
let mut collector = Collector::default();

for part in value {
match part {
ast::FStringPart::Literal(string_literal) => {
collector.push_literal(&string_literal.value);
}
ast::FStringPart::FString(fstring) => {
for element in &fstring.elements {
match element {
ast::FStringElement::Literal(literal) => {
collector.push_literal(&literal.value);
}
ast::FStringElement::Expression(expression) => {
collector.push_expression(expression);
}
}
}
}
}
}

impl<'a> From<&'a ast::FStringPart> for ComparableFStringPart<'a> {
fn from(f_string_part: &'a ast::FStringPart) -> Self {
match f_string_part {
ast::FStringPart::Literal(string_literal) => Self::Literal(string_literal.into()),
ast::FStringPart::FString(f_string) => Self::FString(f_string.into()),
Self {
elements: collector.elements.into_boxed_slice(),
}
}
}
Expand All @@ -638,13 +705,13 @@ impl<'a> From<&'a ast::StringLiteral> for ComparableStringLiteral<'a> {

#[derive(Debug, PartialEq, Eq, Hash)]
pub struct ComparableBytesLiteral<'a> {
value: &'a [u8],
value: Cow<'a, [u8]>,
}

impl<'a> From<&'a ast::BytesLiteral> for ComparableBytesLiteral<'a> {
fn from(bytes_literal: &'a ast::BytesLiteral) -> Self {
Self {
value: &bytes_literal.value,
value: Cow::Borrowed(&bytes_literal.value),
}
}
}
Expand Down Expand Up @@ -775,17 +842,17 @@ pub struct ExprFStringExpressionElement<'a> {

#[derive(Debug, PartialEq, Eq, Hash)]
pub struct ExprFString<'a> {
parts: Vec<ComparableFStringPart<'a>>,
value: ComparableFString<'a>,
}

#[derive(Debug, PartialEq, Eq, Hash)]
pub struct ExprStringLiteral<'a> {
parts: Vec<ComparableStringLiteral<'a>>,
value: ComparableStringLiteral<'a>,
}

#[derive(Debug, PartialEq, Eq, Hash)]
pub struct ExprBytesLiteral<'a> {
parts: Vec<ComparableBytesLiteral<'a>>,
value: ComparableBytesLiteral<'a>,
}

#[derive(Debug, PartialEq, Eq, Hash)]
Expand Down Expand Up @@ -1019,17 +1086,21 @@ impl<'a> From<&'a ast::Expr> for ComparableExpr<'a> {
}),
ast::Expr::FString(ast::ExprFString { value, range: _ }) => {
Self::FString(ExprFString {
parts: value.iter().map(Into::into).collect(),
value: value.into(),
})
}
ast::Expr::StringLiteral(ast::ExprStringLiteral { value, range: _ }) => {
Self::StringLiteral(ExprStringLiteral {
parts: value.iter().map(Into::into).collect(),
value: ComparableStringLiteral {
value: value.to_str(),
},
})
}
ast::Expr::BytesLiteral(ast::ExprBytesLiteral { value, range: _ }) => {
Self::BytesLiteral(ExprBytesLiteral {
parts: value.iter().map(Into::into).collect(),
value: ComparableBytesLiteral {
value: Cow::from(value),
},
})
}
ast::Expr::NumberLiteral(ast::ExprNumberLiteral { value, range: _ }) => {
Expand Down
17 changes: 17 additions & 0 deletions crates/ruff_python_ast/src/nodes.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#![allow(clippy::derive_partial_eq_without_eq)]

use std::borrow::Cow;
use std::fmt;
use std::fmt::Debug;
use std::iter::FusedIterator;
Expand Down Expand Up @@ -2186,6 +2187,22 @@ impl PartialEq<[u8]> for BytesLiteralValue {
}
}

impl<'a> From<&'a BytesLiteralValue> for Cow<'a, [u8]> {
fn from(value: &'a BytesLiteralValue) -> Self {
match &value.inner {
BytesLiteralValueInner::Single(BytesLiteral {
value: bytes_value, ..
}) => Cow::from(bytes_value.as_ref()),
BytesLiteralValueInner::Concatenated(bytes_literal_vec) => Cow::Owned(
bytes_literal_vec
.iter()
.flat_map(|bytes_literal| bytes_literal.value.to_vec())
.collect::<Vec<u8>>(),
),
}
}
}

/// An internal representation of [`BytesLiteralValue`].
#[derive(Clone, Debug, PartialEq)]
enum BytesLiteralValueInner {
Expand Down
47 changes: 47 additions & 0 deletions crates/ruff_python_ast_integration_tests/tests/comparable.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
use ruff_python_ast::comparable::ComparableExpr;
use ruff_python_parser::{parse_expression, ParseError};

#[test]
fn concatenated_strings_compare_equal() -> Result<(), ParseError> {
let split_contents = r#"'a' 'b' r'\n raw'"#;
let value_contents = r#"'ab\\n raw'"#;

let split_parsed = parse_expression(split_contents)?;
let value_parsed = parse_expression(value_contents)?;

let split_compr = ComparableExpr::from(split_parsed.expr());
let value_compr = ComparableExpr::from(value_parsed.expr());

assert_eq!(split_compr, value_compr);
Ok(())
}

#[test]
fn concatenated_bytes_compare_equal() -> Result<(), ParseError> {
let split_contents = r#"b'a' b'b'"#;
let value_contents = r#"b'ab'"#;

let split_parsed = parse_expression(split_contents)?;
let value_parsed = parse_expression(value_contents)?;

let split_compr = ComparableExpr::from(split_parsed.expr());
let value_compr = ComparableExpr::from(value_parsed.expr());

assert_eq!(split_compr, value_compr);
Ok(())
}

#[test]
fn concatenated_fstrings_compare_equal() -> Result<(), ParseError> {
let split_contents = r#"f"{foo!r} this" r"\n raw" f" and {bar!s} that""#;
let value_contents = r#"f"{foo!r} this\\n raw and {bar!s} that""#;

let split_parsed = parse_expression(split_contents)?;
let value_parsed = parse_expression(value_contents)?;

let split_compr = ComparableExpr::from(split_parsed.expr());
let value_compr = ComparableExpr::from(value_parsed.expr());

assert_eq!(split_compr, value_compr);
Ok(())
}

0 comments on commit f27a8b8

Please sign in to comment.