From fdc15b1e11f3e9ee82c399b6c910e47844d7a4a9 Mon Sep 17 00:00:00 2001 From: Zanie Date: Thu, 24 Aug 2023 14:31:28 -0500 Subject: [PATCH 1/5] Avoid parsing other parts of a format specification if replacements are present --- .../rules/bad_string_format_character.rs | 7 +- crates/ruff_python_literal/src/format.rs | 138 +++++++++--------- 2 files changed, 76 insertions(+), 69 deletions(-) diff --git a/crates/ruff/src/rules/pylint/rules/bad_string_format_character.rs b/crates/ruff/src/rules/pylint/rules/bad_string_format_character.rs index fdf221d10a4d0..9c9db34f16c2f 100644 --- a/crates/ruff/src/rules/pylint/rules/bad_string_format_character.rs +++ b/crates/ruff/src/rules/pylint/rules/bad_string_format_character.rs @@ -63,13 +63,14 @@ pub(crate) fn call(checker: &mut Checker, string: &str, range: TextRange) { )); } Err(_) => {} - Ok(format_spec) => { - for replacement in format_spec.replacements() { + Ok(FormatSpec::Static(_)) => {} + Ok(FormatSpec::Dynamic(format_spec)) => { + for replacement in format_spec.replacements { let FormatPart::Field { format_spec, .. } = replacement else { continue; }; if let Err(FormatSpecError::InvalidFormatType) = - FormatSpec::parse(format_spec) + FormatSpec::parse(&format_spec) { checker.diagnostics.push(Diagnostic::new( BadStringFormatCharacter { diff --git a/crates/ruff_python_literal/src/format.rs b/crates/ruff_python_literal/src/format.rs index 2e2364e218635..c9d8e2d87c8bb 100644 --- a/crates/ruff_python_literal/src/format.rs +++ b/crates/ruff_python_literal/src/format.rs @@ -209,11 +209,16 @@ impl FormatParse for FormatType { /// "hello {name:{fmt:{not_allowed}}}".format(name="test", fmt="<20") # Syntax error /// ``` /// -/// When replacements are present in a format specification, we will parse them and -/// store them in [`FormatSpec`] but will otherwise ignore them if they would introduce -/// an invalid format specification at runtime. +/// When replacements are present in a format specification, parsing will return a [`DynamicFormatSpec`] +/// and avoid attempting to parse any of the clauses. Otherwise, a [`StaticFormatSpec`] will be used. #[derive(Debug, PartialEq)] -pub struct FormatSpec { +pub enum FormatSpec { + Static(StaticFormatSpec), + Dynamic(DynamicFormatSpec), +} + +#[derive(Debug, PartialEq)] +pub struct StaticFormatSpec { // Ex) `!s` in `'{!s}'` conversion: Option, // Ex) `*` in `'{:*^30}'` @@ -232,8 +237,12 @@ pub struct FormatSpec { precision: Option, // Ex) `f` in `'{:+f}'` format_type: Option, +} + +#[derive(Debug, PartialEq)] +pub struct DynamicFormatSpec { // Ex) `x` and `y` in `'{:*{x},{y}b}'` - replacements: Vec, + pub replacements: Vec, } #[derive(Copy, Clone, Debug, PartialEq, Default)] @@ -334,27 +343,51 @@ fn parse_nested_placeholder<'a>( } } +/// Parse and consume all placeholders in a format spec +/// This will also consume any intermediate characters such as `x` and `y` in `"x{foo}y{bar}z"` +/// If no placeholders are present, the text will be returned unmodified. +fn consume_all_placeholders<'a>( + placeholders: &mut Vec, + text: &'a str, +) -> Result<&'a str, FormatSpecError> { + let mut chars = text.chars(); + let mut text = text; + let mut placeholder_count = placeholders.len(); + + while chars.clone().contains(&'{') { + text = parse_nested_placeholder(placeholders, chars.as_str())?; + chars = text.chars(); + + dbg!(&text, &chars); + // If we did not parse a placeholder, consume a character + if placeholder_count == placeholders.len() { + chars.next(); + } else { + placeholder_count = placeholders.len(); + } + } + dbg!(&text); + Ok(text) +} + impl FormatSpec { pub fn parse(text: &str) -> Result { let mut replacements = vec![]; // get_integer in CPython - let text = parse_nested_placeholder(&mut replacements, text)?; + let text = consume_all_placeholders(&mut replacements, text)?; + + if !replacements.is_empty() { + return Ok(FormatSpec::Dynamic(DynamicFormatSpec { replacements })); + } + let (conversion, text) = FormatConversion::parse(text); - let text = parse_nested_placeholder(&mut replacements, text)?; let (mut fill, mut align, text) = parse_fill_and_align(text); - let text = parse_nested_placeholder(&mut replacements, text)?; let (sign, text) = FormatSign::parse(text); - let text = parse_nested_placeholder(&mut replacements, text)?; let (alternate_form, text) = parse_alternate_form(text); - let text = parse_nested_placeholder(&mut replacements, text)?; let (zero, text) = parse_zero(text); - let text = parse_nested_placeholder(&mut replacements, text)?; let (width, text) = parse_number(text)?; - let text = parse_nested_placeholder(&mut replacements, text)?; let (grouping_option, text) = FormatGrouping::parse(text); - let text = parse_nested_placeholder(&mut replacements, text)?; let (precision, text) = parse_precision(text)?; - let text = parse_nested_placeholder(&mut replacements, text)?; let (format_type, _text) = if text.is_empty() { (None, text) @@ -376,7 +409,7 @@ impl FormatSpec { align = align.or(Some(FormatAlign::AfterSign)); } - Ok(FormatSpec { + Ok(FormatSpec::Static(StaticFormatSpec { conversion, fill, align, @@ -386,12 +419,7 @@ impl FormatSpec { grouping_option, precision, format_type, - replacements, - }) - } - - pub fn replacements(&self) -> &[FormatPart] { - return self.replacements.as_slice(); + })) } } @@ -730,7 +758,7 @@ mod tests { #[test] fn test_width_only() { - let expected = Ok(FormatSpec { + let expected = Ok(FormatSpec::Static(StaticFormatSpec { conversion: None, fill: None, align: None, @@ -740,14 +768,13 @@ mod tests { grouping_option: None, precision: None, format_type: None, - replacements: vec![], - }); + })); assert_eq!(FormatSpec::parse("33"), expected); } #[test] fn test_fill_and_width() { - let expected = Ok(FormatSpec { + let expected = Ok(FormatSpec::Static(StaticFormatSpec { conversion: None, fill: Some('<'), align: Some(FormatAlign::Right), @@ -757,44 +784,25 @@ mod tests { grouping_option: None, precision: None, format_type: None, - replacements: vec![], - }); + })); assert_eq!(FormatSpec::parse("<>33"), expected); } #[test] fn test_format_part() { - let expected = Ok(FormatSpec { - conversion: None, - fill: None, - align: None, - sign: None, - alternate_form: false, - width: None, - grouping_option: None, - precision: None, - format_type: None, + let expected = Ok(FormatSpec::Dynamic(DynamicFormatSpec { replacements: vec![FormatPart::Field { field_name: "x".to_string(), conversion_spec: None, format_spec: String::new(), }], - }); + })); assert_eq!(FormatSpec::parse("{x}"), expected); } #[test] - fn test_format_parts() { - let expected = Ok(FormatSpec { - conversion: None, - fill: None, - align: None, - sign: None, - alternate_form: false, - width: None, - grouping_option: None, - precision: None, - format_type: None, + fn test_dynamic_format_spec() { + let expected = Ok(FormatSpec::Dynamic(DynamicFormatSpec { replacements: vec![ FormatPart::Field { field_name: "x".to_string(), @@ -812,34 +820,25 @@ mod tests { format_spec: String::new(), }, ], - }); + })); assert_eq!(FormatSpec::parse("{x}{y:<2}{z}"), expected); } #[test] - fn test_format_part_with_others() { - let expected = Ok(FormatSpec { - conversion: None, - fill: None, - align: Some(FormatAlign::Left), - sign: None, - alternate_form: false, - width: Some(20), - grouping_option: None, - precision: None, - format_type: Some(FormatType::Binary), + fn test_dynamic_format_spec_with_others() { + let expected = Ok(FormatSpec::Dynamic(DynamicFormatSpec { replacements: vec![FormatPart::Field { field_name: "x".to_string(), conversion_spec: None, format_spec: String::new(), }], - }); + })); assert_eq!(FormatSpec::parse("<{x}20b"), expected); } #[test] fn test_all() { - let expected = Ok(FormatSpec { + let expected = Ok(FormatSpec::Static(StaticFormatSpec { conversion: None, fill: Some('<'), align: Some(FormatAlign::Right), @@ -849,8 +848,7 @@ mod tests { grouping_option: Some(FormatGrouping::Comma), precision: Some(11), format_type: Some(FormatType::Binary), - replacements: vec![], - }); + })); assert_eq!(FormatSpec::parse("<>-#23,.11b"), expected); } @@ -966,7 +964,15 @@ mod tests { ); assert_eq!( FormatSpec::parse("{}}"), - Err(FormatSpecError::InvalidFormatType) + // Note this should be an `InvalidFormatType` but we give up + // on all other parsing validation when we see a replacement + Ok(FormatSpec::Dynamic(DynamicFormatSpec { + replacements: vec![FormatPart::Field { + field_name: String::new(), + conversion_spec: None, + format_spec: String::new() + }] + })) ); assert_eq!( FormatSpec::parse("{{x}}"), From 094e51ed3f81433c3b704c180f59e064f70ec39a Mon Sep 17 00:00:00 2001 From: Zanie Date: Tue, 22 Aug 2023 09:36:48 -0500 Subject: [PATCH 2/5] Clean up implementation --- crates/ruff_python_literal/src/format.rs | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/crates/ruff_python_literal/src/format.rs b/crates/ruff_python_literal/src/format.rs index c9d8e2d87c8bb..7778545e828bf 100644 --- a/crates/ruff_python_literal/src/format.rs +++ b/crates/ruff_python_literal/src/format.rs @@ -190,7 +190,7 @@ impl FormatParse for FormatType { /// "hello {name:<20}".format(name="test") /// ``` /// -/// Format specifications allow nested replacements for dynamic formatting. +/// Format specifications allow nested placeholders for dynamic formatting. /// For example, the following statements are equivalent: /// ```python /// "hello {name:{fmt}}".format(name="test", fmt="<20") @@ -198,18 +198,18 @@ impl FormatParse for FormatType { /// "hello {name:<20{empty}>}".format(name="test", empty="") /// ``` /// -/// Nested replacements can include additional format specifiers. +/// Nested placeholders can include additional format specifiers. /// ```python /// "hello {name:{fmt:*>}}".format(name="test", fmt="<20") /// ``` /// -/// However, replacements can only be singly nested (preserving our sanity). +/// However, placeholders can only be singly nested (preserving our sanity). /// A [`FormatSpecError::PlaceholderRecursionExceeded`] will be raised while parsing in this case. /// ```python /// "hello {name:{fmt:{not_allowed}}}".format(name="test", fmt="<20") # Syntax error /// ``` /// -/// When replacements are present in a format specification, parsing will return a [`DynamicFormatSpec`] +/// When placeholders are present in a format specification, parsing will return a [`DynamicFormatSpec`] /// and avoid attempting to parse any of the clauses. Otherwise, a [`StaticFormatSpec`] will be used. #[derive(Debug, PartialEq)] pub enum FormatSpec { @@ -242,7 +242,7 @@ pub struct StaticFormatSpec { #[derive(Debug, PartialEq)] pub struct DynamicFormatSpec { // Ex) `x` and `y` in `'{:*{x},{y}b}'` - pub replacements: Vec, + pub placeholders: Vec, } #[derive(Copy, Clone, Debug, PartialEq, Default)] @@ -329,7 +329,7 @@ fn parse_precision(text: &str) -> Result<(Option, &str), FormatSpecError> /// Parses a format part within a format spec fn parse_nested_placeholder<'a>( - parts: &mut Vec, + placeholders: &mut Vec, text: &'a str, ) -> Result<&'a str, FormatSpecError> { match FormatString::parse_spec(text, AllowPlaceholderNesting::No) { @@ -337,7 +337,7 @@ fn parse_nested_placeholder<'a>( Err(FormatParseError::MissingStartBracket) => Ok(text), Err(err) => Err(FormatSpecError::InvalidPlaceholder(err)), Ok((format_part, text)) => { - parts.push(format_part); + placeholders.push(format_part); Ok(text) } } @@ -372,12 +372,11 @@ fn consume_all_placeholders<'a>( impl FormatSpec { pub fn parse(text: &str) -> Result { - let mut replacements = vec![]; - // get_integer in CPython - let text = consume_all_placeholders(&mut replacements, text)?; + let mut placeholders = vec![]; + let text = consume_all_placeholders(&mut placeholders, text)?; - if !replacements.is_empty() { - return Ok(FormatSpec::Dynamic(DynamicFormatSpec { replacements })); + if !placeholders.is_empty() { + return Ok(FormatSpec::Dynamic(DynamicFormatSpec { placeholders })); } let (conversion, text) = FormatConversion::parse(text); From 35d2440f8ed4afc528d07715f7bb91c557189487 Mon Sep 17 00:00:00 2001 From: Zanie Date: Tue, 22 Aug 2023 09:37:26 -0500 Subject: [PATCH 3/5] Add failing test case for `bad-string-format-character` --- .../pylint/bad_string_format_character.py | 8 +++++--- .../pylint/rules/bad_string_format_character.rs | 4 ++-- ...__PLE1300_bad_string_format_character.py.snap | 12 ++++++------ crates/ruff_python_literal/src/format.rs | 16 ++++++++-------- 4 files changed, 21 insertions(+), 19 deletions(-) diff --git a/crates/ruff/resources/test/fixtures/pylint/bad_string_format_character.py b/crates/ruff/resources/test/fixtures/pylint/bad_string_format_character.py index 4b2a194abb155..cffe53d7234ea 100644 --- a/crates/ruff/resources/test/fixtures/pylint/bad_string_format_character.py +++ b/crates/ruff/resources/test/fixtures/pylint/bad_string_format_character.py @@ -15,9 +15,11 @@ "{:s} {:y}".format("hello", "world") # [bad-format-character] "{:*^30s}".format("centered") # OK -"{:{s}}".format("hello", s="s") # OK (nested replacement value not checked) - -"{:{s:y}}".format("hello", s="s") # [bad-format-character] (nested replacement format spec checked) +"{:{s}}".format("hello", s="s") # OK (nested placeholder value not checked) +"{:{s:y}}".format("hello", s="s") # [bad-format-character] (nested placeholder format spec checked) +"{0:.{prec}g}".format(1.23, prec=15) # OK (cannot validate after nested placeholder) +"{0:.{foo}{bar}{foobar}y}".format(...) # OK (cannot validate after nested placeholders) +"{0:.{foo}x{bar}y{foobar}g}".format(...) # OK (all nested placeholders are consumed without considering in between chars) ## f-strings diff --git a/crates/ruff/src/rules/pylint/rules/bad_string_format_character.rs b/crates/ruff/src/rules/pylint/rules/bad_string_format_character.rs index 9c9db34f16c2f..cd8002bca0b43 100644 --- a/crates/ruff/src/rules/pylint/rules/bad_string_format_character.rs +++ b/crates/ruff/src/rules/pylint/rules/bad_string_format_character.rs @@ -65,8 +65,8 @@ pub(crate) fn call(checker: &mut Checker, string: &str, range: TextRange) { Err(_) => {} Ok(FormatSpec::Static(_)) => {} Ok(FormatSpec::Dynamic(format_spec)) => { - for replacement in format_spec.replacements { - let FormatPart::Field { format_spec, .. } = replacement else { + for placeholder in format_spec.placeholders { + let FormatPart::Field { format_spec, .. } = placeholder else { continue; }; if let Err(FormatSpecError::InvalidFormatType) = diff --git a/crates/ruff/src/rules/pylint/snapshots/ruff__rules__pylint__tests__PLE1300_bad_string_format_character.py.snap b/crates/ruff/src/rules/pylint/snapshots/ruff__rules__pylint__tests__PLE1300_bad_string_format_character.py.snap index f34f9cc876358..709c8ec814c89 100644 --- a/crates/ruff/src/rules/pylint/snapshots/ruff__rules__pylint__tests__PLE1300_bad_string_format_character.py.snap +++ b/crates/ruff/src/rules/pylint/snapshots/ruff__rules__pylint__tests__PLE1300_bad_string_format_character.py.snap @@ -51,14 +51,14 @@ bad_string_format_character.py:15:1: PLE1300 Unsupported format character 'y' 17 | "{:*^30s}".format("centered") # OK | -bad_string_format_character.py:20:1: PLE1300 Unsupported format character 'y' +bad_string_format_character.py:19:1: PLE1300 Unsupported format character 'y' | -18 | "{:{s}}".format("hello", s="s") # OK (nested replacement value not checked) -19 | -20 | "{:{s:y}}".format("hello", s="s") # [bad-format-character] (nested replacement format spec checked) +17 | "{:*^30s}".format("centered") # OK +18 | "{:{s}}".format("hello", s="s") # OK (nested placeholder value not checked) +19 | "{:{s:y}}".format("hello", s="s") # [bad-format-character] (nested placeholder format spec checked) | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLE1300 -21 | -22 | ## f-strings +20 | "{0:.{prec}g}".format(1.23, prec=15) # OK (cannot validate after nested placeholder) +21 | "{0:.{foo}{bar}{foobar}y}".format(...) # OK (cannot validate after nested placeholders) | diff --git a/crates/ruff_python_literal/src/format.rs b/crates/ruff_python_literal/src/format.rs index 7778545e828bf..1ec6bc7b2a0b9 100644 --- a/crates/ruff_python_literal/src/format.rs +++ b/crates/ruff_python_literal/src/format.rs @@ -333,7 +333,7 @@ fn parse_nested_placeholder<'a>( text: &'a str, ) -> Result<&'a str, FormatSpecError> { match FormatString::parse_spec(text, AllowPlaceholderNesting::No) { - // Not a nested replacement, OK + // Not a nested placeholder, OK Err(FormatParseError::MissingStartBracket) => Ok(text), Err(err) => Err(FormatSpecError::InvalidPlaceholder(err)), Ok((format_part, text)) => { @@ -464,7 +464,7 @@ impl std::fmt::Display for FormatParseError { std::write!(fmt, "unescaped start bracket in literal") } Self::PlaceholderRecursionExceeded => { - std::write!(fmt, "multiply nested replacement not allowed") + std::write!(fmt, "multiply nested placeholder not allowed") } Self::UnknownConversion => { std::write!(fmt, "unknown conversion") @@ -790,7 +790,7 @@ mod tests { #[test] fn test_format_part() { let expected = Ok(FormatSpec::Dynamic(DynamicFormatSpec { - replacements: vec![FormatPart::Field { + placeholders: vec![FormatPart::Field { field_name: "x".to_string(), conversion_spec: None, format_spec: String::new(), @@ -802,7 +802,7 @@ mod tests { #[test] fn test_dynamic_format_spec() { let expected = Ok(FormatSpec::Dynamic(DynamicFormatSpec { - replacements: vec![ + placeholders: vec![ FormatPart::Field { field_name: "x".to_string(), conversion_spec: None, @@ -826,7 +826,7 @@ mod tests { #[test] fn test_dynamic_format_spec_with_others() { let expected = Ok(FormatSpec::Dynamic(DynamicFormatSpec { - replacements: vec![FormatPart::Field { + placeholders: vec![FormatPart::Field { field_name: "x".to_string(), conversion_spec: None, format_spec: String::new(), @@ -874,7 +874,7 @@ mod tests { } #[test] - fn test_format_parse_nested_replacement() { + fn test_format_parse_nested_placeholder() { let expected = Ok(FormatString { format_parts: vec![ FormatPart::Literal("abcd".to_owned()), @@ -964,9 +964,9 @@ mod tests { assert_eq!( FormatSpec::parse("{}}"), // Note this should be an `InvalidFormatType` but we give up - // on all other parsing validation when we see a replacement + // on all other parsing validation when we see a placeholder Ok(FormatSpec::Dynamic(DynamicFormatSpec { - replacements: vec![FormatPart::Field { + placeholders: vec![FormatPart::Field { field_name: String::new(), conversion_spec: None, format_spec: String::new() From 53de2d38cb7eb9250f99e44c7a29afbd6600272b Mon Sep 17 00:00:00 2001 From: Zanie Date: Thu, 24 Aug 2023 14:43:45 -0500 Subject: [PATCH 4/5] Remove debug prints --- crates/ruff_python_literal/src/format.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/crates/ruff_python_literal/src/format.rs b/crates/ruff_python_literal/src/format.rs index 1ec6bc7b2a0b9..8d1614f8625af 100644 --- a/crates/ruff_python_literal/src/format.rs +++ b/crates/ruff_python_literal/src/format.rs @@ -357,8 +357,6 @@ fn consume_all_placeholders<'a>( while chars.clone().contains(&'{') { text = parse_nested_placeholder(placeholders, chars.as_str())?; chars = text.chars(); - - dbg!(&text, &chars); // If we did not parse a placeholder, consume a character if placeholder_count == placeholders.len() { chars.next(); @@ -366,7 +364,6 @@ fn consume_all_placeholders<'a>( placeholder_count = placeholders.len(); } } - dbg!(&text); Ok(text) } From 78a965ab195eed0f1a5da856381996afdf0f8f72 Mon Sep 17 00:00:00 2001 From: Zanie Date: Fri, 25 Aug 2023 11:10:34 -0500 Subject: [PATCH 5/5] Review feedback Thanks Charlie :) --- crates/ruff_python_literal/src/format.rs | 47 ++++++++---------------- 1 file changed, 15 insertions(+), 32 deletions(-) diff --git a/crates/ruff_python_literal/src/format.rs b/crates/ruff_python_literal/src/format.rs index 8d1614f8625af..01f98587634c9 100644 --- a/crates/ruff_python_literal/src/format.rs +++ b/crates/ruff_python_literal/src/format.rs @@ -327,51 +327,34 @@ fn parse_precision(text: &str) -> Result<(Option, &str), FormatSpecError> }) } -/// Parses a format part within a format spec -fn parse_nested_placeholder<'a>( - placeholders: &mut Vec, - text: &'a str, -) -> Result<&'a str, FormatSpecError> { +/// Parses a placeholder format part within a format specification +fn parse_nested_placeholder(text: &str) -> Result, FormatSpecError> { match FormatString::parse_spec(text, AllowPlaceholderNesting::No) { // Not a nested placeholder, OK - Err(FormatParseError::MissingStartBracket) => Ok(text), + Err(FormatParseError::MissingStartBracket) => Ok(None), Err(err) => Err(FormatSpecError::InvalidPlaceholder(err)), - Ok((format_part, text)) => { - placeholders.push(format_part); - Ok(text) - } + Ok((format_part, text)) => Ok(Some((format_part, text))), } } -/// Parse and consume all placeholders in a format spec -/// This will also consume any intermediate characters such as `x` and `y` in `"x{foo}y{bar}z"` -/// If no placeholders are present, the text will be returned unmodified. -fn consume_all_placeholders<'a>( - placeholders: &mut Vec, - text: &'a str, -) -> Result<&'a str, FormatSpecError> { - let mut chars = text.chars(); - let mut text = text; - let mut placeholder_count = placeholders.len(); - - while chars.clone().contains(&'{') { - text = parse_nested_placeholder(placeholders, chars.as_str())?; - chars = text.chars(); - // If we did not parse a placeholder, consume a character - if placeholder_count == placeholders.len() { - chars.next(); +/// Parse all placeholders in a format specification +/// If no placeholders are present, an empty vector will be returned +fn parse_nested_placeholders(mut text: &str) -> Result, FormatSpecError> { + let mut placeholders = vec![]; + while let Some(bracket) = text.find('{') { + if let Some((format_part, rest)) = parse_nested_placeholder(&text[bracket..])? { + text = rest; + placeholders.push(format_part); } else { - placeholder_count = placeholders.len(); + text = &text[bracket + 1..]; } } - Ok(text) + Ok(placeholders) } impl FormatSpec { pub fn parse(text: &str) -> Result { - let mut placeholders = vec![]; - let text = consume_all_placeholders(&mut placeholders, text)?; - + let placeholders = parse_nested_placeholders(text)?; if !placeholders.is_empty() { return Ok(FormatSpec::Dynamic(DynamicFormatSpec { placeholders })); }