Skip to content

Commit

Permalink
Separate Q003 to accomodate f-string context (#7588)
Browse files Browse the repository at this point in the history
## Summary

This PR updates the `Q003` rule to accommodate the new f-string context.
The logic here takes into consideration the nested f-strings and the
configured target version.

The rule checks for escaped quotes within a string and determines if
they are avoidable or not. It is avoidable if:
1. Outer quote matches the user preferred quote
2. Not a raw string
3. Not a triple-quoted string
4. String content contains the same quote as the outer one
5. String content _doesn't_ contain the opposite quote

For f-string, the way it works is by using a context stack to keep track
of certain things but mainly the text range (`FStringMiddle`) where the
escapes exists. It contains the following:

1. Do we want to check for escaped quotes in the current f-string? This
is required to:
* Preserve the context for `FStringMiddle` tokens where we need to check
for escaped quotes. But, the answer to whether we need to check or not
lies with the `FStringStart` token which contains the quotes. So, when
the context starts, we'll store this information.
  	* Disallow nesting for pre 3.12 target versions
2. Store the `FStringStart` token range. This is required to create the
edit to replace the quote if this f-string contains escaped quote(s).
3. All the `FStringMiddle` ranges where there are escaped quote(s).

## Test Plan

* Add new test cases for nested f-strings.
* Write new tests for old Python versions as existing ones test it on
the latest version by default which is 3.12 as of this writing.
* Verify the snapshots
  • Loading branch information
dhruvmanila committed Sep 28, 2023
1 parent 30bc1af commit 35d47d1
Show file tree
Hide file tree
Showing 14 changed files with 866 additions and 165 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,27 @@
'This is a'
'\'string\''
)

# Same as above, but with f-strings
f'This is a \'string\'' # Q003
f'This is \\ a \\\'string\'' # Q003
f'"This" is a \'string\''
f"This is a 'string'"
f"\"This\" is a 'string'"
fr'This is a \'string\''
fR'This is a \'string\''
foo = (
f'This is a'
f'\'string\'' # Q003
)

# Nested f-strings (Python 3.12+)
#
# The first one is interesting because the fix for it is valid pre 3.12:
#
# f"'foo' {'nested'}"
#
# but as the actual string itself is invalid pre 3.12, we don't catch it.
f'\'foo\' {'nested'}' # Q003
f'\'foo\' {f'nested'}' # Q003
f'\'foo\' {f'\'nested\''} \'\'' # Q003
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,26 @@
"This is a"
"\"string\""
)

# Same as above, but with f-strings
f"This is a \"string\""
f"'This' is a \"string\""
f'This is a "string"'
f'\'This\' is a "string"'
fr"This is a \"string\""
fR"This is a \"string\""
foo = (
f"This is a"
f"\"string\""
)

# Nested f-strings (Python 3.12+)
#
# The first one is interesting because the fix for it is valid pre 3.12:
#
# f'"foo" {"nested"}'
#
# but as the actual string itself is invalid pre 3.12, we don't catch it.
f"\"foo\" {"foo"}" # Q003
f"\"foo\" {f"foo"}" # Q003
f"\"foo\" {f"\"foo\""} \"\"" # Q003
5 changes: 4 additions & 1 deletion crates/ruff_linter/src/checkers/tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,11 +118,14 @@ pub(crate) fn check_tokens(
);
}

if settings.rules.enabled(Rule::AvoidableEscapedQuote) && settings.flake8_quotes.avoid_escape {
flake8_quotes::rules::avoidable_escaped_quote(&mut diagnostics, tokens, locator, settings);
}

if settings.rules.any_enabled(&[
Rule::BadQuotesInlineString,
Rule::BadQuotesMultilineString,
Rule::BadQuotesDocstring,
Rule::AvoidableEscapedQuote,
]) {
flake8_quotes::rules::from_tokens(&mut diagnostics, tokens, locator, settings);
}
Expand Down
39 changes: 39 additions & 0 deletions crates/ruff_linter/src/rules/flake8_quotes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ mod tests {

use crate::assert_messages;
use crate::registry::Rule;
use crate::settings::types::PythonVersion;
use crate::settings::LinterSettings;
use crate::test::test_path;

Expand Down Expand Up @@ -45,6 +46,44 @@ mod tests {
Ok(())
}

#[test]
fn require_singles_over_doubles_escaped_py311() -> Result<()> {
let diagnostics = test_path(
Path::new("flake8_quotes/doubles_escaped.py"),
&LinterSettings {
flake8_quotes: super::settings::Settings {
inline_quotes: Quote::Single,
multiline_quotes: Quote::Single,
docstring_quotes: Quote::Single,
avoid_escape: true,
},
..LinterSettings::for_rule(Rule::AvoidableEscapedQuote)
.with_target_version(PythonVersion::Py311)
},
)?;
assert_messages!(diagnostics);
Ok(())
}

#[test]
fn require_doubles_over_singles_escaped_py311() -> Result<()> {
let diagnostics = test_path(
Path::new("flake8_quotes/singles_escaped.py"),
&LinterSettings {
flake8_quotes: super::settings::Settings {
inline_quotes: Quote::Double,
multiline_quotes: Quote::Double,
docstring_quotes: Quote::Double,
avoid_escape: true,
},
..LinterSettings::for_rule(Rule::AvoidableEscapedQuote)
.with_target_version(PythonVersion::Py311)
},
)?;
assert_messages!(diagnostics);
Ok(())
}

#[test_case(Path::new("singles.py"))]
#[test_case(Path::new("singles_escaped.py"))]
#[test_case(Path::new("singles_implicit.py"))]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::str::{is_triple_quote, leading_quote};
use ruff_python_parser::lexer::LexResult;
use ruff_python_parser::Tok;
use ruff_source_file::Locator;
use ruff_text_size::TextRange;

use crate::lex::docstring_detection::StateMachine;
use crate::registry::AsRule;
use crate::settings::LinterSettings;

/// ## What it does
/// Checks for strings that include escaped quotes, and suggests changing
/// the quote style to avoid the need to escape them.
///
/// ## Why is this bad?
/// It's preferable to avoid escaped quotes in strings. By changing the
/// outer quote style, you can avoid escaping inner quotes.
///
/// ## Example
/// ```python
/// foo = 'bar\'s'
/// ```
///
/// Use instead:
/// ```python
/// foo = "bar's"
/// ```
#[violation]
pub struct AvoidableEscapedQuote;

impl AlwaysAutofixableViolation for AvoidableEscapedQuote {
#[derive_message_formats]
fn message(&self) -> String {
format!("Change outer quotes to avoid escaping inner quotes")
}

fn autofix_title(&self) -> String {
"Change outer quotes to avoid escaping inner quotes".to_string()
}
}

struct FStringContext {
/// Whether to check for escaped quotes in the f-string.
check_for_escaped_quote: bool,
/// The range of the f-string start token.
start_range: TextRange,
/// The ranges of the f-string middle tokens containing escaped quotes.
middle_ranges_with_escapes: Vec<TextRange>,
}

impl FStringContext {
fn new(check_for_escaped_quote: bool, fstring_start_range: TextRange) -> Self {
Self {
check_for_escaped_quote,
start_range: fstring_start_range,
middle_ranges_with_escapes: vec![],
}
}

/// Update the context to not check for escaped quotes, and clear any
/// existing reported ranges.
fn ignore_escaped_quotes(&mut self) {
self.check_for_escaped_quote = false;
self.middle_ranges_with_escapes.clear();
}

fn push_fstring_middle_range(&mut self, range: TextRange) {
self.middle_ranges_with_escapes.push(range);
}
}

/// Q003
pub(crate) fn avoidable_escaped_quote(
diagnostics: &mut Vec<Diagnostic>,
lxr: &[LexResult],
locator: &Locator,
settings: &LinterSettings,
) {
let quotes_settings = &settings.flake8_quotes;
let supports_pep701 = settings.target_version.supports_pep701();
let mut fstrings: Vec<FStringContext> = Vec::new();
let mut state_machine = StateMachine::default();

for &(ref tok, tok_range) in lxr.iter().flatten() {
let is_docstring = state_machine.consume(tok);
if is_docstring {
continue;
}

if !supports_pep701 {
// If this is a string or a start of a f-string which is inside another
// f-string, we won't check for escaped quotes for the entire f-string
// if the target version doesn't support PEP 701. For example:
//
// ```python
// f"\"foo\" {'nested'}"
// # ^^^^^^^^
// # We're here
// ```
//
// If we try to fix the above example, the outer and inner quote
// will be the same which is invalid pre 3.12:
//
// ```python
// f'"foo" {'nested'}"
// ```
if matches!(tok, Tok::String { .. } | Tok::FStringStart) {
if let Some(fstring_context) = fstrings.last_mut() {
fstring_context.ignore_escaped_quotes();
continue;
}
}
}

match tok {
Tok::String {
value: string_contents,
kind,
triple_quoted,
} => {
if kind.is_raw() || *triple_quoted {
continue;
}

// Check if we're using the preferred quotation style.
if !leading_quote(locator.slice(tok_range))
.is_some_and(|text| text.contains(quotes_settings.inline_quotes.as_char()))
{
continue;
}

if string_contents.contains(quotes_settings.inline_quotes.as_char())
&& !string_contents.contains(quotes_settings.inline_quotes.opposite().as_char())
{
let mut diagnostic = Diagnostic::new(AvoidableEscapedQuote, tok_range);
if settings.rules.should_fix(diagnostic.kind.rule()) {
let fixed_contents = format!(
"{prefix}{quote}{value}{quote}",
prefix = kind.as_str(),
quote = quotes_settings.inline_quotes.opposite().as_char(),
value = unescape_string(string_contents)
);
diagnostic.set_fix(Fix::automatic(Edit::range_replacement(
fixed_contents,
tok_range,
)));
}
diagnostics.push(diagnostic);
}
}
Tok::FStringStart => {
let text = locator.slice(tok_range);
// Check for escaped quote only if we're using the preferred quotation
// style and it isn't a triple-quoted f-string.
let check_for_escaped_quote = text
.contains(quotes_settings.inline_quotes.as_char())
&& !is_triple_quote(text);
fstrings.push(FStringContext::new(check_for_escaped_quote, tok_range));
}
Tok::FStringMiddle {
value: string_contents,
is_raw,
} if !is_raw => {
let Some(context) = fstrings.last_mut() else {
continue;
};
if !context.check_for_escaped_quote {
continue;
}
if string_contents.contains(quotes_settings.inline_quotes.as_char())
&& !string_contents.contains(quotes_settings.inline_quotes.opposite().as_char())
{
context.push_fstring_middle_range(tok_range);
}
}
Tok::FStringEnd => {
let Some(context) = fstrings.pop() else {
continue;
};
if context.middle_ranges_with_escapes.is_empty() {
// There are no `FStringMiddle` tokens containing any escaped
// quotes.
continue;
}
let mut diagnostic = Diagnostic::new(
AvoidableEscapedQuote,
TextRange::new(context.start_range.start(), tok_range.end()),
);
if settings.rules.should_fix(diagnostic.kind.rule()) {
let fstring_start_edit = Edit::range_replacement(
// No need for `r`/`R` as we don't perform the checks
// for raw strings.
format!("f{}", quotes_settings.inline_quotes.opposite().as_char()),
context.start_range,
);
let fstring_middle_and_end_edits = context
.middle_ranges_with_escapes
.iter()
.map(|&range| {
Edit::range_replacement(unescape_string(locator.slice(range)), range)
})
.chain(std::iter::once(
// `FStringEnd` edit
Edit::range_replacement(
quotes_settings
.inline_quotes
.opposite()
.as_char()
.to_string(),
tok_range,
),
));
diagnostic.set_fix(Fix::automatic_edits(
fstring_start_edit,
fstring_middle_and_end_edits,
));
}
diagnostics.push(diagnostic);
}
_ => {}
}
}
}

fn unescape_string(value: &str) -> String {
let mut fixed_contents = String::with_capacity(value.len());

let mut chars = value.chars().peekable();
while let Some(char) = chars.next() {
if char != '\\' {
fixed_contents.push(char);
continue;
}
// If we're at the end of the line
let Some(next_char) = chars.peek() else {
fixed_contents.push(char);
continue;
};
// Remove quote escape
if matches!(*next_char, '\'' | '"') {
continue;
}
fixed_contents.push(char);
}

fixed_contents
}
Loading

0 comments on commit 35d47d1

Please sign in to comment.