Skip to content

Commit

Permalink
Avoid syntax error via invalid ur string prefix (astral-sh#8971)
Browse files Browse the repository at this point in the history
## Summary

If a string has a Unicode prefix, we can't add the `r` prefix on top of
that -- we need to remove and replace it. (The Unicode prefix is
redundant anyway in Python 3.)

Closes astral-sh#8967.
  • Loading branch information
charliermarsh authored Dec 2, 2023
1 parent 3fbabfe commit 1dda669
Show file tree
Hide file tree
Showing 8 changed files with 303 additions and 401 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,6 @@ def f():
''' # noqa

regex = '\\\_'

#: W605:1:7
u'foo\ bar'
42 changes: 28 additions & 14 deletions crates/ruff_linter/resources/test/fixtures/pycodestyle/W605_1.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,54 @@
# Same as `W605_0.py` but using f-strings instead.

#: W605:1:10
regex = '\.png$'
regex = f'\.png$'

#: W605:2:1
regex = '''
regex = f'''
\.png$
'''

#: W605:2:6
f(
'\_'
f'\_'
)

#: W605:4:6
"""
f"""
multi-line
literal
with \_ somewhere
in the middle
"""

#: W605:1:38
value = f'new line\nand invalid escape \_ here'

def f():
#: W605:1:11
return'\.png$'

#: Okay
regex = r'\.png$'
regex = '\\.png$'
regex = r'''
regex = fr'\.png$'
regex = f'\\.png$'
regex = fr'''
\.png$
'''
regex = r'''
regex = fr'''
\\.png$
'''
s = '\\'
regex = '\w' # noqa
regex = '''
s = f'\\'
regex = f'\w' # noqa
regex = f'''
\w
''' # noqa

regex = f'\\\_'
value = f'\{{1}}'
value = f'\{1}'
value = f'{1:\}'
value = f"{f"\{1}"}"
value = rf"{f"\{1}"}"

# Okay
value = rf'\{{1}}'
value = rf'\{1}'
value = rf'{1:\}'
value = f"{rf"\{1}"}"
54 changes: 0 additions & 54 deletions crates/ruff_linter/resources/test/fixtures/pycodestyle/W605_2.py

This file was deleted.

1 change: 0 additions & 1 deletion crates/ruff_linter/src/rules/pycodestyle/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ mod tests {
#[test_case(Rule::BlankLineWithWhitespace, Path::new("W29.py"))]
#[test_case(Rule::InvalidEscapeSequence, Path::new("W605_0.py"))]
#[test_case(Rule::InvalidEscapeSequence, Path::new("W605_1.py"))]
#[test_case(Rule::InvalidEscapeSequence, Path::new("W605_2.py"))]
#[test_case(Rule::LineTooLong, Path::new("E501.py"))]
#[test_case(Rule::LineTooLong, Path::new("E501_3.py"))]
#[test_case(Rule::MixedSpacesAndTabs, Path::new("E101.py"))]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ use memchr::memchr_iter;
use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_index::Indexer;
use ruff_python_parser::Tok;
use ruff_python_parser::{StringKind, Tok};
use ruff_source_file::Locator;
use ruff_text_size::{TextLen, TextRange, TextSize};
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};

use crate::fix::edits::pad_start;

Expand Down Expand Up @@ -58,18 +58,6 @@ impl AlwaysFixableViolation for InvalidEscapeSequence {
}
}

#[derive(Debug, PartialEq, Eq)]
enum FixTitle {
AddBackslash,
UseRawStringLiteral,
}

#[derive(Debug)]
struct InvalidEscapeChar {
ch: char,
range: TextRange,
}

/// W605
pub(crate) fn invalid_escape_sequence(
diagnostics: &mut Vec<Diagnostic>,
Expand Down Expand Up @@ -195,41 +183,77 @@ pub(crate) fn invalid_escape_sequence(
if contains_valid_escape_sequence {
// Escape with backslash.
for invalid_escape_char in &invalid_escape_chars {
let diagnostic = Diagnostic::new(
let mut diagnostic = Diagnostic::new(
InvalidEscapeSequence {
ch: invalid_escape_char.ch,
fix_title: FixTitle::AddBackslash,
},
invalid_escape_char.range,
)
.with_fix(Fix::safe_edit(Edit::insertion(
invalid_escape_char.range(),
);
diagnostic.set_fix(Fix::safe_edit(Edit::insertion(
r"\".to_string(),
invalid_escape_char.range.start() + TextSize::from(1),
invalid_escape_char.start() + TextSize::from(1),
)));
invalid_escape_sequence.push(diagnostic);
}
} else {
// Turn into raw string.
for invalid_escape_char in &invalid_escape_chars {
let diagnostic = Diagnostic::new(
let mut diagnostic = Diagnostic::new(
InvalidEscapeSequence {
ch: invalid_escape_char.ch,
fix_title: FixTitle::UseRawStringLiteral,
},
invalid_escape_char.range,
)
.with_fix(
// If necessary, add a space between any leading keyword (`return`, `yield`,
// `assert`, etc.) and the string. For example, `return"foo"` is valid, but
// `returnr"foo"` is not.
Fix::safe_edit(Edit::insertion(
pad_start("r".to_string(), string_start_location, locator),
string_start_location,
)),
invalid_escape_char.range(),
);

if matches!(
token,
Tok::String {
kind: StringKind::Unicode,
..
}
) {
// Replace the Unicode prefix with `r`.
diagnostic.set_fix(Fix::safe_edit(Edit::replacement(
"r".to_string(),
string_start_location,
string_start_location + TextSize::from(1),
)));
} else {
// Insert the `r` prefix.
diagnostic.set_fix(
// If necessary, add a space between any leading keyword (`return`, `yield`,
// `assert`, etc.) and the string. For example, `return"foo"` is valid, but
// `returnr"foo"` is not.
Fix::safe_edit(Edit::insertion(
pad_start("r".to_string(), string_start_location, locator),
string_start_location,
)),
);
}

invalid_escape_sequence.push(diagnostic);
}
}

diagnostics.extend(invalid_escape_sequence);
}

#[derive(Debug, PartialEq, Eq)]
enum FixTitle {
AddBackslash,
UseRawStringLiteral,
}

#[derive(Debug)]
struct InvalidEscapeChar {
ch: char,
range: TextRange,
}

impl Ranged for InvalidEscapeChar {
fn range(&self) -> TextRange {
self.range
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@ W605_0.py:45:12: W605 [*] Invalid escape sequence: `\_`
44 |
45 | regex = '\\\_'
| ^^ W605
46 |
47 | #: W605:1:7
|
= help: Add backslash to escape sequence

Expand All @@ -134,5 +136,23 @@ W605_0.py:45:12: W605 [*] Invalid escape sequence: `\_`
44 44 |
45 |-regex = '\\\_'
45 |+regex = '\\\\_'
46 46 |
47 47 | #: W605:1:7
48 48 | u'foo\ bar'

W605_0.py:48:6: W605 [*] Invalid escape sequence: `\ `
|
47 | #: W605:1:7
48 | u'foo\ bar'
| ^^ W605
|
= help: Use a raw string literal

Safe fix
45 45 | regex = '\\\_'
46 46 |
47 47 | #: W605:1:7
48 |-u'foo\ bar'
48 |+r'foo\ bar'


Loading

0 comments on commit 1dda669

Please sign in to comment.