Skip to content

Commit

Permalink
Enable token-based rules on source with syntax errors (#11950)
Browse files Browse the repository at this point in the history
## Summary

This PR updates the linter, specifically the token-based rules, to work
on the tokens that come after a syntax error.

For context, the token-based rules only diagnose the tokens up to the
first lexical error. This PR builds in error resilience by
introducing a `TokenIterWithContext`, which updates the `nesting` level
and tries to keep it in sync with what the lexer is seeing. This isn't 100%
accurate because if the parser recovered from an unclosed parenthesis in
the middle of the line, the context won't reduce the nesting level until
it sees the newline token at the end of the line.

resolves: #11915

## Test Plan

* Add test cases for a bunch of rules that are affected by this change.
* Run the fuzzer for an extended period, fixing any other bugs that surface.
  • Loading branch information
dhruvmanila authored Jul 2, 2024
1 parent 88a4cc4 commit 8f40928
Show file tree
Hide file tree
Showing 29 changed files with 915 additions and 152 deletions.
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# Check for `flake8-commas` violation for a file containing syntax errors.
(
*args
)

def foo[(param1='test', param2='test',):
pass

Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# The lexer doesn't emit a string token if it's unterminated
"a" "b
"a" "b" "c
"a" """b
c""" "d

# For f-strings, the `FStringRanges` won't contain the range for
# unterminated f-strings.
f"a" f"b
f"a" f"b" f"c
f"a" f"""b
c""" f"d {e

(
"a"
"b
"c"
"d"
)


# Triple-quoted strings, if unterminated, consume everything that comes after
# the opening quote. So, no test code should raise the violation after this.
(
"""abc"""
f"""def
"g" "h"
"i" "j"
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Check for E30 errors in a file containing syntax errors with unclosed
# parenthesis.

def foo[T1, T2():
pass

def bar():
pass



class Foo:
def __init__(
pass
def method():
pass

foo = Foo(


def top(
def nested1():
pass
def nested2():
pass

Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# These test cases contain syntax errors. The characters within the unterminated
# strings shouldn't be highlighted.

# Before any syntax error
b = ''
# Unterminated string
b = '
b = ''
# Unterminated f-string
b = f'
b = f''
# Implicitly concatenated
b = '' f'' '
2 changes: 1 addition & 1 deletion crates/ruff_linter/src/checkers/tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ pub(crate) fn check_tokens(
Rule::InvalidCharacterNul,
Rule::InvalidCharacterZeroWidthSpace,
]) {
for token in tokens.up_to_first_unknown() {
for token in tokens {
pylint::rules::invalid_string_characters(
&mut diagnostics,
token.kind(),
Expand Down
9 changes: 2 additions & 7 deletions crates/ruff_linter/src/directives.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,14 +107,9 @@ where
fn extract_noqa_line_for(tokens: &Tokens, locator: &Locator, indexer: &Indexer) -> NoqaMapping {
let mut string_mappings = Vec::new();

for token in tokens.up_to_first_unknown() {
for token in tokens {
match token.kind() {
TokenKind::EndOfFile => {
break;
}

// For multi-line strings, we expect `noqa` directives on the last line of the
// string.
// For multi-line strings, we expect `noqa` directives on the last line of the string.
TokenKind::String if token.is_triple_quoted_string() => {
if locator.contains_line_break(token.range()) {
string_mappings.push(TextRange::new(
Expand Down
2 changes: 1 addition & 1 deletion crates/ruff_linter/src/doc_lines.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ pub(crate) struct DocLines<'a> {
impl<'a> DocLines<'a> {
fn new(tokens: &'a Tokens) -> Self {
Self {
inner: tokens.up_to_first_unknown().iter(),
inner: tokens.iter(),
prev: TextSize::default(),
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ pub(crate) fn trailing_commas(
indexer: &Indexer,
) {
let mut fstrings = 0u32;
let simple_tokens = tokens.up_to_first_unknown().iter().filter_map(|token| {
let simple_tokens = tokens.iter().filter_map(|token| {
match token.kind() {
// Completely ignore comments -- they just interfere with the logic.
TokenKind::Comment => None,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,30 @@
---
source: crates/ruff_linter/src/rules/flake8_commas/mod.rs
---
COM81_syntax_error.py:2:5: SyntaxError: Starred expression cannot be used here
COM81_syntax_error.py:3:5: SyntaxError: Starred expression cannot be used here
|
1 | (
2 | *args
1 | # Check for `flake8-commas` violation for a file containing syntax errors.
2 | (
3 | *args
| ^
3 | )
4 | )
|

COM81_syntax_error.py:6:9: SyntaxError: Type parameter list cannot be empty
|
4 | )
5 |
6 | def foo[(param1='test', param2='test',):
| ^
7 | pass
|

COM81_syntax_error.py:6:38: COM819 Trailing comma prohibited
|
4 | )
5 |
6 | def foo[(param1='test', param2='test',):
| ^ COM819
7 | pass
|
= help: Remove trailing comma
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,14 @@ mod tests {

#[test_case(Rule::SingleLineImplicitStringConcatenation, Path::new("ISC.py"))]
#[test_case(Rule::MultiLineImplicitStringConcatenation, Path::new("ISC.py"))]
#[test_case(
Rule::SingleLineImplicitStringConcatenation,
Path::new("ISC_syntax_error.py")
)]
#[test_case(
Rule::MultiLineImplicitStringConcatenation,
Path::new("ISC_syntax_error.py")
)]
#[test_case(Rule::ExplicitStringConcatenation, Path::new("ISC.py"))]
fn rules(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,6 @@ pub(crate) fn implicit(
indexer: &Indexer,
) {
for (a_token, b_token) in tokens
.up_to_first_unknown()
.iter()
.filter(|token| {
token.kind() != TokenKind::Comment
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
---
source: crates/ruff_linter/src/rules/flake8_implicit_str_concat/mod.rs
---
ISC_syntax_error.py:2:5: SyntaxError: missing closing quote in string literal
|
1 | # The lexer doesn't emit a string token if it's unterminated
2 | "a" "b
| ^
3 | "a" "b" "c
4 | "a" """b
|

ISC_syntax_error.py:2:7: SyntaxError: Expected a statement
|
1 | # The lexer doesn't emit a string token if it's unterminated
2 | "a" "b
| ^
3 | "a" "b" "c
4 | "a" """b
5 | c""" "d
|

ISC_syntax_error.py:3:1: ISC001 Implicitly concatenated string literals on one line
|
1 | # The lexer doesn't emit a string token if it's unterminated
2 | "a" "b
3 | "a" "b" "c
| ^^^^^^^ ISC001
4 | "a" """b
5 | c""" "d
|
= help: Combine string literals

ISC_syntax_error.py:3:9: SyntaxError: missing closing quote in string literal
|
1 | # The lexer doesn't emit a string token if it's unterminated
2 | "a" "b
3 | "a" "b" "c
| ^
4 | "a" """b
5 | c""" "d
|

ISC_syntax_error.py:3:11: SyntaxError: Expected a statement
|
1 | # The lexer doesn't emit a string token if it's unterminated
2 | "a" "b
3 | "a" "b" "c
| ^
4 | "a" """b
5 | c""" "d
|

ISC_syntax_error.py:4:1: ISC001 Implicitly concatenated string literals on one line
|
2 | "a" "b
3 | "a" "b" "c
4 | / "a" """b
5 | | c""" "d
| |____^ ISC001
6 |
7 | # For f-strings, the `FStringRanges` won't contain the range for
|
= help: Combine string literals

ISC_syntax_error.py:5:6: SyntaxError: missing closing quote in string literal
|
3 | "a" "b" "c
4 | "a" """b
5 | c""" "d
| ^
6 |
7 | # For f-strings, the `FStringRanges` won't contain the range for
|

ISC_syntax_error.py:5:8: SyntaxError: Expected a statement
|
3 | "a" "b" "c
4 | "a" """b
5 | c""" "d
| ^
6 |
7 | # For f-strings, the `FStringRanges` won't contain the range for
8 | # unterminated f-strings.
|

ISC_syntax_error.py:9:8: SyntaxError: f-string: unterminated string
|
7 | # For f-strings, the `FStringRanges` won't contain the range for
8 | # unterminated f-strings.
9 | f"a" f"b
| ^
10 | f"a" f"b" f"c
11 | f"a" f"""b
|

ISC_syntax_error.py:9:9: SyntaxError: Expected FStringEnd, found newline
|
7 | # For f-strings, the `FStringRanges` won't contain the range for
8 | # unterminated f-strings.
9 | f"a" f"b
| ^
10 | f"a" f"b" f"c
11 | f"a" f"""b
12 | c""" f"d {e
|

ISC_syntax_error.py:10:1: ISC001 Implicitly concatenated string literals on one line
|
8 | # unterminated f-strings.
9 | f"a" f"b
10 | f"a" f"b" f"c
| ^^^^^^^^^ ISC001
11 | f"a" f"""b
12 | c""" f"d {e
|
= help: Combine string literals

ISC_syntax_error.py:10:13: SyntaxError: f-string: unterminated string
|
8 | # unterminated f-strings.
9 | f"a" f"b
10 | f"a" f"b" f"c
| ^
11 | f"a" f"""b
12 | c""" f"d {e
|

ISC_syntax_error.py:10:14: SyntaxError: Expected FStringEnd, found newline
|
8 | # unterminated f-strings.
9 | f"a" f"b
10 | f"a" f"b" f"c
| ^
11 | f"a" f"""b
12 | c""" f"d {e
|

ISC_syntax_error.py:11:1: ISC001 Implicitly concatenated string literals on one line
|
9 | f"a" f"b
10 | f"a" f"b" f"c
11 | / f"a" f"""b
12 | | c""" f"d {e
| |____^ ISC001
13 |
14 | (
|
= help: Combine string literals

ISC_syntax_error.py:16:5: SyntaxError: missing closing quote in string literal
|
14 | (
15 | "a"
16 | "b
| ^
17 | "c"
18 | "d"
|

ISC_syntax_error.py:26:9: SyntaxError: f-string: unterminated triple-quoted string
|
24 | (
25 | """abc"""
26 | f"""def
| ^
27 | "g" "h"
28 | "i" "j"
|

ISC_syntax_error.py:30:1: SyntaxError: unexpected EOF while parsing
|
28 | "i" "j"
29 | )
|

ISC_syntax_error.py:30:1: SyntaxError: f-string: unterminated string
|
28 | "i" "j"
29 | )
|
Loading

0 comments on commit 8f40928

Please sign in to comment.