Enable token-based rules on source with syntax errors
dhruvmanila committed Jun 20, 2024
1 parent a26bd01 commit b7134c9
Showing 27 changed files with 488 additions and 106 deletions.
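
The change that recurs through nearly every hunk below is the iteration boundary: token-based rules previously stopped at `tokens.up_to_first_unknown()`, i.e. at the first point where lexing gave up, and now walk the entire token stream so diagnostics keep flowing past a syntax error. A minimal, self-contained sketch of the old boundary (the `Tokens` type and `TokenKind` variants here are illustrative stand-ins, not ruff_python_parser's actual API):

    #[derive(Debug, Clone, Copy, PartialEq)]
    enum TokenKind {
        Unknown, // emitted where the lexer gave up on a syntax error
        Other,
    }

    struct Tokens(Vec<TokenKind>);

    impl Tokens {
        /// The old boundary: every token before the first Unknown one.
        fn up_to_first_unknown(&self) -> &[TokenKind] {
            let end = self
                .0
                .iter()
                .position(|&kind| kind == TokenKind::Unknown)
                .unwrap_or(self.0.len());
            &self.0[..end]
        }
    }

    fn main() {
        let tokens = Tokens(vec![TokenKind::Other, TokenKind::Unknown, TokenKind::Other]);
        // Old behavior: rules never saw anything after the syntax error.
        assert_eq!(tokens.up_to_first_unknown().len(), 1);
        // New behavior: iterate everything; stateful rules reset at recovery points.
        assert_eq!(tokens.0.len(), 3);
    }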
29 changes: 29 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/flake8_implicit_str_concat/ISC_syntax_error.py
@@ -0,0 +1,29 @@
# The lexer doesn't emit a string token if it's unterminated
"a" "b
"a" "b" "c
"a" """b
c""" "d

# For f-strings, the `FStringRanges` won't contain the range for
# unterminated f-strings.
f"a" f"b
f"a" f"b" f"c
f"a" f"""b
c""" f"d {e

(
    "a"
    "b
    "c"
    "d"
)


# Triple-quoted strings, if unterminated, consume everything after the
# opening quote, so no test code after this point should raise the violation.
(
    """abc"""
    f"""def
    "g" "h"
    "i" "j"
)
26 changes: 26 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/pycodestyle/E30_syntax_error.py
@@ -0,0 +1,26 @@
# Check for E30 errors in a file containing syntax errors with unclosed
# parentheses.

def foo[T1, T2():
    pass

def bar():
    pass



class Foo:
    def __init__(
        pass
    def method():
        pass

foo = Foo(


def top(
    def nested1():
        pass
    def nested2():
        pass

@@ -0,0 +1,13 @@
# These test cases contain syntax errors. The characters within the unterminated
# strings shouldn't be highlighted.

# Before any syntax error
b = ''
# Unterminated string
b = '
b = ''
# Unterminated f-string
b = f'
b = f''
# Implicitly concatenated
b = '' f'' '
2 changes: 1 addition & 1 deletion crates/ruff_linter/src/checkers/tokens.rs
@@ -93,7 +93,7 @@ pub(crate) fn check_tokens(
         Rule::InvalidCharacterNul,
         Rule::InvalidCharacterZeroWidthSpace,
     ]) {
-        for token in tokens.up_to_first_unknown() {
+        for token in tokens {
             pylint::rules::invalid_string_characters(
                 &mut diagnostics,
                 token.kind(),
2 changes: 1 addition & 1 deletion crates/ruff_linter/src/directives.rs
@@ -107,7 +107,7 @@ where
 fn extract_noqa_line_for(tokens: &Tokens, locator: &Locator, indexer: &Indexer) -> NoqaMapping {
     let mut string_mappings = Vec::new();
 
-    for token in tokens.up_to_first_unknown() {
+    for token in tokens {
         match token.kind() {
             TokenKind::EndOfFile => {
                 break;
2 changes: 1 addition & 1 deletion crates/ruff_linter/src/doc_lines.rs
@@ -24,7 +24,7 @@ pub(crate) struct DocLines<'a> {
 impl<'a> DocLines<'a> {
     fn new(tokens: &'a Tokens) -> Self {
         Self {
-            inner: tokens.up_to_first_unknown().iter(),
+            inner: tokens.iter(),
             prev: TextSize::default(),
         }
     }
8 changes: 6 additions & 2 deletions crates/ruff_linter/src/rules/flake8_commas/rules/trailing_commas.rs
@@ -231,7 +231,7 @@ pub(crate) fn trailing_commas(
     indexer: &Indexer,
 ) {
     let mut fstrings = 0u32;
-    let simple_tokens = tokens.up_to_first_unknown().iter().filter_map(|token| {
+    let simple_tokens = tokens.iter().filter_map(|token| {
         match token.kind() {
             // Completely ignore comments -- they just interfere with the logic.
             TokenKind::Comment => None,
@@ -253,7 +253,11 @@
                     None
                 }
             }
-            _ => {
+            kind => {
+                if matches!(kind, TokenKind::Newline if fstrings > 0) {
+                    // The parser recovered from an unterminated f-string.
+                    fstrings = 0;
+                }
                 if fstrings == 0 {
                     Some(SimpleToken::from(token.as_tuple()))
                 } else {
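
Because rules now see tokens emitted after a syntax error, each stateful rule needs a reset at parser-recovery points. In `trailing_commas`, a `Newline` can only be observed while `fstrings > 0` if an f-string was left unterminated (a terminated f-string would have produced `FStringEnd` first), so the counter is zeroed there. A rough sketch of that filter under simplified assumptions (the token kinds and `significant` helper are illustrative, not the rule's real `SimpleToken` machinery):

    #[derive(Debug, Clone, Copy, PartialEq)]
    enum TokenKind {
        FStringStart, // `f"` — no FStringEnd ever follows if it stays unterminated
        FStringEnd,
        Newline,
        Comma,
        Other,
    }

    fn significant(tokens: &[TokenKind]) -> Vec<TokenKind> {
        let mut fstrings = 0u32;
        tokens
            .iter()
            .copied()
            .filter_map(|kind| match kind {
                TokenKind::FStringStart => {
                    fstrings += 1;
                    None
                }
                TokenKind::FStringEnd => {
                    fstrings = fstrings.saturating_sub(1);
                    None
                }
                kind => {
                    if matches!(kind, TokenKind::Newline if fstrings > 0) {
                        // The parser recovered from an unterminated f-string.
                        fstrings = 0;
                    }
                    // Tokens inside an f-string are irrelevant to comma placement.
                    (fstrings == 0).then_some(kind)
                }
            })
            .collect()
    }

    fn main() {
        // Roughly `f"a {b` followed by a newline and then a comma — never closed.
        let tokens = [
            TokenKind::FStringStart,
            TokenKind::Other,
            TokenKind::Newline,
            TokenKind::Comma,
        ];
        // Everything after the recovery point is analyzed again.
        assert_eq!(significant(&tokens), vec![TokenKind::Newline, TokenKind::Comma]);
    }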
8 changes: 8 additions & 0 deletions crates/ruff_linter/src/rules/flake8_implicit_str_concat/mod.rs
@@ -15,6 +15,14 @@ mod tests {
 
     #[test_case(Rule::SingleLineImplicitStringConcatenation, Path::new("ISC.py"))]
     #[test_case(Rule::MultiLineImplicitStringConcatenation, Path::new("ISC.py"))]
+    #[test_case(
+        Rule::SingleLineImplicitStringConcatenation,
+        Path::new("ISC_syntax_error.py")
+    )]
+    #[test_case(
+        Rule::MultiLineImplicitStringConcatenation,
+        Path::new("ISC_syntax_error.py")
+    )]
     #[test_case(Rule::ExplicitStringConcatenation, Path::new("ISC.py"))]
     fn rules(rule_code: Rule, path: &Path) -> Result<()> {
         let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
1 change: 0 additions & 1 deletion crates/ruff_linter/src/rules/flake8_implicit_str_concat/rules/implicit.rs
@@ -98,7 +98,6 @@ pub(crate) fn implicit(
     indexer: &Indexer,
 ) {
     for (a_token, b_token) in tokens
-        .up_to_first_unknown()
         .iter()
         .filter(|token| {
             token.kind() != TokenKind::Comment
67 changes: 67 additions & 0 deletions crates/ruff_linter/src/rules/flake8_implicit_str_concat/snapshots/ruff_linter__rules__flake8_implicit_str_concat__tests__ISC001_ISC_syntax_error.py.snap
@@ -0,0 +1,67 @@
---
source: crates/ruff_linter/src/rules/flake8_implicit_str_concat/mod.rs
---
ISC_syntax_error.py:3:1: ISC001 [*] Implicitly concatenated string literals on one line
|
1 | # The lexer doesn't emit a string token if it's unterminated
2 | "a" "b
3 | "a" "b" "c
| ^^^^^^^ ISC001
4 | "a" """b
5 | c""" "d
|
= help: Combine string literals

Safe fix
1 1 | # The lexer doesn't emit a string token if it's unterminated
2 2 | "a" "b
3 |-"a" "b" "c
3 |+"ab" "c
4 4 | "a" """b
5 5 | c""" "d
6 6 |

ISC_syntax_error.py:4:1: ISC001 Implicitly concatenated string literals on one line
|
2 | "a" "b
3 | "a" "b" "c
4 | / "a" """b
5 | | c""" "d
| |____^ ISC001
6 |
7 | # For f-strings, the `FStringRanges` won't contain the range for
|
= help: Combine string literals

ISC_syntax_error.py:10:1: ISC001 [*] Implicitly concatenated string literals on one line
|
8 | # unterminated f-strings.
9 | f"a" f"b
10 | f"a" f"b" f"c
| ^^^^^^^^^ ISC001
11 | f"a" f"""b
12 | c""" f"d {e
|
= help: Combine string literals

Safe fix
7 7 | # For f-strings, the `FStringRanges` won't contain the range for
8 8 | # unterminated f-strings.
9 9 | f"a" f"b
10 |-f"a" f"b" f"c
10 |+f"ab" f"c
11 11 | f"a" f"""b
12 12 | c""" f"d {e
13 13 |

ISC_syntax_error.py:11:1: ISC001 Implicitly concatenated string literals on one line
|
9 | f"a" f"b
10 | f"a" f"b" f"c
11 | / f"a" f"""b
12 | | c""" f"d {e
| |____^ ISC001
13 |
14 | (
|
= help: Combine string literals
4 changes: 4 additions & 0 deletions crates/ruff_linter/src/rules/flake8_implicit_str_concat/snapshots/ruff_linter__rules__flake8_implicit_str_concat__tests__ISC002_ISC_syntax_error.py.snap
@@ -0,0 +1,4 @@
---
source: crates/ruff_linter/src/rules/flake8_implicit_str_concat/mod.rs
---

8 changes: 8 additions & 0 deletions crates/ruff_linter/src/rules/pycodestyle/mod.rs
@@ -195,6 +195,14 @@ mod tests {
     #[test_case(Rule::BlankLineAfterDecorator, Path::new("E30.py"))]
     #[test_case(Rule::BlankLinesAfterFunctionOrClass, Path::new("E30.py"))]
     #[test_case(Rule::BlankLinesBeforeNestedDefinition, Path::new("E30.py"))]
+    #[test_case(Rule::BlankLineBetweenMethods, Path::new("E30_syntax_error.py"))]
+    #[test_case(Rule::BlankLinesTopLevel, Path::new("E30_syntax_error.py"))]
+    #[test_case(Rule::TooManyBlankLines, Path::new("E30_syntax_error.py"))]
+    #[test_case(Rule::BlankLinesAfterFunctionOrClass, Path::new("E30_syntax_error.py"))]
+    #[test_case(
+        Rule::BlankLinesBeforeNestedDefinition,
+        Path::new("E30_syntax_error.py")
+    )]
     fn blank_lines(rule_code: Rule, path: &Path) -> Result<()> {
         let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
         let diagnostics = test_path(
70 changes: 37 additions & 33 deletions crates/ruff_linter/src/rules/pycodestyle/rules/blank_lines.rs
@@ -405,7 +405,7 @@ impl<'a> LinePreprocessor<'a> {
         cell_offsets: Option<&'a CellOffsets>,
     ) -> LinePreprocessor<'a> {
         LinePreprocessor {
-            tokens: tokens.up_to_first_unknown().iter().peekable(),
+            tokens: tokens.iter().peekable(),
             locator,
             line_start: TextSize::new(0),
             max_preceding_blank_lines: BlankLines::Zero,
@@ -506,43 +506,47 @@ impl<'a> Iterator for LinePreprocessor<'a> {
                 TokenKind::Rbrace | TokenKind::Rpar | TokenKind::Rsqb => {
                     parens = parens.saturating_sub(1);
                 }
-                TokenKind::Newline | TokenKind::NonLogicalNewline if parens == 0 => {
-                    let indent_range = TextRange::new(self.line_start, first_token_range.start());
-
-                    let indent_length =
-                        expand_indent(self.locator.slice(indent_range), self.indent_width);
-
-                    self.max_preceding_blank_lines =
-                        self.max_preceding_blank_lines.max(blank_lines);
-
-                    let logical_line = LogicalLineInfo {
-                        kind: logical_line_kind,
-                        first_token_range,
-                        last_token,
-                        logical_line_end: range.end(),
-                        is_comment_only: line_is_comment_only,
-                        is_beginning_of_cell: self.is_beginning_of_cell,
-                        is_docstring,
-                        indent_length,
-                        blank_lines,
-                        preceding_blank_lines: self.max_preceding_blank_lines,
-                    };
+                TokenKind::Newline if parens > 0 => {
+                    // The parser recovered from an unclosed parenthesis.
+                    parens = 0;
+                }
+                _ => {}
+            }
 
-                    // Reset the blank lines after a non-comment only line.
-                    if !line_is_comment_only {
-                        self.max_preceding_blank_lines = BlankLines::Zero;
-                    }
+            if matches!(kind, TokenKind::Newline | TokenKind::NonLogicalNewline if parens == 0) {
+                let indent_range = TextRange::new(self.line_start, first_token_range.start());
+
+                let indent_length =
+                    expand_indent(self.locator.slice(indent_range), self.indent_width);
+
+                self.max_preceding_blank_lines = self.max_preceding_blank_lines.max(blank_lines);
+
+                let logical_line = LogicalLineInfo {
+                    kind: logical_line_kind,
+                    first_token_range,
+                    last_token,
+                    logical_line_end: range.end(),
+                    is_comment_only: line_is_comment_only,
+                    is_beginning_of_cell: self.is_beginning_of_cell,
+                    is_docstring,
+                    indent_length,
+                    blank_lines,
+                    preceding_blank_lines: self.max_preceding_blank_lines,
+                };
 
-                    // Set the start for the next logical line.
-                    self.line_start = range.end();
+                // Reset the blank lines after a non-comment only line.
+                if !line_is_comment_only {
+                    self.max_preceding_blank_lines = BlankLines::Zero;
+                }
 
-                    if self.cell_offsets.is_some() && !line_is_comment_only {
-                        self.is_beginning_of_cell = false;
-                    }
+                // Set the start for the next logical line.
+                self.line_start = range.end();
 
-                    return Some(logical_line);
-                }
-                _ => {}
-            }
+                if self.cell_offsets.is_some() && !line_is_comment_only {
+                    self.is_beginning_of_cell = false;
+                }
+
+                return Some(logical_line);
+            }
 
             if !kind.is_trivia() {
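
The restructuring above is not just cosmetic. With the old arm guarded by `if parens == 0`, a `Newline` seen while `parens > 0` (i.e. right after the parser recovered from an unclosed parenthesis) would have fallen through to `_ => {}` and the logical line would never have been flushed. Resetting `parens` inside the match and re-checking with `matches!` afterwards lets the same token both record the recovery and terminate the line. A compact sketch of that ordering (illustrative types, not the real `LinePreprocessor`):

    #[derive(Clone, Copy)]
    enum TokenKind {
        Lpar,
        Newline,
        Other,
    }

    fn count_logical_lines(tokens: &[TokenKind]) -> usize {
        let mut parens = 0u32;
        let mut lines = 0;
        for &kind in tokens {
            match kind {
                TokenKind::Lpar => parens += 1,
                TokenKind::Newline if parens > 0 => {
                    // The parser recovered from an unclosed parenthesis.
                    parens = 0;
                }
                _ => {}
            }
            // Checked *after* the reset, so a recovery newline still ends the line.
            if matches!(kind, TokenKind::Newline if parens == 0) {
                lines += 1;
            }
        }
        lines
    }

    fn main() {
        // `foo = Foo(` followed by a newline: one logical line despite the recovery.
        let tokens = [TokenKind::Other, TokenKind::Lpar, TokenKind::Newline];
        assert_eq!(count_logical_lines(&tokens), 1);
    }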
6 changes: 5 additions & 1 deletion crates/ruff_linter/src/rules/pycodestyle/rules/compound_statements.rs
@@ -134,7 +134,7 @@ pub(crate) fn compound_statements(
     let mut indent = 0u32;
 
     // Use an iterator to allow passing it around.
-    let mut token_iter = tokens.up_to_first_unknown().iter();
+    let mut token_iter = tokens.iter();
 
     loop {
         let Some(token) = token_iter.next() else {
@@ -148,6 +148,10 @@
             TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace => {
                 nesting = nesting.saturating_sub(1);
             }
+            TokenKind::Newline if nesting > 0 => {
+                // The parser recovered from an unclosed parenthesis.
+                nesting = 0;
+            }
             TokenKind::Ellipsis => {
                 if allow_ellipsis {
                     allow_ellipsis = false;
7 changes: 6 additions & 1 deletion crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/mod.rs
@@ -65,7 +65,7 @@ impl<'a> LogicalLines<'a> {
         let mut builder = LogicalLinesBuilder::with_capacity(tokens.len());
         let mut parens = 0u32;
 
-        for token in tokens.up_to_first_unknown() {
+        for token in tokens {
             builder.push_token(token.kind(), token.range());
 
             match token.kind() {
@@ -75,6 +75,11 @@
                 TokenKind::Rbrace | TokenKind::Rpar | TokenKind::Rsqb => {
                     parens = parens.saturating_sub(1);
                 }
+                TokenKind::Newline if parens > 0 => {
+                    // The parser recovered from an unclosed parenthesis.
+                    parens = 0;
+                    builder.finish_line();
+                }
                 TokenKind::Newline | TokenKind::NonLogicalNewline if parens == 0 => {
                     builder.finish_line();
                 }
2 changes: 1 addition & 1 deletion crates/ruff_linter/src/rules/pycodestyle/rules/too_many_newlines_at_end_of_file.rs
@@ -60,7 +60,7 @@ pub(crate) fn too_many_newlines_at_end_of_file(diagnostics: &mut Vec<Diagnostic>
     let mut end: Option<TextSize> = None;
 
     // Count the number of trailing newlines.
-    for token in tokens.up_to_first_unknown().iter().rev() {
+    for token in tokens.iter().rev() {
         match token.kind() {
             TokenKind::NonLogicalNewline | TokenKind::Newline => {
                 if num_trailing_newlines == 0 {
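
This rule scans the stream in reverse to count trailing newlines, so the only change it needs is the wider iteration boundary; the counting logic itself is unchanged. A small sketch of that backwards walk (simplified: the real rule also tracks text ranges and the `EndOfFile` token):

    #[derive(Clone, Copy)]
    enum TokenKind {
        Newline,
        NonLogicalNewline,
        Other,
    }

    fn trailing_newlines(tokens: &[TokenKind]) -> u32 {
        let mut count = 0;
        // Walk backwards over *all* tokens, not just up to the first unknown one.
        for &kind in tokens.iter().rev() {
            match kind {
                TokenKind::Newline | TokenKind::NonLogicalNewline => count += 1,
                _ => break,
            }
        }
        count
    }

    fn main() {
        let tokens = [TokenKind::Other, TokenKind::Newline, TokenKind::NonLogicalNewline];
        assert_eq!(trailing_newlines(&tokens), 2);
    }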
21 changes: 21 additions & 0 deletions crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__E301_E30_syntax_error.py.snap
@@ -0,0 +1,21 @@
---
source: crates/ruff_linter/src/rules/pycodestyle/mod.rs
---
E30_syntax_error.py:15:5: E301 [*] Expected 1 blank line, found 0
   |
13 |     def __init__(
14 |         pass
15 |     def method():
   |     ^^^ E301
16 |         pass
   |
   = help: Add missing blank line

Safe fix
12 12 | class Foo:
13 13 |     def __init__(
14 14 |         pass
   15 |+
15 16 |     def method():
16 17 |         pass
17 18 |