Enable token-based rules on source with syntax errors
dhruvmanila committed Jun 28, 2024
1 parent b28dc9a commit 1961406
Showing 27 changed files with 480 additions and 148 deletions.
29 changes: 29 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/flake8_implicit_str_concat/ISC_syntax_error.py
@@ -0,0 +1,29 @@
# The lexer doesn't emit a string token if it's unterminated
"a" "b
"a" "b" "c
"a" """b
c""" "d

# For f-strings, the `FStringRanges` won't contain the range for
# unterminated f-strings.
f"a" f"b
f"a" f"b" f"c
f"a" f"""b
c""" f"d {e

(
    "a"
    "b
    "c"
    "d"
)


# Triple-quoted strings, if unterminated, consume everything that comes after
# the opening quote. So, no test code should raise the violation after this.
(
    """abc"""
    f"""def
    "g" "h"
    "i" "j"
)
26 changes: 26 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/pycodestyle/E30_syntax_error.py
@@ -0,0 +1,26 @@
# Check for E30 errors in a file containing syntax errors with unclosed
# parenthesis.

def foo[T1, T2():
    pass

def bar():
    pass



class Foo:
    def __init__(
        pass
    def method():
        pass

foo = Foo(


def top(
    def nested1():
        pass
    def nested2():
        pass

@@ -0,0 +1,13 @@
# These test cases contain syntax errors. The characters within the unterminated
# strings shouldn't be highlighted.

# Before any syntax error
b = ''
# Unterminated string
b = '
b = ''
# Unterminated f-string
b = f'
b = f''
# Implicitly concatenated
b = '' f'' '
2 changes: 1 addition & 1 deletion crates/ruff_linter/src/checkers/tokens.rs
@@ -93,7 +93,7 @@ pub(crate) fn check_tokens(
        Rule::InvalidCharacterNul,
        Rule::InvalidCharacterZeroWidthSpace,
    ]) {
-        for token in tokens.up_to_first_unknown() {
+        for token in tokens {
            pylint::rules::invalid_string_characters(
                &mut diagnostics,
                token.kind(),
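Note: this one-line change is the heart of the commit and repeats across the files below. Token-based rules used to stop at the first unparseable token via `up_to_first_unknown()`; with the error-recovering lexer, the whole token stream is now safe to visit. A minimal sketch of the two behaviors, using stand-in types rather than ruff's actual `Tokens` API:

    #[derive(Debug)]
    enum TokenKind {
        Name,
        String,
        Unknown, // emitted where the lexer hit unparseable source
    }

    struct Tokens(Vec<TokenKind>);

    impl Tokens {
        // Old behavior: truncate the stream at the first `Unknown` token.
        fn up_to_first_unknown(&self) -> &[TokenKind] {
            let end = self
                .0
                .iter()
                .position(|kind| matches!(kind, TokenKind::Unknown))
                .unwrap_or(self.0.len());
            &self.0[..end]
        }
    }

    // New behavior: `for token in tokens` visits every token, errors included.
    impl<'a> IntoIterator for &'a Tokens {
        type Item = &'a TokenKind;
        type IntoIter = std::slice::Iter<'a, TokenKind>;
        fn into_iter(self) -> Self::IntoIter {
            self.0.iter()
        }
    }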
9 changes: 2 additions & 7 deletions crates/ruff_linter/src/directives.rs
@@ -107,14 +107,9 @@
 fn extract_noqa_line_for(tokens: &Tokens, locator: &Locator, indexer: &Indexer) -> NoqaMapping {
     let mut string_mappings = Vec::new();

-    for token in tokens.up_to_first_unknown() {
+    for token in tokens {
         match token.kind() {
-            TokenKind::EndOfFile => {
-                break;
-            }
-
-            // For multi-line strings, we expect `noqa` directives on the last line of the
-            // string.
+            // For multi-line strings, we expect `noqa` directives on the last line of the string.
             TokenKind::String if token.is_triple_quoted_string() => {
                 if locator.contains_line_break(token.range()) {
                     string_mappings.push(TextRange::new(
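The surviving comment states the invariant this function maintains: for a multi-line (triple-quoted) string, the `# noqa` directive is expected on the string's last line, so diagnostics anywhere inside the string must be remapped to that line. A rough, self-contained illustration of the remapping idea (assumed types, not ruff's `NoqaMapping`):

    /// Maps a line to the line whose `# noqa` comment governs it.
    /// `string_ranges` holds (first_line, last_line) pairs for strings
    /// that span multiple lines.
    fn noqa_line_for(string_ranges: &[(u32, u32)], line: u32) -> u32 {
        string_ranges
            .iter()
            .find(|(first, last)| (*first..=*last).contains(&line))
            .map_or(line, |&(_, last)| last)
    }

    fn main() {
        // A string spanning lines 3..=6 is suppressed by `# noqa` on line 6.
        assert_eq!(noqa_line_for(&[(3, 6)], 4), 6);
        // Lines outside any multi-line string map to themselves.
        assert_eq!(noqa_line_for(&[(3, 6)], 8), 8);
    }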
2 changes: 1 addition & 1 deletion crates/ruff_linter/src/doc_lines.rs
@@ -24,7 +24,7 @@ pub(crate) struct DocLines<'a> {
 impl<'a> DocLines<'a> {
     fn new(tokens: &'a Tokens) -> Self {
         Self {
-            inner: tokens.up_to_first_unknown().iter(),
+            inner: tokens.iter(),
             prev: TextSize::default(),
         }
     }
crates/ruff_linter/src/rules/flake8_commas/rules/trailing_commas.rs
@@ -231,7 +231,7 @@ pub(crate) fn trailing_commas(
     indexer: &Indexer,
 ) {
     let mut fstrings = 0u32;
-    let simple_tokens = tokens.up_to_first_unknown().iter().filter_map(|token| {
+    let simple_tokens = tokens.iter().filter_map(|token| {
         match token.kind() {
             // Completely ignore comments -- they just interfere with the logic.
             TokenKind::Comment => None,
@@ -253,7 +253,11 @@
                     None
                 }
             }
-            _ => {
+            kind => {
+                if matches!(kind, TokenKind::Newline if fstrings > 0) {
+                    // The parser recovered from an unterminated f-string.
+                    fstrings = 0;
+                }
                 if fstrings == 0 {
                     Some(SimpleToken::from(token.as_tuple()))
                 } else {
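The new `kind =>` arm deals with parser recovery: tokens between an f-string start and its end are normally skipped, so seeing a logical `Newline` while the f-string counter is still positive can only mean the f-string was left unterminated and the parser recovered. Resetting the counter stops every subsequent token from being swallowed. A self-contained sketch of that counter logic with simplified, assumed token kinds:

    #[derive(Clone, Copy, PartialEq, Eq, Debug)]
    enum TokenKind {
        FStringStart,
        FStringEnd,
        Newline,
        Comma,
        Name,
    }

    /// Returns the tokens that the trailing-comma analysis would see.
    fn visible_tokens(tokens: &[TokenKind]) -> Vec<TokenKind> {
        let mut fstrings = 0u32;
        tokens
            .iter()
            .copied()
            .filter_map(|kind| match kind {
                TokenKind::FStringStart => {
                    fstrings += 1;
                    None
                }
                TokenKind::FStringEnd => {
                    fstrings = fstrings.saturating_sub(1);
                    None
                }
                kind => {
                    if matches!(kind, TokenKind::Newline if fstrings > 0) {
                        // The parser recovered from an unterminated f-string.
                        fstrings = 0;
                    }
                    (fstrings == 0).then_some(kind)
                }
            })
            .collect()
    }

Without the reset, an unterminated f-string such as `f"a {b` would leave the counter stuck above zero and hide every token on the following lines from the rule.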
crates/ruff_linter/src/rules/flake8_implicit_str_concat/mod.rs
@@ -15,6 +15,14 @@ mod tests {

     #[test_case(Rule::SingleLineImplicitStringConcatenation, Path::new("ISC.py"))]
     #[test_case(Rule::MultiLineImplicitStringConcatenation, Path::new("ISC.py"))]
+    #[test_case(
+        Rule::SingleLineImplicitStringConcatenation,
+        Path::new("ISC_syntax_error.py")
+    )]
+    #[test_case(
+        Rule::MultiLineImplicitStringConcatenation,
+        Path::new("ISC_syntax_error.py")
+    )]
     #[test_case(Rule::ExplicitStringConcatenation, Path::new("ISC.py"))]
     fn rules(rule_code: Rule, path: &Path) -> Result<()> {
         let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
crates/ruff_linter/src/rules/flake8_implicit_str_concat/rules/implicit.rs
@@ -98,7 +98,6 @@ pub(crate) fn implicit(
     indexer: &Indexer,
 ) {
     for (a_token, b_token) in tokens
-        .up_to_first_unknown()
         .iter()
         .filter(|token| {
             token.kind() != TokenKind::Comment
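For context, the loop above walks the filtered token stream in overlapping pairs (`a_token`, `b_token`): two adjacent string tokens are exactly the shape of an implicit concatenation. A sketch of that pairing with itertools and assumed, simplified token kinds (the real rule works on ruff's `Token` values and their source ranges):

    use itertools::Itertools; // for `tuple_windows`

    #[derive(Clone, Copy, PartialEq, Eq, Debug)]
    enum TokenKind {
        String,
        Comment,
        NonLogicalNewline,
        Name,
    }

    /// Yields (left, right) pairs of adjacent string tokens, ignoring
    /// comments and in-parentheses line breaks between them.
    fn concatenation_pairs(tokens: &[TokenKind]) -> Vec<(TokenKind, TokenKind)> {
        tokens
            .iter()
            .copied()
            .filter(|kind| !matches!(kind, TokenKind::Comment | TokenKind::NonLogicalNewline))
            .tuple_windows()
            .filter(|(a, b)| *a == TokenKind::String && *b == TokenKind::String)
            .collect()
    }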
@@ -0,0 +1,67 @@
---
source: crates/ruff_linter/src/rules/flake8_implicit_str_concat/mod.rs
---
ISC_syntax_error.py:3:1: ISC001 [*] Implicitly concatenated string literals on one line
|
1 | # The lexer doesn't emit a string token if it's unterminated
2 | "a" "b
3 | "a" "b" "c
| ^^^^^^^ ISC001
4 | "a" """b
5 | c""" "d
|
= help: Combine string literals

ℹ Safe fix
1 1 | # The lexer doesn't emit a string token if it's unterminated
2 2 | "a" "b
3 |-"a" "b" "c
3 |+"ab" "c
4 4 | "a" """b
5 5 | c""" "d
6 6 |

ISC_syntax_error.py:4:1: ISC001 Implicitly concatenated string literals on one line
|
2 | "a" "b
3 | "a" "b" "c
4 | / "a" """b
5 | | c""" "d
| |____^ ISC001
6 |
7 | # For f-strings, the `FStringRanges` won't contain the range for
|
= help: Combine string literals

ISC_syntax_error.py:10:1: ISC001 [*] Implicitly concatenated string literals on one line
|
8 | # unterminated f-strings.
9 | f"a" f"b
10 | f"a" f"b" f"c
| ^^^^^^^^^ ISC001
11 | f"a" f"""b
12 | c""" f"d {e
|
= help: Combine string literals

ℹ Safe fix
7 7 | # For f-strings, the `FStringRanges` won't contain the range for
8 8 | # unterminated f-strings.
9 9 | f"a" f"b
10 |-f"a" f"b" f"c
10 |+f"ab" f"c
11 11 | f"a" f"""b
12 12 | c""" f"d {e
13 13 |

ISC_syntax_error.py:11:1: ISC001 Implicitly concatenated string literals on one line
|
9 | f"a" f"b
10 | f"a" f"b" f"c
11 | / f"a" f"""b
12 | | c""" f"d {e
| |____^ ISC001
13 |
14 | (
|
= help: Combine string literals
@@ -0,0 +1,4 @@
---
source: crates/ruff_linter/src/rules/flake8_implicit_str_concat/mod.rs
---

8 changes: 8 additions & 0 deletions crates/ruff_linter/src/rules/pycodestyle/mod.rs
@@ -191,6 +191,14 @@ mod tests {
     #[test_case(Rule::BlankLineAfterDecorator, Path::new("E30.py"))]
     #[test_case(Rule::BlankLinesAfterFunctionOrClass, Path::new("E30.py"))]
     #[test_case(Rule::BlankLinesBeforeNestedDefinition, Path::new("E30.py"))]
+    #[test_case(Rule::BlankLineBetweenMethods, Path::new("E30_syntax_error.py"))]
+    #[test_case(Rule::BlankLinesTopLevel, Path::new("E30_syntax_error.py"))]
+    #[test_case(Rule::TooManyBlankLines, Path::new("E30_syntax_error.py"))]
+    #[test_case(Rule::BlankLinesAfterFunctionOrClass, Path::new("E30_syntax_error.py"))]
+    #[test_case(
+        Rule::BlankLinesBeforeNestedDefinition,
+        Path::new("E30_syntax_error.py")
+    )]
     fn blank_lines(rule_code: Rule, path: &Path) -> Result<()> {
         let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
         let diagnostics = test_path(
77 changes: 33 additions & 44 deletions crates/ruff_linter/src/rules/pycodestyle/rules/blank_lines.rs
@@ -1,7 +1,7 @@
 use itertools::Itertools;
 use ruff_notebook::CellOffsets;
-use ruff_python_parser::Token;
 use ruff_python_parser::Tokens;
+use ruff_python_parser::TokensIterWithContext;
 use std::cmp::Ordering;
 use std::iter::Peekable;
 use std::num::NonZeroU32;
@@ -384,7 +384,7 @@ struct LogicalLineInfo {
 /// Iterator that processes tokens until a full logical line (or comment line) is "built".
 /// It then returns characteristics of that logical line (see `LogicalLineInfo`).
 struct LinePreprocessor<'a> {
-    tokens: Peekable<Iter<'a, Token>>,
+    tokens: TokensIterWithContext<'a>,
     locator: &'a Locator<'a>,
     indent_width: IndentWidth,
     /// The start position of the next logical line.
@@ -406,7 +406,7 @@ impl<'a> LinePreprocessor<'a> {
         cell_offsets: Option<&'a CellOffsets>,
     ) -> LinePreprocessor<'a> {
         LinePreprocessor {
-            tokens: tokens.up_to_first_unknown().iter().peekable(),
+            tokens: tokens.iter_with_context(),
             locator,
             line_start: TextSize::new(0),
             max_preceding_blank_lines: BlankLines::Zero,
@@ -428,7 +428,6 @@ impl<'a> Iterator for LinePreprocessor<'a> {
         let mut blank_lines = BlankLines::Zero;
         let mut first_logical_line_token: Option<(LogicalLineKind, TextRange)> = None;
         let mut last_token = TokenKind::EndOfFile;
-        let mut parens = 0u32;

         while let Some(token) = self.tokens.next() {
             let (kind, range) = token.as_tuple();
@@ -500,50 +499,40 @@
                 is_docstring = false;
             }

-            match kind {
-                TokenKind::Lbrace | TokenKind::Lpar | TokenKind::Lsqb => {
-                    parens = parens.saturating_add(1);
-                }
-                TokenKind::Rbrace | TokenKind::Rpar | TokenKind::Rsqb => {
-                    parens = parens.saturating_sub(1);
-                }
-                TokenKind::Newline | TokenKind::NonLogicalNewline if parens == 0 => {
-                    let indent_range = TextRange::new(self.line_start, first_token_range.start());
-
-                    let indent_length =
-                        expand_indent(self.locator.slice(indent_range), self.indent_width);
-
-                    self.max_preceding_blank_lines =
-                        self.max_preceding_blank_lines.max(blank_lines);
-
-                    let logical_line = LogicalLineInfo {
-                        kind: logical_line_kind,
-                        first_token_range,
-                        last_token,
-                        logical_line_end: range.end(),
-                        is_comment_only: line_is_comment_only,
-                        is_beginning_of_cell: self.is_beginning_of_cell,
-                        is_docstring,
-                        indent_length,
-                        blank_lines,
-                        preceding_blank_lines: self.max_preceding_blank_lines,
-                    };
-
-                    // Reset the blank lines after a non-comment only line.
-                    if !line_is_comment_only {
-                        self.max_preceding_blank_lines = BlankLines::Zero;
-                    }
-
-                    // Set the start for the next logical line.
-                    self.line_start = range.end();
-
-                    if self.cell_offsets.is_some() && !line_is_comment_only {
-                        self.is_beginning_of_cell = false;
-                    }
-
-                    return Some(logical_line);
-                }
-                _ => {}
-            }
+            if kind.is_any_newline() && !self.tokens.in_parenthesized_context() {
+                let indent_range = TextRange::new(self.line_start, first_token_range.start());
+
+                let indent_length =
+                    expand_indent(self.locator.slice(indent_range), self.indent_width);
+
+                self.max_preceding_blank_lines = self.max_preceding_blank_lines.max(blank_lines);
+
+                let logical_line = LogicalLineInfo {
+                    kind: logical_line_kind,
+                    first_token_range,
+                    last_token,
+                    logical_line_end: range.end(),
+                    is_comment_only: line_is_comment_only,
+                    is_beginning_of_cell: self.is_beginning_of_cell,
+                    is_docstring,
+                    indent_length,
+                    blank_lines,
+                    preceding_blank_lines: self.max_preceding_blank_lines,
+                };
+
+                // Reset the blank lines after a non-comment only line.
+                if !line_is_comment_only {
+                    self.max_preceding_blank_lines = BlankLines::Zero;
+                }
+
+                // Set the start for the next logical line.
+                self.line_start = range.end();
+
+                if self.cell_offsets.is_some() && !line_is_comment_only {
+                    self.is_beginning_of_cell = false;
+                }
+
+                return Some(logical_line);
+            }

if !is_non_logical_token(kind) {
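The blank_lines rewrite above swaps hand-rolled parenthesis counting for a question the token stream can answer itself. The sketch below shows an iterator in that spirit; the names mirror the diff (`in_parenthesized_context`), but the implementation is assumed, not ruff's:

    #[derive(Debug)]
    enum TokenKind {
        Lpar,
        Rpar,
        Lsqb,
        Rsqb,
        Lbrace,
        Rbrace,
        Newline,
        Name,
    }

    /// Iterates tokens while tracking bracket nesting, so callers can ask
    /// whether the current position is inside (), [] or {} instead of
    /// maintaining their own counter.
    struct TokensIterWithContext<'a> {
        inner: std::slice::Iter<'a, TokenKind>,
        nesting: u32,
    }

    impl<'a> Iterator for TokensIterWithContext<'a> {
        type Item = &'a TokenKind;

        fn next(&mut self) -> Option<Self::Item> {
            let kind = self.inner.next()?;
            match kind {
                TokenKind::Lpar | TokenKind::Lsqb | TokenKind::Lbrace => {
                    self.nesting += 1;
                }
                TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace => {
                    self.nesting = self.nesting.saturating_sub(1);
                }
                _ => {}
            }
            Some(kind)
        }
    }

    impl TokensIterWithContext<'_> {
        /// True while the most recently yielded token sits inside brackets.
        fn in_parenthesized_context(&self) -> bool {
            self.nesting > 0
        }
    }

With this, `LinePreprocessor` can treat a newline as the end of a logical line only when `in_parenthesized_context()` is false, which is what the rewritten `kind.is_any_newline() && !self.tokens.in_parenthesized_context()` check does.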
(Remaining changed files not shown.)
