Enable token-based rules on source with syntax errors
dhruvmanila committed Jun 20, 2024
1 parent a26bd01 commit b7134c9
Showing 27 changed files with 488 additions and 106 deletions.
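
The change that recurs through nearly every hunk below is the iteration boundary: token-based rules previously stopped at `tokens.up_to_first_unknown()`, i.e. at the first point where lexing gave up, and now walk the entire token stream so diagnostics keep flowing past a syntax error. A minimal, self-contained sketch of the old boundary (the `Tokens` type and `TokenKind` variants here are illustrative stand-ins, not ruff_python_parser's actual API):

    #[derive(Debug, Clone, Copy, PartialEq)]
    enum TokenKind {
        Unknown, // emitted where the lexer gave up on a syntax error
        Other,
    }

    struct Tokens(Vec<TokenKind>);

    impl Tokens {
        /// The old boundary: every token before the first Unknown one.
        fn up_to_first_unknown(&self) -> &[TokenKind] {
            let end = self
                .0
                .iter()
                .position(|&kind| kind == TokenKind::Unknown)
                .unwrap_or(self.0.len());
            &self.0[..end]
        }
    }

    fn main() {
        let tokens = Tokens(vec![TokenKind::Other, TokenKind::Unknown, TokenKind::Other]);
        // Old behavior: rules never saw anything after the syntax error.
        assert_eq!(tokens.up_to_first_unknown().len(), 1);
        // New behavior: iterate everything; stateful rules reset at recovery points.
        assert_eq!(tokens.0.len(), 3);
    }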
29 changes: 29 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/flake8_implicit_str_concat/ISC_syntax_error.py
@@ -0,0 +1,29 @@
# The lexer doesn't emit a string token if it's unterminated
"a" "b
"a" "b" "c
"a" """b
c""" "d

# For f-strings, the `FStringRanges` won't contain the range for
# unterminated f-strings.
f"a" f"b
f"a" f"b" f"c
f"a" f"""b
c""" f"d {e

(
    "a"
    "b
    "c"
    "d"
)


# Triple-quoted strings, if unterminated, consume everything after the
# opening quote, so no test code after this point should raise the violation.
(
    """abc"""
    f"""def
    "g" "h"
    "i" "j"
)
26 changes: 26 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/pycodestyle/E30_syntax_error.py
@@ -0,0 +1,26 @@
# Check for E30 errors in a file containing syntax errors with unclosed
# parentheses.

def foo[T1, T2():
    pass

def bar():
    pass



class Foo:
    def __init__(
        pass
    def method():
        pass

foo = Foo(


def top(
    def nested1():
        pass
    def nested2():
        pass

@@ -0,0 +1,13 @@
# These test cases contain syntax errors. The characters within the unterminated
# strings shouldn't be highlighted.

# Before any syntax error
b = ''
# Unterminated string
b = '
b = ''
# Unterminated f-string
b = f'
b = f''
# Implicitly concatenated
b = '' f'' '
2 changes: 1 addition & 1 deletion crates/ruff_linter/src/checkers/tokens.rs
@@ -93,7 +93,7 @@ pub(crate) fn check_tokens(
         Rule::InvalidCharacterNul,
         Rule::InvalidCharacterZeroWidthSpace,
     ]) {
-        for token in tokens.up_to_first_unknown() {
+        for token in tokens {
             pylint::rules::invalid_string_characters(
                 &mut diagnostics,
                 token.kind(),
2 changes: 1 addition & 1 deletion crates/ruff_linter/src/directives.rs
@@ -107,7 +107,7 @@ where
 fn extract_noqa_line_for(tokens: &Tokens, locator: &Locator, indexer: &Indexer) -> NoqaMapping {
     let mut string_mappings = Vec::new();
 
-    for token in tokens.up_to_first_unknown() {
+    for token in tokens {
         match token.kind() {
             TokenKind::EndOfFile => {
                 break;
2 changes: 1 addition & 1 deletion crates/ruff_linter/src/doc_lines.rs
@@ -24,7 +24,7 @@ pub(crate) struct DocLines<'a> {
 impl<'a> DocLines<'a> {
     fn new(tokens: &'a Tokens) -> Self {
         Self {
-            inner: tokens.up_to_first_unknown().iter(),
+            inner: tokens.iter(),
             prev: TextSize::default(),
         }
     }
8 changes: 6 additions & 2 deletions crates/ruff_linter/src/rules/flake8_commas/rules/trailing_commas.rs
@@ -231,7 +231,7 @@ pub(crate) fn trailing_commas(
     indexer: &Indexer,
 ) {
     let mut fstrings = 0u32;
-    let simple_tokens = tokens.up_to_first_unknown().iter().filter_map(|token| {
+    let simple_tokens = tokens.iter().filter_map(|token| {
         match token.kind() {
             // Completely ignore comments -- they just interfere with the logic.
             TokenKind::Comment => None,
@@ -253,7 +253,11 @@
                     None
                 }
             }
-            _ => {
+            kind => {
+                if matches!(kind, TokenKind::Newline if fstrings > 0) {
+                    // The parser recovered from an unterminated f-string.
+                    fstrings = 0;
+                }
                 if fstrings == 0 {
                     Some(SimpleToken::from(token.as_tuple()))
                 } else {
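
Because rules now see tokens emitted after a syntax error, each stateful rule needs a reset at parser-recovery points. In `trailing_commas`, a `Newline` can only be observed while `fstrings > 0` if an f-string was left unterminated (a terminated f-string would have produced `FStringEnd` first), so the counter is zeroed there. A rough sketch of that filter under simplified assumptions (the token kinds and `significant` helper are illustrative, not the rule's real `SimpleToken` machinery):

    #[derive(Debug, Clone, Copy, PartialEq)]
    enum TokenKind {
        FStringStart, // `f"` — no FStringEnd ever follows if it stays unterminated
        FStringEnd,
        Newline,
        Comma,
        Other,
    }

    fn significant(tokens: &[TokenKind]) -> Vec<TokenKind> {
        let mut fstrings = 0u32;
        tokens
            .iter()
            .copied()
            .filter_map(|kind| match kind {
                TokenKind::FStringStart => {
                    fstrings += 1;
                    None
                }
                TokenKind::FStringEnd => {
                    fstrings = fstrings.saturating_sub(1);
                    None
                }
                kind => {
                    if matches!(kind, TokenKind::Newline if fstrings > 0) {
                        // The parser recovered from an unterminated f-string.
                        fstrings = 0;
                    }
                    // Tokens inside an f-string are irrelevant to comma placement.
                    (fstrings == 0).then_some(kind)
                }
            })
            .collect()
    }

    fn main() {
        // Roughly `f"a {b` followed by a newline and then a comma — never closed.
        let tokens = [
            TokenKind::FStringStart,
            TokenKind::Other,
            TokenKind::Newline,
            TokenKind::Comma,
        ];
        // Everything after the recovery point is analyzed again.
        assert_eq!(significant(&tokens), vec![TokenKind::Newline, TokenKind::Comma]);
    }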
8 changes: 8 additions & 0 deletions crates/ruff_linter/src/rules/flake8_implicit_str_concat/mod.rs
@@ -15,6 +15,14 @@ mod tests {
 
     #[test_case(Rule::SingleLineImplicitStringConcatenation, Path::new("ISC.py"))]
     #[test_case(Rule::MultiLineImplicitStringConcatenation, Path::new("ISC.py"))]
+    #[test_case(
+        Rule::SingleLineImplicitStringConcatenation,
+        Path::new("ISC_syntax_error.py")
+    )]
+    #[test_case(
+        Rule::MultiLineImplicitStringConcatenation,
+        Path::new("ISC_syntax_error.py")
+    )]
     #[test_case(Rule::ExplicitStringConcatenation, Path::new("ISC.py"))]
     fn rules(rule_code: Rule, path: &Path) -> Result<()> {
         let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
1 change: 0 additions & 1 deletion crates/ruff_linter/src/rules/flake8_implicit_str_concat/rules/implicit.rs
@@ -98,7 +98,6 @@ pub(crate) fn implicit(
     indexer: &Indexer,
 ) {
     for (a_token, b_token) in tokens
-        .up_to_first_unknown()
         .iter()
         .filter(|token| {
             token.kind() != TokenKind::Comment
67 changes: 67 additions & 0 deletions crates/ruff_linter/src/rules/flake8_implicit_str_concat/snapshots/ruff_linter__rules__flake8_implicit_str_concat__tests__ISC001_ISC_syntax_error.py.snap
@@ -0,0 +1,67 @@
---
source: crates/ruff_linter/src/rules/flake8_implicit_str_concat/mod.rs
---
ISC_syntax_error.py:3:1: ISC001 [*] Implicitly concatenated string literals on one line
|
1 | # The lexer doesn't emit a string token if it's unterminated
2 | "a" "b
3 | "a" "b" "c
| ^^^^^^^ ISC001
4 | "a" """b
5 | c""" "d
|
= help: Combine string literals

Safe fix
1 1 | # The lexer doesn't emit a string token if it's unterminated
2 2 | "a" "b
3 |-"a" "b" "c
3 |+"ab" "c
4 4 | "a" """b
5 5 | c""" "d
6 6 |

ISC_syntax_error.py:4:1: ISC001 Implicitly concatenated string literals on one line
|
2 | "a" "b
3 | "a" "b" "c
4 | / "a" """b
5 | | c""" "d
| |____^ ISC001
6 |
7 | # For f-strings, the `FStringRanges` won't contain the range for
|
= help: Combine string literals

ISC_syntax_error.py:10:1: ISC001 [*] Implicitly concatenated string literals on one line
|
8 | # unterminated f-strings.
9 | f"a" f"b
10 | f"a" f"b" f"c
| ^^^^^^^^^ ISC001
11 | f"a" f"""b
12 | c""" f"d {e
|
= help: Combine string literals

Safe fix
7 7 | # For f-strings, the `FStringRanges` won't contain the range for
8 8 | # unterminated f-strings.
9 9 | f"a" f"b
10 |-f"a" f"b" f"c
10 |+f"ab" f"c
11 11 | f"a" f"""b
12 12 | c""" f"d {e
13 13 |

ISC_syntax_error.py:11:1: ISC001 Implicitly concatenated string literals on one line
|
9 | f"a" f"b
10 | f"a" f"b" f"c
11 | / f"a" f"""b
12 | | c""" f"d {e
| |____^ ISC001
13 |
14 | (
|
= help: Combine string literals
4 changes: 4 additions & 0 deletions crates/ruff_linter/src/rules/flake8_implicit_str_concat/snapshots/ruff_linter__rules__flake8_implicit_str_concat__tests__ISC002_ISC_syntax_error.py.snap
@@ -0,0 +1,4 @@
---
source: crates/ruff_linter/src/rules/flake8_implicit_str_concat/mod.rs
---

8 changes: 8 additions & 0 deletions crates/ruff_linter/src/rules/pycodestyle/mod.rs
@@ -195,6 +195,14 @@ mod tests {
     #[test_case(Rule::BlankLineAfterDecorator, Path::new("E30.py"))]
     #[test_case(Rule::BlankLinesAfterFunctionOrClass, Path::new("E30.py"))]
     #[test_case(Rule::BlankLinesBeforeNestedDefinition, Path::new("E30.py"))]
+    #[test_case(Rule::BlankLineBetweenMethods, Path::new("E30_syntax_error.py"))]
+    #[test_case(Rule::BlankLinesTopLevel, Path::new("E30_syntax_error.py"))]
+    #[test_case(Rule::TooManyBlankLines, Path::new("E30_syntax_error.py"))]
+    #[test_case(Rule::BlankLinesAfterFunctionOrClass, Path::new("E30_syntax_error.py"))]
+    #[test_case(
+        Rule::BlankLinesBeforeNestedDefinition,
+        Path::new("E30_syntax_error.py")
+    )]
     fn blank_lines(rule_code: Rule, path: &Path) -> Result<()> {
         let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
         let diagnostics = test_path(
70 changes: 37 additions & 33 deletions crates/ruff_linter/src/rules/pycodestyle/rules/blank_lines.rs
@@ -405,7 +405,7 @@ impl<'a> LinePreprocessor<'a> {
         cell_offsets: Option<&'a CellOffsets>,
     ) -> LinePreprocessor<'a> {
         LinePreprocessor {
-            tokens: tokens.up_to_first_unknown().iter().peekable(),
+            tokens: tokens.iter().peekable(),
             locator,
             line_start: TextSize::new(0),
             max_preceding_blank_lines: BlankLines::Zero,
@@ -506,43 +506,47 @@ impl<'a> Iterator for LinePreprocessor<'a> {
                 TokenKind::Rbrace | TokenKind::Rpar | TokenKind::Rsqb => {
                     parens = parens.saturating_sub(1);
                 }
-                TokenKind::Newline | TokenKind::NonLogicalNewline if parens == 0 => {
-                    let indent_range = TextRange::new(self.line_start, first_token_range.start());
-
-                    let indent_length =
-                        expand_indent(self.locator.slice(indent_range), self.indent_width);
-
-                    self.max_preceding_blank_lines =
-                        self.max_preceding_blank_lines.max(blank_lines);
-
-                    let logical_line = LogicalLineInfo {
-                        kind: logical_line_kind,
-                        first_token_range,
-                        last_token,
-                        logical_line_end: range.end(),
-                        is_comment_only: line_is_comment_only,
-                        is_beginning_of_cell: self.is_beginning_of_cell,
-                        is_docstring,
-                        indent_length,
-                        blank_lines,
-                        preceding_blank_lines: self.max_preceding_blank_lines,
-                    };
+                TokenKind::Newline if parens > 0 => {
+                    // The parser recovered from an unclosed parenthesis.
+                    parens = 0;
+                }
+                _ => {}
+            }
 
-                    // Reset the blank lines after a non-comment only line.
-                    if !line_is_comment_only {
-                        self.max_preceding_blank_lines = BlankLines::Zero;
-                    }
+            if matches!(kind, TokenKind::Newline | TokenKind::NonLogicalNewline if parens == 0) {
+                let indent_range = TextRange::new(self.line_start, first_token_range.start());
+
+                let indent_length =
+                    expand_indent(self.locator.slice(indent_range), self.indent_width);
+
+                self.max_preceding_blank_lines = self.max_preceding_blank_lines.max(blank_lines);
+
+                let logical_line = LogicalLineInfo {
+                    kind: logical_line_kind,
+                    first_token_range,
+                    last_token,
+                    logical_line_end: range.end(),
+                    is_comment_only: line_is_comment_only,
+                    is_beginning_of_cell: self.is_beginning_of_cell,
+                    is_docstring,
+                    indent_length,
+                    blank_lines,
+                    preceding_blank_lines: self.max_preceding_blank_lines,
+                };
 
-                    // Set the start for the next logical line.
-                    self.line_start = range.end();
+                // Reset the blank lines after a non-comment only line.
+                if !line_is_comment_only {
+                    self.max_preceding_blank_lines = BlankLines::Zero;
+                }
 
-                    if self.cell_offsets.is_some() && !line_is_comment_only {
-                        self.is_beginning_of_cell = false;
-                    }
+                // Set the start for the next logical line.
+                self.line_start = range.end();
 
-                    return Some(logical_line);
-                }
-                _ => {}
-            }
+                if self.cell_offsets.is_some() && !line_is_comment_only {
+                    self.is_beginning_of_cell = false;
+                }
+
+                return Some(logical_line);
+            }
 
             if !kind.is_trivia() {
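
The restructuring above is not just cosmetic. With the old arm guarded by `if parens == 0`, a `Newline` seen while `parens > 0` (i.e. right after the parser recovered from an unclosed parenthesis) would have fallen through to `_ => {}` and the logical line would never have been flushed. Resetting `parens` inside the match and re-checking with `matches!` afterwards lets the same token both record the recovery and terminate the line. A compact sketch of that ordering (illustrative types, not the real `LinePreprocessor`):

    #[derive(Clone, Copy)]
    enum TokenKind {
        Lpar,
        Newline,
        Other,
    }

    fn count_logical_lines(tokens: &[TokenKind]) -> usize {
        let mut parens = 0u32;
        let mut lines = 0;
        for &kind in tokens {
            match kind {
                TokenKind::Lpar => parens += 1,
                TokenKind::Newline if parens > 0 => {
                    // The parser recovered from an unclosed parenthesis.
                    parens = 0;
                }
                _ => {}
            }
            // Checked *after* the reset, so a recovery newline still ends the line.
            if matches!(kind, TokenKind::Newline if parens == 0) {
                lines += 1;
            }
        }
        lines
    }

    fn main() {
        // `foo = Foo(` followed by a newline: one logical line despite the recovery.
        let tokens = [TokenKind::Other, TokenKind::Lpar, TokenKind::Newline];
        assert_eq!(count_logical_lines(&tokens), 1);
    }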
6 changes: 5 additions & 1 deletion crates/ruff_linter/src/rules/pycodestyle/rules/compound_statements.rs
@@ -134,7 +134,7 @@ pub(crate) fn compound_statements(
     let mut indent = 0u32;
 
     // Use an iterator to allow passing it around.
-    let mut token_iter = tokens.up_to_first_unknown().iter();
+    let mut token_iter = tokens.iter();
 
     loop {
         let Some(token) = token_iter.next() else {
@@ -148,6 +148,10 @@
             TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace => {
                 nesting = nesting.saturating_sub(1);
             }
+            TokenKind::Newline if nesting > 0 => {
+                // The parser recovered from an unclosed parenthesis.
+                nesting = 0;
+            }
             TokenKind::Ellipsis => {
                 if allow_ellipsis {
                     allow_ellipsis = false;
7 changes: 6 additions & 1 deletion crates/ruff_linter/src/rules/pycodestyle/rules/logical_lines/mod.rs
@@ -65,7 +65,7 @@ impl<'a> LogicalLines<'a> {
         let mut builder = LogicalLinesBuilder::with_capacity(tokens.len());
         let mut parens = 0u32;
 
-        for token in tokens.up_to_first_unknown() {
+        for token in tokens {
             builder.push_token(token.kind(), token.range());
 
             match token.kind() {
@@ -75,6 +75,11 @@
                 TokenKind::Rbrace | TokenKind::Rpar | TokenKind::Rsqb => {
                     parens = parens.saturating_sub(1);
                 }
+                TokenKind::Newline if parens > 0 => {
+                    // The parser recovered from an unclosed parenthesis.
+                    parens = 0;
+                    builder.finish_line();
+                }
                 TokenKind::Newline | TokenKind::NonLogicalNewline if parens == 0 => {
                     builder.finish_line();
                 }
2 changes: 1 addition & 1 deletion crates/ruff_linter/src/rules/pycodestyle/rules/too_many_newlines_at_end_of_file.rs
@@ -60,7 +60,7 @@ pub(crate) fn too_many_newlines_at_end_of_file(diagnostics: &mut Vec<Diagnostic>
     let mut end: Option<TextSize> = None;
 
     // Count the number of trailing newlines.
-    for token in tokens.up_to_first_unknown().iter().rev() {
+    for token in tokens.iter().rev() {
         match token.kind() {
             TokenKind::NonLogicalNewline | TokenKind::Newline => {
                 if num_trailing_newlines == 0 {
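
This rule scans the stream in reverse to count trailing newlines, so the only change it needs is the wider iteration boundary; the counting logic itself is unchanged. A small sketch of that backwards walk (simplified: the real rule also tracks text ranges and the `EndOfFile` token):

    #[derive(Clone, Copy)]
    enum TokenKind {
        Newline,
        NonLogicalNewline,
        Other,
    }

    fn trailing_newlines(tokens: &[TokenKind]) -> u32 {
        let mut count = 0;
        // Walk backwards over *all* tokens, not just up to the first unknown one.
        for &kind in tokens.iter().rev() {
            match kind {
                TokenKind::Newline | TokenKind::NonLogicalNewline => count += 1,
                _ => break,
            }
        }
        count
    }

    fn main() {
        let tokens = [TokenKind::Other, TokenKind::Newline, TokenKind::NonLogicalNewline];
        assert_eq!(trailing_newlines(&tokens), 2);
    }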
21 changes: 21 additions & 0 deletions crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__E301_E30_syntax_error.py.snap
@@ -0,0 +1,21 @@
---
source: crates/ruff_linter/src/rules/pycodestyle/mod.rs
---
E30_syntax_error.py:15:5: E301 [*] Expected 1 blank line, found 0
   |
13 |     def __init__(
14 |         pass
15 |     def method():
   |     ^^^ E301
16 |         pass
   |
   = help: Add missing blank line

Safe fix
12 12 | class Foo:
13 13 |     def __init__(
14 14 |         pass
   15 |+
15 16 |     def method():
16 17 |         pass
17 18 |