Enable token-based rules on source with syntax errors
dhruvmanila committed Jun 28, 2024
1 parent b28dc9a commit 1961406
Showing 27 changed files with 480 additions and 148 deletions.
29 changes: 29 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/flake8_implicit_str_concat/ISC_syntax_error.py
@@ -0,0 +1,29 @@
# The lexer doesn't emit a string token if it's unterminated
"a" "b
"a" "b" "c
"a" """b
c""" "d

# For f-strings, the `FStringRanges` won't contain the range for
# unterminated f-strings.
f"a" f"b
f"a" f"b" f"c
f"a" f"""b
c""" f"d {e

(
    "a"
    "b
    "c"
    "d"
)


# Triple-quoted strings, if unterminated, consume everything that comes after
# the opening quote. So, no test code should raise the violation after this.
(
    """abc"""
    f"""def
    "g" "h"
    "i" "j"
)
26 changes: 26 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/pycodestyle/E30_syntax_error.py
@@ -0,0 +1,26 @@
# Check for E30 errors in a file containing syntax errors with unclosed
# parenthesis.

def foo[T1, T2():
    pass

def bar():
    pass



class Foo:
    def __init__(
        pass
    def method():
        pass

foo = Foo(


def top(
    def nested1():
        pass
    def nested2():
        pass

@@ -0,0 +1,13 @@
# These test cases contain syntax errors. The characters within the unterminated
# strings shouldn't be highlighted.

# Before any syntax error
b = ''
# Unterminated string
b = '
b = ''
# Unterminated f-string
b = f'
b = f''
# Implicitly concatenated
b = '' f'' '
2 changes: 1 addition & 1 deletion crates/ruff_linter/src/checkers/tokens.rs
@@ -93,7 +93,7 @@ pub(crate) fn check_tokens(
        Rule::InvalidCharacterNul,
        Rule::InvalidCharacterZeroWidthSpace,
    ]) {
-        for token in tokens.up_to_first_unknown() {
+        for token in tokens {
            pylint::rules::invalid_string_characters(
                &mut diagnostics,
                token.kind(),
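Note: this one-line change is the heart of the commit and repeats across the files below. Token-based rules used to stop at the first unparseable token via `up_to_first_unknown()`; with the error-recovering lexer, the whole token stream is now safe to visit. A minimal sketch of the two behaviors, using stand-in types rather than ruff's actual `Tokens` API:

    #[derive(Debug)]
    enum TokenKind {
        Name,
        String,
        Unknown, // emitted where the lexer hit unparseable source
    }

    struct Tokens(Vec<TokenKind>);

    impl Tokens {
        // Old behavior: truncate the stream at the first `Unknown` token.
        fn up_to_first_unknown(&self) -> &[TokenKind] {
            let end = self
                .0
                .iter()
                .position(|kind| matches!(kind, TokenKind::Unknown))
                .unwrap_or(self.0.len());
            &self.0[..end]
        }
    }

    // New behavior: `for token in tokens` visits every token, errors included.
    impl<'a> IntoIterator for &'a Tokens {
        type Item = &'a TokenKind;
        type IntoIter = std::slice::Iter<'a, TokenKind>;
        fn into_iter(self) -> Self::IntoIter {
            self.0.iter()
        }
    }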
9 changes: 2 additions & 7 deletions crates/ruff_linter/src/directives.rs
@@ -107,14 +107,9 @@
 fn extract_noqa_line_for(tokens: &Tokens, locator: &Locator, indexer: &Indexer) -> NoqaMapping {
     let mut string_mappings = Vec::new();

-    for token in tokens.up_to_first_unknown() {
+    for token in tokens {
         match token.kind() {
-            TokenKind::EndOfFile => {
-                break;
-            }
-
-            // For multi-line strings, we expect `noqa` directives on the last line of the
-            // string.
+            // For multi-line strings, we expect `noqa` directives on the last line of the string.
             TokenKind::String if token.is_triple_quoted_string() => {
                 if locator.contains_line_break(token.range()) {
                     string_mappings.push(TextRange::new(
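The surviving comment states the invariant this function maintains: for a multi-line (triple-quoted) string, the `# noqa` directive is expected on the string's last line, so diagnostics anywhere inside the string must be remapped to that line. A rough, self-contained illustration of the remapping idea (assumed types, not ruff's `NoqaMapping`):

    /// Maps a line to the line whose `# noqa` comment governs it.
    /// `string_ranges` holds (first_line, last_line) pairs for strings
    /// that span multiple lines.
    fn noqa_line_for(string_ranges: &[(u32, u32)], line: u32) -> u32 {
        string_ranges
            .iter()
            .find(|(first, last)| (*first..=*last).contains(&line))
            .map_or(line, |&(_, last)| last)
    }

    fn main() {
        // A string spanning lines 3..=6 is suppressed by `# noqa` on line 6.
        assert_eq!(noqa_line_for(&[(3, 6)], 4), 6);
        // Lines outside any multi-line string map to themselves.
        assert_eq!(noqa_line_for(&[(3, 6)], 8), 8);
    }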
2 changes: 1 addition & 1 deletion crates/ruff_linter/src/doc_lines.rs
@@ -24,7 +24,7 @@ pub(crate) struct DocLines<'a> {
 impl<'a> DocLines<'a> {
     fn new(tokens: &'a Tokens) -> Self {
         Self {
-            inner: tokens.up_to_first_unknown().iter(),
+            inner: tokens.iter(),
             prev: TextSize::default(),
         }
     }
crates/ruff_linter/src/rules/flake8_commas/rules/trailing_commas.rs
@@ -231,7 +231,7 @@ pub(crate) fn trailing_commas(
     indexer: &Indexer,
 ) {
     let mut fstrings = 0u32;
-    let simple_tokens = tokens.up_to_first_unknown().iter().filter_map(|token| {
+    let simple_tokens = tokens.iter().filter_map(|token| {
         match token.kind() {
             // Completely ignore comments -- they just interfere with the logic.
             TokenKind::Comment => None,
@@ -253,7 +253,11 @@
                     None
                 }
             }
-            _ => {
+            kind => {
+                if matches!(kind, TokenKind::Newline if fstrings > 0) {
+                    // The parser recovered from an unterminated f-string.
+                    fstrings = 0;
+                }
                 if fstrings == 0 {
                     Some(SimpleToken::from(token.as_tuple()))
                 } else {
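The new `kind =>` arm deals with parser recovery: tokens between an f-string start and its end are normally skipped, so seeing a logical `Newline` while the f-string counter is still positive can only mean the f-string was left unterminated and the parser recovered. Resetting the counter stops every subsequent token from being swallowed. A self-contained sketch of that counter logic with simplified, assumed token kinds:

    #[derive(Clone, Copy, PartialEq, Eq, Debug)]
    enum TokenKind {
        FStringStart,
        FStringEnd,
        Newline,
        Comma,
        Name,
    }

    /// Returns the tokens that the trailing-comma analysis would see.
    fn visible_tokens(tokens: &[TokenKind]) -> Vec<TokenKind> {
        let mut fstrings = 0u32;
        tokens
            .iter()
            .copied()
            .filter_map(|kind| match kind {
                TokenKind::FStringStart => {
                    fstrings += 1;
                    None
                }
                TokenKind::FStringEnd => {
                    fstrings = fstrings.saturating_sub(1);
                    None
                }
                kind => {
                    if matches!(kind, TokenKind::Newline if fstrings > 0) {
                        // The parser recovered from an unterminated f-string.
                        fstrings = 0;
                    }
                    (fstrings == 0).then_some(kind)
                }
            })
            .collect()
    }

Without the reset, an unterminated f-string such as `f"a {b` would leave the counter stuck above zero and hide every token on the following lines from the rule.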
crates/ruff_linter/src/rules/flake8_implicit_str_concat/mod.rs
@@ -15,6 +15,14 @@ mod tests {

     #[test_case(Rule::SingleLineImplicitStringConcatenation, Path::new("ISC.py"))]
     #[test_case(Rule::MultiLineImplicitStringConcatenation, Path::new("ISC.py"))]
+    #[test_case(
+        Rule::SingleLineImplicitStringConcatenation,
+        Path::new("ISC_syntax_error.py")
+    )]
+    #[test_case(
+        Rule::MultiLineImplicitStringConcatenation,
+        Path::new("ISC_syntax_error.py")
+    )]
     #[test_case(Rule::ExplicitStringConcatenation, Path::new("ISC.py"))]
     fn rules(rule_code: Rule, path: &Path) -> Result<()> {
         let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
crates/ruff_linter/src/rules/flake8_implicit_str_concat/rules/implicit.rs
@@ -98,7 +98,6 @@ pub(crate) fn implicit(
     indexer: &Indexer,
 ) {
     for (a_token, b_token) in tokens
-        .up_to_first_unknown()
         .iter()
         .filter(|token| {
             token.kind() != TokenKind::Comment
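For context, the loop above walks the filtered token stream in overlapping pairs (`a_token`, `b_token`): two adjacent string tokens are exactly the shape of an implicit concatenation. A sketch of that pairing with itertools and assumed, simplified token kinds (the real rule works on ruff's `Token` values and their source ranges):

    use itertools::Itertools; // for `tuple_windows`

    #[derive(Clone, Copy, PartialEq, Eq, Debug)]
    enum TokenKind {
        String,
        Comment,
        NonLogicalNewline,
        Name,
    }

    /// Yields (left, right) pairs of adjacent string tokens, ignoring
    /// comments and in-parentheses line breaks between them.
    fn concatenation_pairs(tokens: &[TokenKind]) -> Vec<(TokenKind, TokenKind)> {
        tokens
            .iter()
            .copied()
            .filter(|kind| !matches!(kind, TokenKind::Comment | TokenKind::NonLogicalNewline))
            .tuple_windows()
            .filter(|(a, b)| *a == TokenKind::String && *b == TokenKind::String)
            .collect()
    }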
@@ -0,0 +1,67 @@
---
source: crates/ruff_linter/src/rules/flake8_implicit_str_concat/mod.rs
---
ISC_syntax_error.py:3:1: ISC001 [*] Implicitly concatenated string literals on one line
|
1 | # The lexer doesn't emit a string token if it's unterminated
2 | "a" "b
3 | "a" "b" "c
| ^^^^^^^ ISC001
4 | "a" """b
5 | c""" "d
|
= help: Combine string literals

ℹ Safe fix
1 1 | # The lexer doesn't emit a string token if it's unterminated
2 2 | "a" "b
3 |-"a" "b" "c
3 |+"ab" "c
4 4 | "a" """b
5 5 | c""" "d
6 6 |

ISC_syntax_error.py:4:1: ISC001 Implicitly concatenated string literals on one line
|
2 | "a" "b
3 | "a" "b" "c
4 | / "a" """b
5 | | c""" "d
| |____^ ISC001
6 |
7 | # For f-strings, the `FStringRanges` won't contain the range for
|
= help: Combine string literals

ISC_syntax_error.py:10:1: ISC001 [*] Implicitly concatenated string literals on one line
|
8 | # unterminated f-strings.
9 | f"a" f"b
10 | f"a" f"b" f"c
| ^^^^^^^^^ ISC001
11 | f"a" f"""b
12 | c""" f"d {e
|
= help: Combine string literals

ℹ Safe fix
7 7 | # For f-strings, the `FStringRanges` won't contain the range for
8 8 | # unterminated f-strings.
9 9 | f"a" f"b
10 |-f"a" f"b" f"c
10 |+f"ab" f"c
11 11 | f"a" f"""b
12 12 | c""" f"d {e
13 13 |

ISC_syntax_error.py:11:1: ISC001 Implicitly concatenated string literals on one line
|
9 | f"a" f"b
10 | f"a" f"b" f"c
11 | / f"a" f"""b
12 | | c""" f"d {e
| |____^ ISC001
13 |
14 | (
|
= help: Combine string literals
@@ -0,0 +1,4 @@
---
source: crates/ruff_linter/src/rules/flake8_implicit_str_concat/mod.rs
---

8 changes: 8 additions & 0 deletions crates/ruff_linter/src/rules/pycodestyle/mod.rs
@@ -191,6 +191,14 @@ mod tests {
     #[test_case(Rule::BlankLineAfterDecorator, Path::new("E30.py"))]
     #[test_case(Rule::BlankLinesAfterFunctionOrClass, Path::new("E30.py"))]
     #[test_case(Rule::BlankLinesBeforeNestedDefinition, Path::new("E30.py"))]
+    #[test_case(Rule::BlankLineBetweenMethods, Path::new("E30_syntax_error.py"))]
+    #[test_case(Rule::BlankLinesTopLevel, Path::new("E30_syntax_error.py"))]
+    #[test_case(Rule::TooManyBlankLines, Path::new("E30_syntax_error.py"))]
+    #[test_case(Rule::BlankLinesAfterFunctionOrClass, Path::new("E30_syntax_error.py"))]
+    #[test_case(
+        Rule::BlankLinesBeforeNestedDefinition,
+        Path::new("E30_syntax_error.py")
+    )]
     fn blank_lines(rule_code: Rule, path: &Path) -> Result<()> {
         let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
         let diagnostics = test_path(
77 changes: 33 additions & 44 deletions crates/ruff_linter/src/rules/pycodestyle/rules/blank_lines.rs
@@ -1,7 +1,7 @@
 use itertools::Itertools;
 use ruff_notebook::CellOffsets;
-use ruff_python_parser::Token;
 use ruff_python_parser::Tokens;
+use ruff_python_parser::TokensIterWithContext;
 use std::cmp::Ordering;
 use std::iter::Peekable;
 use std::num::NonZeroU32;
@@ -384,7 +384,7 @@ struct LogicalLineInfo {
 /// Iterator that processes tokens until a full logical line (or comment line) is "built".
 /// It then returns characteristics of that logical line (see `LogicalLineInfo`).
 struct LinePreprocessor<'a> {
-    tokens: Peekable<Iter<'a, Token>>,
+    tokens: TokensIterWithContext<'a>,
     locator: &'a Locator<'a>,
     indent_width: IndentWidth,
     /// The start position of the next logical line.
@@ -406,7 +406,7 @@ impl<'a> LinePreprocessor<'a> {
         cell_offsets: Option<&'a CellOffsets>,
     ) -> LinePreprocessor<'a> {
         LinePreprocessor {
-            tokens: tokens.up_to_first_unknown().iter().peekable(),
+            tokens: tokens.iter_with_context(),
             locator,
             line_start: TextSize::new(0),
             max_preceding_blank_lines: BlankLines::Zero,
@@ -428,7 +428,6 @@ impl<'a> Iterator for LinePreprocessor<'a> {
         let mut blank_lines = BlankLines::Zero;
         let mut first_logical_line_token: Option<(LogicalLineKind, TextRange)> = None;
         let mut last_token = TokenKind::EndOfFile;
-        let mut parens = 0u32;

         while let Some(token) = self.tokens.next() {
             let (kind, range) = token.as_tuple();
@@ -500,50 +499,40 @@
                 is_docstring = false;
             }

-            match kind {
-                TokenKind::Lbrace | TokenKind::Lpar | TokenKind::Lsqb => {
-                    parens = parens.saturating_add(1);
-                }
-                TokenKind::Rbrace | TokenKind::Rpar | TokenKind::Rsqb => {
-                    parens = parens.saturating_sub(1);
-                }
-                TokenKind::Newline | TokenKind::NonLogicalNewline if parens == 0 => {
-                    let indent_range = TextRange::new(self.line_start, first_token_range.start());
-
-                    let indent_length =
-                        expand_indent(self.locator.slice(indent_range), self.indent_width);
-
-                    self.max_preceding_blank_lines =
-                        self.max_preceding_blank_lines.max(blank_lines);
-
-                    let logical_line = LogicalLineInfo {
-                        kind: logical_line_kind,
-                        first_token_range,
-                        last_token,
-                        logical_line_end: range.end(),
-                        is_comment_only: line_is_comment_only,
-                        is_beginning_of_cell: self.is_beginning_of_cell,
-                        is_docstring,
-                        indent_length,
-                        blank_lines,
-                        preceding_blank_lines: self.max_preceding_blank_lines,
-                    };
-
-                    // Reset the blank lines after a non-comment only line.
-                    if !line_is_comment_only {
-                        self.max_preceding_blank_lines = BlankLines::Zero;
-                    }
-
-                    // Set the start for the next logical line.
-                    self.line_start = range.end();
-
-                    if self.cell_offsets.is_some() && !line_is_comment_only {
-                        self.is_beginning_of_cell = false;
-                    }
-
-                    return Some(logical_line);
-                }
-                _ => {}
-            }
+            if kind.is_any_newline() && !self.tokens.in_parenthesized_context() {
+                let indent_range = TextRange::new(self.line_start, first_token_range.start());
+
+                let indent_length =
+                    expand_indent(self.locator.slice(indent_range), self.indent_width);
+
+                self.max_preceding_blank_lines = self.max_preceding_blank_lines.max(blank_lines);
+
+                let logical_line = LogicalLineInfo {
+                    kind: logical_line_kind,
+                    first_token_range,
+                    last_token,
+                    logical_line_end: range.end(),
+                    is_comment_only: line_is_comment_only,
+                    is_beginning_of_cell: self.is_beginning_of_cell,
+                    is_docstring,
+                    indent_length,
+                    blank_lines,
+                    preceding_blank_lines: self.max_preceding_blank_lines,
+                };
+
+                // Reset the blank lines after a non-comment only line.
+                if !line_is_comment_only {
+                    self.max_preceding_blank_lines = BlankLines::Zero;
+                }
+
+                // Set the start for the next logical line.
+                self.line_start = range.end();
+
+                if self.cell_offsets.is_some() && !line_is_comment_only {
+                    self.is_beginning_of_cell = false;
+                }
+
+                return Some(logical_line);
+            }

if !is_non_logical_token(kind) {
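The blank_lines rewrite above swaps hand-rolled parenthesis counting for a question the token stream can answer itself. The sketch below shows an iterator in that spirit; the names mirror the diff (`in_parenthesized_context`), but the implementation is assumed, not ruff's:

    #[derive(Debug)]
    enum TokenKind {
        Lpar,
        Rpar,
        Lsqb,
        Rsqb,
        Lbrace,
        Rbrace,
        Newline,
        Name,
    }

    /// Iterates tokens while tracking bracket nesting, so callers can ask
    /// whether the current position is inside (), [] or {} instead of
    /// maintaining their own counter.
    struct TokensIterWithContext<'a> {
        inner: std::slice::Iter<'a, TokenKind>,
        nesting: u32,
    }

    impl<'a> Iterator for TokensIterWithContext<'a> {
        type Item = &'a TokenKind;

        fn next(&mut self) -> Option<Self::Item> {
            let kind = self.inner.next()?;
            match kind {
                TokenKind::Lpar | TokenKind::Lsqb | TokenKind::Lbrace => {
                    self.nesting += 1;
                }
                TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace => {
                    self.nesting = self.nesting.saturating_sub(1);
                }
                _ => {}
            }
            Some(kind)
        }
    }

    impl TokensIterWithContext<'_> {
        /// True while the most recently yielded token sits inside brackets.
        fn in_parenthesized_context(&self) -> bool {
            self.nesting > 0
        }
    }

With this, `LinePreprocessor` can treat a newline as the end of a logical line only when `in_parenthesized_context()` is false, which is what the rewritten `kind.is_any_newline() && !self.tokens.in_parenthesized_context()` check does.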
(Remaining changed files not shown.)
