Skip to content

Commit

Permalink
Use empty range when there's "gap" in token source (#11032)
Browse files Browse the repository at this point in the history
## Summary

This fixes a bug where the parser would panic when there is a "gap" in
the token source.

What's a gap?

The reason it's `<=` instead of just `==` is because there could be
whitespaces between
the two tokens. For example:

```python
#     last token end
#     | current token (newline) start
#     v v
def foo \n
#      ^
#      assume there's trailing whitespace here
```

Or, there could tokens that are considered "trivia" and thus aren't
emitted by the token
source. These are comments and non-logical newlines. For example:

```python
#     last token end
#     v
def foo # comment\n
#                ^ current token (newline) start
```

In either of the above cases, there's a "gap" between the end of the
last token and start
of the current token.

## Test Plan

Add test cases and update the snapshots.
  • Loading branch information
dhruvmanila authored Apr 19, 2024
1 parent 9b80cc0 commit d3cd61f
Show file tree
Hide file tree
Showing 5 changed files with 195 additions and 26 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
def foo # comment
def bar(): ...
def baz
59 changes: 53 additions & 6 deletions crates/ruff_python_parser/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -261,12 +261,59 @@ impl<'src> Parser<'src> {
}

fn node_range(&self, start: TextSize) -> TextRange {
// It's possible during error recovery that the parsing didn't consume any tokens. In that case,
// `last_token_end` still points to the end of the previous token but `start` is the start of the current token.
// Calling `TextRange::new(start, self.last_token_end)` would panic in that case because `start > end`.
// This path "detects" this case and creates an empty range instead.
if self.node_start() == start {
TextRange::empty(start)
// It's possible during error recovery that the parsing didn't consume any tokens. In that
// case, `last_token_end` still points to the end of the previous token but `start` is the
// start of the current token. Calling `TextRange::new(start, self.last_token_end)` would
// panic in that case because `start > end`. This path "detects" this case and creates an
// empty range instead.
//
// The reason it's `<=` instead of just `==` is because there could be whitespaces between
// the two tokens. For example:
//
// ```python
// # last token end
// # | current token (newline) start
// # v v
// def foo \n
// # ^
// # assume there's trailing whitespace here
// ```
//
// Or, there could tokens that are considered "trivia" and thus aren't emitted by the token
// source. These are comments and non-logical newlines. For example:
//
// ```python
// # last token end
// # v
// def foo # comment\n
// # ^ current token (newline) start
// ```
//
// In either of the above cases, there's a "gap" between the end of the last token and start
// of the current token.
if self.last_token_end <= start {
// We need to create an empty range at the last token end instead of the start because
// otherwise this node range will fall outside the range of it's parent node. Taking
// the above example:
//
// ```python
// if True:
// # function start
// # | function end
// # v v
// def foo # comment
// # ^ current token start
// ```
//
// Here, the current token start is the start of parameter range but the function ends
// at `foo`. Even if there's a function body, the range of parameters would still be
// before the comment.

// test_err node_range_with_gaps
// def foo # comment
// def bar(): ...
// def baz
TextRange::empty(self.last_token_end)
} else {
TextRange::new(start, self.last_token_end)
}
Expand Down
35 changes: 16 additions & 19 deletions crates/ruff_python_parser/src/parser/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1663,23 +1663,19 @@ impl<'src> Parser<'src> {
// x = 10
let type_params = self.try_parse_type_params();

// test_ok function_def_parameter_range
// def foo(
// first: int,
// second: int,
// ) -> int: ...

// test_err function_def_unclosed_parameter_list
// def foo(a: int, b:
// def foo():
// return 42
// def foo(a: int, b: str
// x = 10
let parameters_start = self.node_start();
self.expect(TokenKind::Lpar);
let mut parameters = self.parse_parameters(FunctionKind::FunctionDef);
self.expect(TokenKind::Rpar);

// test_ok function_def_parameter_range
// def foo(
// first: int,
// second: int,
// ) -> int: ...
parameters.range = self.node_range(parameters_start);
let parameters = self.parse_parameters(FunctionKind::FunctionDef);

let returns = if self.eat(TokenKind::Rarrow) {
if self.at_expr() {
Expand Down Expand Up @@ -2844,19 +2840,16 @@ impl<'src> Parser<'src> {
pub(super) fn parse_parameters(&mut self, function_kind: FunctionKind) -> ast::Parameters {
let start = self.node_start();

if matches!(function_kind, FunctionKind::FunctionDef) {
self.expect(TokenKind::Lpar);
}

// TODO(dhruvmanila): This has the same problem as `parse_match_pattern_mapping`
// has where if there are multiple kwarg or vararg, the last one will win and
// the parser will drop the previous ones. Another thing is the vararg and kwarg
// uses `Parameter` (not `ParameterWithDefault`) which means that the parser cannot
// recover well from `*args=(1, 2)`.
let mut parameters = ast::Parameters {
range: TextRange::default(),
posonlyargs: vec![],
args: vec![],
kwonlyargs: vec![],
vararg: None,
kwarg: None,
};
let mut parameters = ast::Parameters::empty(TextRange::default());

let mut seen_default_param = false; // `a=10`
let mut seen_positional_only_separator = false; // `/`
Expand Down Expand Up @@ -3094,6 +3087,10 @@ impl<'src> Parser<'src> {
self.add_error(ParseErrorType::ExpectedKeywordParam, star_range);
}

if matches!(function_kind, FunctionKind::FunctionDef) {
self.expect(TokenKind::Rpar);
}

parameters.range = self.node_range(start);

// test_err params_duplicate_names
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
---
source: crates/ruff_python_parser/tests/fixtures.rs
input_file: crates/ruff_python_parser/resources/inline/err/node_range_with_gaps.py
---
## AST

```
Module(
ModModule {
range: 0..41,
body: [
FunctionDef(
StmtFunctionDef {
range: 0..7,
is_async: false,
decorator_list: [],
name: Identifier {
id: "foo",
range: 4..7,
},
type_params: None,
parameters: Parameters {
range: 7..7,
posonlyargs: [],
args: [],
vararg: None,
kwonlyargs: [],
kwarg: None,
},
returns: None,
body: [],
},
),
FunctionDef(
StmtFunctionDef {
range: 18..32,
is_async: false,
decorator_list: [],
name: Identifier {
id: "bar",
range: 22..25,
},
type_params: None,
parameters: Parameters {
range: 25..27,
posonlyargs: [],
args: [],
vararg: None,
kwonlyargs: [],
kwarg: None,
},
returns: None,
body: [
Expr(
StmtExpr {
range: 29..32,
value: EllipsisLiteral(
ExprEllipsisLiteral {
range: 29..32,
},
),
},
),
],
},
),
FunctionDef(
StmtFunctionDef {
range: 33..40,
is_async: false,
decorator_list: [],
name: Identifier {
id: "baz",
range: 37..40,
},
type_params: None,
parameters: Parameters {
range: 40..40,
posonlyargs: [],
args: [],
vararg: None,
kwonlyargs: [],
kwarg: None,
},
returns: None,
body: [],
},
),
],
},
)
```
## Errors

|
1 | def foo # comment
| ^ Syntax Error: Expected '(', found newline
2 | def bar(): ...
3 | def baz
|


|
1 | def foo # comment
2 | def bar(): ...
| ^^^ Syntax Error: Expected ')', found 'def'
3 | def baz
|


|
1 | def foo # comment
2 | def bar(): ...
3 | def baz
| ^ Syntax Error: Expected '(', found newline
|


|
2 | def bar(): ...
3 | def baz
|
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ Module(
conversion: None,
format_spec: Some(
FStringFormatSpec {
range: 43..43,
range: 42..42,
elements: [],
},
),
Expand Down

0 comments on commit d3cd61f

Please sign in to comment.