From 47c9ed07f2c00ac0d600294a0cb418d91c13b4ea Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Wed, 26 Jun 2024 14:00:48 +0530 Subject: [PATCH] Consider 2-character EOL before line continuation (#12035) ## Summary This PR fixes a bug introduced in https://github.com/astral-sh/ruff/pull/12008 which didn't consider the two-character newline after the line continuation character. For example, consider the following code with the whitespace characters made visible: ```py call(foo # comment \\r\n \r\n def bar():\r\n ....pass\r\n ``` The lexer is at `def` when it's running the re-lexing logic and trying to move back to a newline character. It encounters `\n` and treats it as not escaped (incorrect, because `\r\n` is a single newline and the `\r` before it is escaped by the backslash), while only the `\r` is seen as escaped, so it moves the lexer to the `\n` character. This creates an overlap in token ranges which causes the panic. ``` Name 0..4 Lpar 4..5 Name 5..8 Comment 9..20 NonLogicalNewline 20..22 <-- overlap between Newline 21..22 <-- these two tokens NonLogicalNewline 22..23 Def 23..26 ... ``` fixes: #12028 ## Test Plan Add a test case with a line continuation and Windows-style newline characters. 
--- .gitattributes | 1 + .../line_continuation_windows_eol.py | 4 + crates/ruff_python_parser/src/lexer.rs | 48 ++++++---- ...ing__line_continuation_windows_eol.py.snap | 89 +++++++++++++++++++ 4 files changed, 125 insertions(+), 17 deletions(-) create mode 100644 crates/ruff_python_parser/resources/invalid/re_lexing/line_continuation_windows_eol.py create mode 100644 crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__line_continuation_windows_eol.py.snap diff --git a/.gitattributes b/.gitattributes index 8f333acef68b9..9ae06f93d11a9 100644 --- a/.gitattributes +++ b/.gitattributes @@ -8,6 +8,7 @@ crates/ruff_linter/resources/test/fixtures/pycodestyle/W391_3.py text eol=crlf crates/ruff_python_formatter/resources/test/fixtures/ruff/docstring_code_examples_crlf.py text eol=crlf crates/ruff_python_formatter/tests/snapshots/format@docstring_code_examples_crlf.py.snap text eol=crlf +crates/ruff_python_parser/resources/invalid/re_lexing/line_continuation_windows_eol.py text eol=crlf crates/ruff_python_parser/resources/invalid/re_lex_logical_token_windows_eol.py text eol=crlf crates/ruff_python_parser/resources/invalid/re_lex_logical_token_mac_eol.py text eol=cr diff --git a/crates/ruff_python_parser/resources/invalid/re_lexing/line_continuation_windows_eol.py b/crates/ruff_python_parser/resources/invalid/re_lexing/line_continuation_windows_eol.py new file mode 100644 index 0000000000000..f2848adfc5583 --- /dev/null +++ b/crates/ruff_python_parser/resources/invalid/re_lexing/line_continuation_windows_eol.py @@ -0,0 +1,4 @@ +call(a, b, # comment \ + +def bar(): + pass \ No newline at end of file diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index cc04e7926476f..0640bd8349f66 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -1393,26 +1393,40 @@ impl<'src> Lexer<'src> { while let Some(ch) = reverse_chars.next() { if is_python_whitespace(ch) { current_position 
-= ch.text_len(); - } else if matches!(ch, '\n' | '\r') { - current_position -= ch.text_len(); - // Count the number of backslashes before the newline character. - let mut backslash_count = 0; - while reverse_chars.next_if_eq(&'\\').is_some() { - backslash_count += 1; - } - if backslash_count == 0 { - // No escapes: `\n` - newline_position = Some(current_position); - } else { - if backslash_count % 2 == 0 { - // Even number of backslashes i.e., all backslashes cancel each other out - // which means the newline character is not being escaped. - newline_position = Some(current_position); + continue; + } + + match ch { + '\n' => { + current_position -= ch.text_len(); + if let Some(carriage_return) = reverse_chars.next_if_eq(&'\r') { + current_position -= carriage_return.text_len(); } - current_position -= TextSize::new('\\'.text_len().to_u32() * backslash_count); } + '\r' => { + current_position -= ch.text_len(); + } + _ => break, + } + + debug_assert!(matches!(ch, '\n' | '\r')); + + // Count the number of backslashes before the newline character. + let mut backslash_count = 0; + while reverse_chars.next_if_eq(&'\\').is_some() { + backslash_count += 1; + } + + if backslash_count == 0 { + // No escapes: `\n` + newline_position = Some(current_position); } else { - break; + if backslash_count % 2 == 0 { + // Even number of backslashes i.e., all backslashes cancel each other out + // which means the newline character is not being escaped. 
+ newline_position = Some(current_position); + } + current_position -= TextSize::new('\\'.text_len().to_u32() * backslash_count); } } diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__line_continuation_windows_eol.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__line_continuation_windows_eol.py.snap new file mode 100644 index 0000000000000..9e22a93f78973 --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__line_continuation_windows_eol.py.snap @@ -0,0 +1,89 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/invalid/re_lexing/line_continuation_windows_eol.py +--- +## AST + +``` +Module( + ModModule { + range: 0..46, + body: [ + Expr( + StmtExpr { + range: 0..10, + value: Call( + ExprCall { + range: 0..10, + func: Name( + ExprName { + range: 0..4, + id: "call", + ctx: Load, + }, + ), + arguments: Arguments { + range: 4..10, + args: [ + Name( + ExprName { + range: 5..6, + id: "a", + ctx: Load, + }, + ), + Name( + ExprName { + range: 8..9, + id: "b", + ctx: Load, + }, + ), + ], + keywords: [], + }, + }, + ), + }, + ), + FunctionDef( + StmtFunctionDef { + range: 26..46, + is_async: false, + decorator_list: [], + name: Identifier { + id: "bar", + range: 30..33, + }, + type_params: None, + parameters: Parameters { + range: 33..35, + posonlyargs: [], + args: [], + vararg: None, + kwonlyargs: [], + kwarg: None, + }, + returns: None, + body: [ + Pass( + StmtPass { + range: 42..46, + }, + ), + ], + }, + ), + ], + }, +) +``` +## Errors + + | +1 | call(a, b, # comment \ +2 | / +3 | | def bar(): + | |_^ Syntax Error: Expected ')', found newline +4 | pass + |