From ed948eaefbde81695f8a2eb0c0b0c2c21420d092 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Thu, 20 Jun 2024 16:27:36 +0530 Subject: [PATCH] Avoid moving back the lexer for triple-quoted fstring (#11939) ## Summary This PR avoids moving back the lexer for a triple-quoted f-string during the re-lexing phase. Moving the lexer back is a problem because, for a triple-quoted f-string, the newlines are part of the f-string itself; specifically, they'll be part of the `FStringMiddle` token. So, if we moved the lexer back, there would be a `Newline` token whose range would fall inside the range of an `FStringMiddle` token. This creates a panic in downstream usage. fixes: #11937 ## Test Plan Add test cases and validate the snapshots. --- .../resources/invalid/re_lex_logical_token.py | 8 +- .../re_lexing/triple_quoted_fstring_1.py | 6 + .../re_lexing/triple_quoted_fstring_2.py | 6 + .../re_lexing/triple_quoted_fstring_3.py | 7 + crates/ruff_python_parser/src/lexer.rs | 11 ++ ...id_syntax@f_string_unclosed_lbrace.py.snap | 120 +++++++++--------- ...nvalid_syntax@re_lex_logical_token.py.snap | 94 +------------- ...re_lexing__triple_quoted_fstring_1.py.snap | 96 ++++++++++++++ ...re_lexing__triple_quoted_fstring_2.py.snap | 75 +++++++++++ ...re_lexing__triple_quoted_fstring_3.py.snap | 110 ++++++++++++++++ 10 files changed, 377 insertions(+), 156 deletions(-) create mode 100644 crates/ruff_python_parser/resources/invalid/re_lexing/triple_quoted_fstring_1.py create mode 100644 crates/ruff_python_parser/resources/invalid/re_lexing/triple_quoted_fstring_2.py create mode 100644 crates/ruff_python_parser/resources/invalid/re_lexing/triple_quoted_fstring_3.py create mode 100644 crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__triple_quoted_fstring_1.py.snap create mode 100644 crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__triple_quoted_fstring_2.py.snap create mode 100644 
crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__triple_quoted_fstring_3.py.snap diff --git a/crates/ruff_python_parser/resources/invalid/re_lex_logical_token.py b/crates/ruff_python_parser/resources/invalid/re_lex_logical_token.py index 9a2b9f4fb3500..86e2b1435bd19 100644 --- a/crates/ruff_python_parser/resources/invalid/re_lex_logical_token.py +++ b/crates/ruff_python_parser/resources/invalid/re_lex_logical_token.py @@ -54,10 +54,4 @@ def bar(): if call(f"hello def bar(): - pass - - -# There are trailing whitespace before the newline character but those whitespaces are -# part of the comment token -f"""hello {x # comment -y = 1 \ No newline at end of file + pass \ No newline at end of file diff --git a/crates/ruff_python_parser/resources/invalid/re_lexing/triple_quoted_fstring_1.py b/crates/ruff_python_parser/resources/invalid/re_lexing/triple_quoted_fstring_1.py new file mode 100644 index 0000000000000..2e985fddfaf28 --- /dev/null +++ b/crates/ruff_python_parser/resources/invalid/re_lexing/triple_quoted_fstring_1.py @@ -0,0 +1,6 @@ +# There are trailing whitespace before the newline character but those whitespaces are +# part of the comment token. +# https://github.com/astral-sh/ruff/issues/11929 + +f"""hello {x # comment +y = 1 \ No newline at end of file diff --git a/crates/ruff_python_parser/resources/invalid/re_lexing/triple_quoted_fstring_2.py b/crates/ruff_python_parser/resources/invalid/re_lexing/triple_quoted_fstring_2.py new file mode 100644 index 0000000000000..42b9a89f9ae18 --- /dev/null +++ b/crates/ruff_python_parser/resources/invalid/re_lexing/triple_quoted_fstring_2.py @@ -0,0 +1,6 @@ +# The lexer can't be moved back for a triple-quoted f-string because the newlines are +# part of the f-string itself. 
+# https://github.com/astral-sh/ruff/issues/11937 + +f'''{foo:.3f +''' \ No newline at end of file diff --git a/crates/ruff_python_parser/resources/invalid/re_lexing/triple_quoted_fstring_3.py b/crates/ruff_python_parser/resources/invalid/re_lexing/triple_quoted_fstring_3.py new file mode 100644 index 0000000000000..26c852e963eda --- /dev/null +++ b/crates/ruff_python_parser/resources/invalid/re_lexing/triple_quoted_fstring_3.py @@ -0,0 +1,7 @@ +# Here, the nesting level is 2 when the parser is trying to recover from an unclosed `{` +# This test demonstrates that we need to reduce the nesting level when recovering from +# within an f-string but the lexer shouldn't go back. + +if call(f'''{x:.3f +''' + pass \ No newline at end of file diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index c4a44b106d8eb..fc6790acafd42 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -1370,6 +1370,12 @@ impl<'src> Lexer<'src> { // i.e., it recovered from an unclosed parenthesis (`(`, `[`, or `{`). self.nesting -= 1; + // The lexer can't be moved back for a triple-quoted f-string because the newlines are + // part of the f-string itself, so there is no newline token to be emitted. + if self.current_flags.is_triple_quoted_fstring() { + return false; + } + let mut current_position = self.current_range().start(); let reverse_chars = self.source[..current_position.to_usize()].chars().rev(); let mut newline_position = None; @@ -1578,6 +1584,11 @@ impl TokenFlags { self.intersects(TokenFlags::F_STRING) } + /// Returns `true` if the token is a triple-quoted f-string. + fn is_triple_quoted_fstring(self) -> bool { + self.contains(TokenFlags::F_STRING | TokenFlags::TRIPLE_QUOTED_STRING) + } + /// Returns `true` if the token is a raw string. 
const fn is_raw_string(self) -> bool { self.intersects(TokenFlags::RAW_STRING) diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@f_string_unclosed_lbrace.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@f_string_unclosed_lbrace.py.snap index 8d8983a00598b..3f1856b37ffe5 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@f_string_unclosed_lbrace.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@f_string_unclosed_lbrace.py.snap @@ -139,64 +139,74 @@ Module( ), Expr( StmtExpr { - range: 24..29, + range: 24..37, value: FString( ExprFString { - range: 24..29, + range: 24..37, value: FStringValue { - inner: Single( - FString( - FString { - range: 24..29, - elements: [ - Expression( - FStringExpressionElement { - range: 26..27, - expression: Name( - ExprName { - range: 27..27, - id: "", - ctx: Invalid, - }, - ), - debug_text: None, - conversion: None, - format_spec: None, - }, - ), - ], - flags: FStringFlags { - quote_style: Double, - prefix: Regular, - triple_quoted: false, + inner: Concatenated( + [ + FString( + FString { + range: 24..29, + elements: [ + Expression( + FStringExpressionElement { + range: 26..27, + expression: Name( + ExprName { + range: 27..27, + id: "", + ctx: Invalid, + }, + ), + debug_text: None, + conversion: None, + format_spec: None, + }, + ), + ], + flags: FStringFlags { + quote_style: Double, + prefix: Regular, + triple_quoted: false, + }, }, - }, - ), + ), + FString( + FString { + range: 29..37, + elements: [ + Expression( + FStringExpressionElement { + range: 33..34, + expression: Name( + ExprName { + range: 34..34, + id: "", + ctx: Invalid, + }, + ), + debug_text: None, + conversion: None, + format_spec: None, + }, + ), + ], + flags: FStringFlags { + quote_style: Double, + prefix: Regular, + triple_quoted: true, + }, + }, + ), + ], ), }, }, ), }, ), - Expr( - StmtExpr { - range: 33..38, - value: Set( - ExprSet { - range: 33..38, - elts: [ - Name( - ExprName { - 
range: 34..34, - id: "", - ctx: Invalid, - }, - ), - ], - }, - ), - }, - ), ], }, ) @@ -318,21 +328,11 @@ Module( | - | -2 | f"{foo!r" -3 | f"{foo=" -4 | f"{" - | _____^ -5 | | f"""{""" - | |_^ Syntax Error: Expected FStringEnd, found FStringMiddle - | - - | 3 | f"{foo=" 4 | f"{" 5 | f"""{""" - | ^^^ Syntax Error: Expected a statement + | ^^^^ Syntax Error: Expected FStringEnd, found FStringStart | @@ -340,7 +340,7 @@ Module( 3 | f"{foo=" 4 | f"{" 5 | f"""{""" - |______^ + | ^^^ Syntax Error: Expected an expression | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lex_logical_token.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lex_logical_token.py.snap index 7604d4bede3ee..2a3c1866e7999 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lex_logical_token.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lex_logical_token.py.snap @@ -7,7 +7,7 @@ input_file: crates/ruff_python_parser/resources/invalid/re_lex_logical_token.py ``` Module( ModModule { - range: 0..1129, + range: 0..979, body: [ If( StmtIf { @@ -670,53 +670,6 @@ Module( ], }, ), - Expr( - StmtExpr { - range: 1097..1109, - value: FString( - ExprFString { - range: 1097..1109, - value: FStringValue { - inner: Single( - FString( - FString { - range: 1097..1109, - elements: [ - Literal( - FStringLiteralElement { - range: 1101..1107, - value: "hello ", - }, - ), - Expression( - FStringExpressionElement { - range: 1107..1109, - expression: Name( - ExprName { - range: 1108..1109, - id: "x", - ctx: Load, - }, - ), - debug_text: None, - conversion: None, - format_spec: None, - }, - ), - ], - flags: FStringFlags { - quote_style: Double, - prefix: Regular, - triple_quoted: true, - }, - }, - ), - ), - }, - }, - ), - }, - ), ], }, ) @@ -878,45 +831,8 @@ Module( | -60 | # There are trailing whitespace before the newline character but those whitespaces are -61 | # part of the comment token -62 | f"""hello {x # comment - | 
Syntax Error: Expected a statement -63 | y = 1 - | - - - | -60 | # There are trailing whitespace before the newline character but those whitespaces are -61 | # part of the comment token -62 | f"""hello {x # comment - | ___________________________^ -63 | | y = 1 - | |_____^ Syntax Error: f-string: unterminated triple-quoted string - | - - - | -61 | # part of the comment token -62 | f"""hello {x # comment -63 | y = 1 - | ^ Syntax Error: f-string: expecting '}' - | - - - | -60 | # There are trailing whitespace before the newline character but those whitespaces are -61 | # part of the comment token -62 | f"""hello {x # comment - | ___________________________^ -63 | | y = 1 - | |_____^ Syntax Error: Expected FStringEnd, found Unknown - | - - - | -61 | # part of the comment token -62 | f"""hello {x # comment -63 | y = 1 - | Syntax Error: Expected a statement +55 | if call(f"hello +56 | def bar(): +57 | pass + | Syntax Error: Expected a statement | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__triple_quoted_fstring_1.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__triple_quoted_fstring_1.py.snap new file mode 100644 index 0000000000000..c5aa38428f4a8 --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__triple_quoted_fstring_1.py.snap @@ -0,0 +1,96 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/invalid/re_lexing/triple_quoted_fstring_1.py +--- +## AST + +``` +Module( + ModModule { + range: 0..198, + body: [ + Expr( + StmtExpr { + range: 166..178, + value: FString( + ExprFString { + range: 166..178, + value: FStringValue { + inner: Single( + FString( + FString { + range: 166..178, + elements: [ + Literal( + FStringLiteralElement { + range: 170..176, + value: "hello ", + }, + ), + Expression( + FStringExpressionElement { + range: 176..178, + expression: Name( + ExprName { + range: 177..178, + id: "x", + ctx: Load, 
+ }, + ), + debug_text: None, + conversion: None, + format_spec: None, + }, + ), + ], + flags: FStringFlags { + quote_style: Double, + prefix: Regular, + triple_quoted: true, + }, + }, + ), + ), + }, + }, + ), + }, + ), + ], + }, +) +``` +## Errors + + | +3 | # https://github.com/astral-sh/ruff/issues/11929 +4 | +5 | f"""hello {x # comment + | ___________________________^ +6 | | y = 1 + | |_____^ Syntax Error: f-string: unterminated triple-quoted string + | + + + | +5 | f"""hello {x # comment +6 | y = 1 + | ^ Syntax Error: f-string: expecting '}' + | + + + | +3 | # https://github.com/astral-sh/ruff/issues/11929 +4 | +5 | f"""hello {x # comment + | ___________________________^ +6 | | y = 1 + | |_____^ Syntax Error: Expected FStringEnd, found Unknown + | + + + | +5 | f"""hello {x # comment +6 | y = 1 + | Syntax Error: Expected a statement + | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__triple_quoted_fstring_2.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__triple_quoted_fstring_2.py.snap new file mode 100644 index 0000000000000..b4683fc253ad9 --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__triple_quoted_fstring_2.py.snap @@ -0,0 +1,75 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/invalid/re_lexing/triple_quoted_fstring_2.py +--- +## AST + +``` +Module( + ModModule { + range: 0..183, + body: [ + Expr( + StmtExpr { + range: 167..183, + value: FString( + ExprFString { + range: 167..183, + value: FStringValue { + inner: Single( + FString( + FString { + range: 167..183, + elements: [ + Expression( + FStringExpressionElement { + range: 171..180, + expression: Name( + ExprName { + range: 172..175, + id: "foo", + ctx: Load, + }, + ), + debug_text: None, + conversion: None, + format_spec: Some( + FStringFormatSpec { + range: 176..180, + elements: [ + Literal( + FStringLiteralElement { + range: 176..180, + 
value: ".3f\n", + }, + ), + ], + }, + ), + }, + ), + ], + flags: FStringFlags { + quote_style: Single, + prefix: Regular, + triple_quoted: true, + }, + }, + ), + ), + }, + }, + ), + }, + ), + ], + }, +) +``` +## Errors + + | +5 | f'''{foo:.3f +6 | ''' + | ^^^ Syntax Error: f-string: expecting '}' + | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__triple_quoted_fstring_3.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__triple_quoted_fstring_3.py.snap new file mode 100644 index 0000000000000..70289856fd145 --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__triple_quoted_fstring_3.py.snap @@ -0,0 +1,110 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/invalid/re_lexing/triple_quoted_fstring_3.py +--- +## AST + +``` +Module( + ModModule { + range: 0..262, + body: [ + If( + StmtIf { + range: 231..262, + test: Call( + ExprCall { + range: 234..253, + func: Name( + ExprName { + range: 234..238, + id: "call", + ctx: Load, + }, + ), + arguments: Arguments { + range: 238..253, + args: [ + FString( + ExprFString { + range: 239..253, + value: FStringValue { + inner: Single( + FString( + FString { + range: 239..253, + elements: [ + Expression( + FStringExpressionElement { + range: 243..250, + expression: Name( + ExprName { + range: 244..245, + id: "x", + ctx: Load, + }, + ), + debug_text: None, + conversion: None, + format_spec: Some( + FStringFormatSpec { + range: 246..250, + elements: [ + Literal( + FStringLiteralElement { + range: 246..250, + value: ".3f\n", + }, + ), + ], + }, + ), + }, + ), + ], + flags: FStringFlags { + quote_style: Single, + prefix: Regular, + triple_quoted: true, + }, + }, + ), + ), + }, + }, + ), + ], + keywords: [], + }, + }, + ), + body: [ + Pass( + StmtPass { + range: 258..262, + }, + ), + ], + elif_else_clauses: [], + }, + ), + ], + }, +) +``` +## Errors + + | +5 | if call(f'''{x:.3f +6 | 
''' + | ^^^ Syntax Error: f-string: expecting '}' +7 | pass + | + + + | +5 | if call(f'''{x:.3f +6 | ''' + | ^ Syntax Error: Expected ')', found newline +7 | pass + |