From 78c363e200d69a24ba71ed1e9dd324c313a8230f Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Mon, 25 Sep 2023 10:34:10 +0200 Subject: [PATCH] Emit `LexError` for dedent to incorrect level --- .../test/fixtures/ruff/expression/binary.py | 4 +- .../format@expression__binary.py.snap | 4 +- crates/ruff_python_parser/src/lexer.rs | 41 ++++++++++-- .../src/lexer/indentation.rs | 24 ++++++- ...ser__lexer__tests__tet_too_low_dedent.snap | 66 +++++++++++++++++++ 5 files changed, 126 insertions(+), 13 deletions(-) create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__tet_too_low_dedent.snap diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/binary.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/binary.py index 3d08feaaf7999..f043808439c88 100644 --- a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/binary.py +++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/binary.py @@ -366,7 +366,7 @@ ): pass - z = ( +z = ( a + # a: extracts this comment @@ -377,7 +377,7 @@ x and y ) ) - ) +) z = ( ( diff --git a/crates/ruff_python_formatter/tests/snapshots/format@expression__binary.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@expression__binary.py.snap index 2c2ccd9b095ff..b0b0c28cbad22 100644 --- a/crates/ruff_python_formatter/tests/snapshots/format@expression__binary.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/format@expression__binary.py.snap @@ -372,7 +372,7 @@ if ( ): pass - z = ( +z = ( a + # a: extracts this comment @@ -383,7 +383,7 @@ if ( x and y ) ) - ) +) z = ( ( diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index 9600e0190f9d4..a8e6833b301cc 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -556,10 +556,22 @@ impl<'source> Lexer<'source> { pub fn next_token(&mut self) -> LexResult { // Return dedent tokens until the current indentation level matches the indentation of the next token. if let Some(indentation) = self.pending_indentation.take() { - if let Ok(Ordering::Greater) = self.indentations.current().try_compare(indentation) { - self.pending_indentation = Some(indentation); - self.indentations.pop(); - return Ok((Tok::Dedent, TextRange::empty(self.offset()))); + match self.indentations.current().try_compare(indentation) { + Ok(Ordering::Greater) => { + self.pending_indentation = Some(indentation); + let offset = self.offset(); + self.indentations.dedent_one(indentation).map_err(|_| { + LexicalError::new(LexicalErrorType::IndentationError, offset) + })?; + return Ok((Tok::Dedent, TextRange::empty(offset))); + } + Ok(_) => {} + Err(_) => { + return Err(LexicalError::new( + LexicalErrorType::IndentationError, + self.offset(), + )); + } } } @@ -690,9 +702,12 @@ impl<'source> Lexer<'source> { let token = match self.indentations.current().try_compare(indentation) { // Dedent Ok(Ordering::Greater) => { - self.indentations.pop(); self.pending_indentation = Some(indentation); + self.indentations.dedent_one(indentation).map_err(|_| { + LexicalError::new(LexicalErrorType::IndentationError, self.offset()) + })?; + Some((Tok::Dedent, TextRange::empty(self.offset()))) } @@ -700,7 +715,7 @@ impl<'source> Lexer<'source> { // Indent Ok(Ordering::Less) => { - self.indentations.push(indentation); + self.indentations.indent(indentation); Some((Tok::Indent, self.token_range())) } Err(_) => { @@ -732,7 +747,7 @@ impl<'source> Lexer<'source> { Ok((Tok::Newline, TextRange::empty(self.offset()))) } // Next, flush the indentation stack to zero. - else if self.indentations.pop().is_some() { + else if self.indentations.dedent().is_some() { Ok((Tok::Dedent, TextRange::empty(self.offset()))) } else { Ok((Tok::EndOfFile, TextRange::empty(self.offset()))) @@ -1678,4 +1693,16 @@ def f(arg=%timeit a = b): result => panic!("Expected an error token but found {result:?}"), } } + + #[test] + fn tet_too_low_dedent() { + let tokens: Vec<_> = lex( + r#"if True: + pass + pass"#, + Mode::Module, + ) + .collect(); + assert_debug_snapshot!(tokens); + } } diff --git a/crates/ruff_python_parser/src/lexer/indentation.rs b/crates/ruff_python_parser/src/lexer/indentation.rs index 9c2f6a05ee5e3..2b12efab063fb 100644 --- a/crates/ruff_python_parser/src/lexer/indentation.rs +++ b/crates/ruff_python_parser/src/lexer/indentation.rs @@ -90,13 +90,33 @@ pub(super) struct Indentations { } impl Indentations { - pub(super) fn push(&mut self, indent: Indentation) { + pub(super) fn indent(&mut self, indent: Indentation) { debug_assert_eq!(self.current().try_compare(indent), Ok(Ordering::Less)); self.stack.push(indent); } - pub(super) fn pop(&mut self) -> Option { + /// Dedent one level to eventually reach `new_indentation`. + /// + /// Returns `Err` if the `new_indentation` is greater than the new current indentation level. + pub(super) fn dedent_one( + &mut self, + new_indentation: Indentation, + ) -> Result, UnexpectedIndentation> { + let previous = self.dedent(); + + match new_indentation.try_compare(*self.current())? { + Ordering::Less | Ordering::Equal => Ok(previous), + // ```python + // if True: + // pass + // pass <- The indentation is greater than the expected indent of 0. + // ``` + Ordering::Greater => Err(UnexpectedIndentation), + } + } + + pub(super) fn dedent(&mut self) -> Option { self.stack.pop() } diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__tet_too_low_dedent.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__tet_too_low_dedent.snap new file mode 100644 index 0000000000000..8a9ba410ae92d --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__tet_too_low_dedent.snap @@ -0,0 +1,66 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: tokens +--- +[ + Ok( + ( + If, + 0..2, + ), + ), + Ok( + ( + True, + 3..7, + ), + ), + Ok( + ( + Colon, + 7..8, + ), + ), + Ok( + ( + Newline, + 8..9, + ), + ), + Ok( + ( + Indent, + 9..13, + ), + ), + Ok( + ( + Pass, + 13..17, + ), + ), + Ok( + ( + Newline, + 17..18, + ), + ), + Err( + LexicalError { + error: IndentationError, + location: 20, + }, + ), + Ok( + ( + Pass, + 20..24, + ), + ), + Ok( + ( + Newline, + 24..24, + ), + ), +]