From 992da4344ef504909f67c7feb3cfb6c03b2be396 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Wed, 5 Jun 2024 10:29:01 +0530 Subject: [PATCH] Use speculative parsing for with-items --- .../src/parser/expression.rs | 51 +- crates/ruff_python_parser/src/parser/mod.rs | 34 +- .../src/parser/statement.rs | 522 +++++------------- ...s__with__ambiguous_lpar_with_items.py.snap | 396 +++++++------ ...nts__with__unclosed_ambiguous_lpar.py.snap | 54 +- ..._with__unclosed_ambiguous_lpar_eof.py.snap | 2 +- ..._items_parenthesized_missing_comma.py.snap | 61 +- 7 files changed, 444 insertions(+), 676 deletions(-) diff --git a/crates/ruff_python_parser/src/parser/expression.rs b/crates/ruff_python_parser/src/parser/expression.rs index 8504504c8a19a7..ced1376aba1205 100644 --- a/crates/ruff_python_parser/src/parser/expression.rs +++ b/crates/ruff_python_parser/src/parser/expression.rs @@ -689,7 +689,8 @@ impl<'src> Parser<'src> { parsed_expr = Expr::Generator(parser.parse_generator_expression( parsed_expr.expr, - GeneratorExpressionInParentheses::No(start), + start, + Parenthesized::No, )) .into(); } @@ -1705,7 +1706,8 @@ impl<'src> Parser<'src> { let generator = Expr::Generator(self.parse_generator_expression( parsed_expr.expr, - GeneratorExpressionInParentheses::Yes(start), + start, + Parenthesized::Yes, )); ParsedExpr { @@ -1942,33 +1944,20 @@ impl<'src> Parser<'src> { pub(super) fn parse_generator_expression( &mut self, element: Expr, - in_parentheses: GeneratorExpressionInParentheses, + start: TextSize, + parenthesized: Parenthesized, ) -> ast::ExprGenerator { let generators = self.parse_generators(); - let (parenthesized, start) = match in_parentheses { - GeneratorExpressionInParentheses::Yes(lpar_start) => { - self.expect(TokenKind::Rpar); - (true, lpar_start) - } - GeneratorExpressionInParentheses::No(expr_start) => (false, expr_start), - GeneratorExpressionInParentheses::Maybe { - lpar_start, - expr_start, - } => { - if self.eat(TokenKind::Rpar) { - (true, lpar_start) - } else { - (false, expr_start) - } - } - }; + if parenthesized.is_yes() { + self.expect(TokenKind::Rpar); + } ast::ExprGenerator { elt: Box::new(element), generators, range: self.node_range(start), - parenthesized, + parenthesized: parenthesized.is_yes(), } } @@ -2472,26 +2461,6 @@ impl From for OperatorPrecedence { } } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub(super) enum GeneratorExpressionInParentheses { - /// The generator expression is in parentheses. The given [`TextSize`] is the - /// start of the left parenthesis. E.g., `(x for x in range(10))`. - Yes(TextSize), - - /// The generator expression is not in parentheses. The given [`TextSize`] is the - /// start of the expression. E.g., `x for x in range(10)`. - No(TextSize), - - /// The generator expression may or may not be in parentheses. The given [`TextSize`]s - /// are the start of the left parenthesis and the start of the expression, respectively. - Maybe { - /// The start of the left parenthesis. - lpar_start: TextSize, - /// The start of the expression. - expr_start: TextSize, - }, -} - /// Represents the precedence used for parsing the value part of a starred expression. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub(super) enum StarredExpressionPrecedence { diff --git a/crates/ruff_python_parser/src/parser/mod.rs b/crates/ruff_python_parser/src/parser/mod.rs index c04875c45bf7d0..e26dd57cd14c0e 100644 --- a/crates/ruff_python_parser/src/parser/mod.rs +++ b/crates/ruff_python_parser/src/parser/mod.rs @@ -705,16 +705,6 @@ enum WithItemKind { /// The parentheses belongs to the context expression. ParenthesizedExpression, - /// A list of `with` items that has only one item which is a parenthesized - /// generator expression. - /// - /// ```python - /// with (x for x in range(10)): ... - /// ``` - /// - /// The parentheses belongs to the generator expression. - SingleParenthesizedGeneratorExpression, - /// The `with` items aren't parenthesized in any way. /// /// ```python @@ -732,20 +722,15 @@ impl WithItemKind { const fn list_terminator(self) -> TokenKind { match self { WithItemKind::Parenthesized => TokenKind::Rpar, - WithItemKind::Unparenthesized - | WithItemKind::ParenthesizedExpression - | WithItemKind::SingleParenthesizedGeneratorExpression => TokenKind::Colon, + WithItemKind::Unparenthesized | WithItemKind::ParenthesizedExpression => { + TokenKind::Colon + } } } - /// Returns `true` if the `with` item is a parenthesized expression i.e., the - /// parentheses belong to the context expression. - const fn is_parenthesized_expression(self) -> bool { - matches!( - self, - WithItemKind::ParenthesizedExpression - | WithItemKind::SingleParenthesizedGeneratorExpression - ) + /// Returns `true` if the with items are parenthesized. + const fn is_parenthesized(self) -> bool { + matches!(self, WithItemKind::Parenthesized) } } @@ -1172,7 +1157,6 @@ bitflags! { const LAMBDA_PARAMETERS = 1 << 24; const WITH_ITEMS_PARENTHESIZED = 1 << 25; const WITH_ITEMS_PARENTHESIZED_EXPRESSION = 1 << 26; - const WITH_ITEMS_SINGLE_PARENTHESIZED_GENERATOR_EXPRESSION = 1 << 27; const WITH_ITEMS_UNPARENTHESIZED = 1 << 28; const F_STRING_ELEMENTS = 1 << 29; } @@ -1225,9 +1209,6 @@ impl RecoveryContext { WithItemKind::ParenthesizedExpression => { RecoveryContext::WITH_ITEMS_PARENTHESIZED_EXPRESSION } - WithItemKind::SingleParenthesizedGeneratorExpression => { - RecoveryContext::WITH_ITEMS_SINGLE_PARENTHESIZED_GENERATOR_EXPRESSION - } WithItemKind::Unparenthesized => RecoveryContext::WITH_ITEMS_UNPARENTHESIZED, }, RecoveryContextKind::FStringElements => RecoveryContext::F_STRING_ELEMENTS, @@ -1294,9 +1275,6 @@ impl RecoveryContext { RecoveryContext::WITH_ITEMS_PARENTHESIZED_EXPRESSION => { RecoveryContextKind::WithItems(WithItemKind::ParenthesizedExpression) } - RecoveryContext::WITH_ITEMS_SINGLE_PARENTHESIZED_GENERATOR_EXPRESSION => { - RecoveryContextKind::WithItems(WithItemKind::SingleParenthesizedGeneratorExpression) - } RecoveryContext::WITH_ITEMS_UNPARENTHESIZED => { RecoveryContextKind::WithItems(WithItemKind::Unparenthesized) } diff --git a/crates/ruff_python_parser/src/parser/statement.rs b/crates/ruff_python_parser/src/parser/statement.rs index 3e9a047db10c06..764d0ad51a6660 100644 --- a/crates/ruff_python_parser/src/parser/statement.rs +++ b/crates/ruff_python_parser/src/parser/statement.rs @@ -9,7 +9,7 @@ use ruff_python_ast::{ use ruff_text_size::{Ranged, TextSize}; use crate::lexer::TokenValue; -use crate::parser::expression::{GeneratorExpressionInParentheses, ParsedExpr, EXPR_SET}; +use crate::parser::expression::{ParsedExpr, EXPR_SET}; use crate::parser::progress::ParserProgress; use crate::parser::{ helpers, FunctionKind, Parser, RecoveryContext, RecoveryContextKind, WithItemKind, @@ -17,7 +17,7 @@ use crate::parser::{ use crate::token_set::TokenSet; use crate::{Mode, ParseErrorType, TokenKind}; -use super::expression::{ExpressionContext, OperatorPrecedence}; +use super::expression::ExpressionContext; use super::Parenthesized; /// Tokens that represent compound statements. @@ -1886,11 +1886,8 @@ impl<'src> Parser<'src> { /// Parses a list of with items. /// - /// See: + /// See: fn parse_with_items(&mut self) -> Vec { - let start = self.node_start(); - let mut items = vec![]; - if !self.at_expr() { self.add_error( ParseErrorType::OtherError( @@ -1898,175 +1895,107 @@ impl<'src> Parser<'src> { ), self.current_token_range(), ); - return items; + return vec![]; } - let with_item_kind = if self.eat(TokenKind::Lpar) { - self.parse_parenthesized_with_items(start, &mut items) - } else { - WithItemKind::Unparenthesized - }; + if self.at(TokenKind::Lpar) { + if let Some(items) = self.try_parse_parenthesized_with_items() { + self.expect(TokenKind::Rpar); + items + } else { + // test_ok ambiguous_lpar_with_items_if_expr + // with (x) if True else y: ... + // with (x for x in iter) if True else y: ... + // with (x async for x in iter) if True else y: ... + // with (x)[0] if True else y: ... - if with_item_kind.is_parenthesized_expression() { - // The trailing comma is optional because (1) they aren't allowed in parenthesized - // expression context and, (2) We need to raise the correct error if they're present. - // - // Consider the following three examples: - // - // ```python - // with (item1, item2): ... # (1) - // with (item1, item2),: ... # (2) - // with (item1, item2), item3,: ... # (3) - // ``` - // - // Here, (1) is valid and represents a parenthesized with items while (2) and (3) - // are invalid as they are parenthesized expression. Example (3) will raise an error - // stating that a trailing comma isn't allowed, while (2) will raise an "expected an - // expression" error. - // - // The reason that (2) expects an expression is because if it raised an error - // similar to (3), we would be suggesting to remove the trailing comma, which would - // make it a parenthesized with items. This would contradict our original assumption - // that it's a parenthesized expression. - // - // However, for (3), the error is being raised by the list parsing logic and if the - // trailing comma is removed, it still remains a parenthesized expression, so it's - // fine to raise the error. - if self.eat(TokenKind::Comma) && !self.at_expr() { - self.add_error( - ParseErrorType::ExpectedExpression, - self.current_token_range(), - ); + // test_ok ambiguous_lpar_with_items_binary_expr + // # It doesn't matter what's inside the parentheses, these tests need to make sure + // # all binary expressions parses correctly. + // with (a) and b: ... + // with (a) is not b: ... + // # Make sure precedence works + // with (a) or b and c: ... + // with (a) and b or c: ... + // with (a | b) << c | d: ... + // # Postfix should still be parsed first + // with (a)[0] + b * c: ... + self.parse_comma_separated_list_into_vec( + RecoveryContextKind::WithItems(WithItemKind::ParenthesizedExpression), + |p| p.parse_with_item(WithItemParsingState::Regular).item, + ) } + } else { + self.parse_comma_separated_list_into_vec( + RecoveryContextKind::WithItems(WithItemKind::Unparenthesized), + |p| p.parse_with_item(WithItemParsingState::Regular).item, + ) } - - // This call is a no-op if the with items are parenthesized as all of them - // have already been parsed. - self.parse_comma_separated_list(RecoveryContextKind::WithItems(with_item_kind), |parser| { - items.push(parser.parse_with_item(WithItemParsingState::Regular).item); - }); - - if with_item_kind == WithItemKind::Parenthesized { - self.expect(TokenKind::Rpar); - } - - items } - /// Parse the with items coming after an ambiguous `(` token. + /// Try parsing with-items coming after an ambiguous `(` token. /// /// To understand the ambiguity, consider the following example: /// /// ```python - /// with (item1, item2): ... # (1) - /// with (item1, item2) as f: ... # (2) + /// with (item1, item2): ... # Parenthesized with items + /// with (item1, item2) as f: ... # Parenthesized expression /// ``` + /// When the parser is at the `(` token after the `with` keyword, it doesn't know if `(` is + /// used to parenthesize the with items or if it's part of a parenthesized expression of the + /// first with item. The challenge here is that until the parser sees the matching `)` token, + /// it can't resolve the ambiguity. /// - /// When the parser is at the `(` token after the `with` keyword, it doesn't - /// know if it's used to parenthesize the with items or if it's part of a - /// parenthesized expression of the first with item. The challenge here is - /// that until the parser sees the matching `)` token, it can't resolve the - /// ambiguity. This requires infinite lookahead. - /// - /// This method resolves the ambiguity by parsing the with items assuming that - /// it's a parenthesized with items. Then, once it finds the matching `)`, it - /// checks if the assumption still holds true. If it doesn't, then it combines - /// the parsed with items into a single with item with an appropriate expression. - /// - /// The return value is the kind of with items parsed. Note that there could - /// still be other with items which needs to be parsed as this method stops - /// when the matching `)` is found. - fn parse_parenthesized_with_items( - &mut self, - start: TextSize, - items: &mut Vec, - ) -> WithItemKind { + /// This method resolves the ambiguity using speculative parsing. It starts with an assumption + /// that it's a parenthesized with items. Then, once it finds the matching `)`, it checks if + /// the assumption still holds true. If the initial assumption was correct, this will return + /// the parsed with items. Otherwise, rewind the parser back to the starting `(` token, + /// returning [`None`]. + /// + /// # Panics + /// + /// If the parser isn't positioned at a `(` token. + /// + /// See: + fn try_parse_parenthesized_with_items(&mut self) -> Option> { + let checkpoint = self.checkpoint(); + // We'll start with the assumption that the with items are parenthesized. let mut with_item_kind = WithItemKind::Parenthesized; - // Keep track of certain properties to determine if the with items are - // parenthesized or if it's a parenthesized expression. Refer to their - // usage for examples and explanation. - let mut has_trailing_comma = false; - let mut has_optional_vars = false; - - // Start with parsing the first with item after an ambiguous `(` token - // with the start offset. - let mut state = WithItemParsingState::AmbiguousLparFirstItem(start); + self.bump(TokenKind::Lpar); let mut parsed_with_items = vec![]; - let mut progress = ParserProgress::default(); - - loop { - progress.assert_progressing(self); - - // We stop at the first `)` found. Any nested parentheses will be - // consumed by the with item parsing. This check needs to be done - // first in case there are no with items. For example, - // - // ```python - // with (): ... - // with () as x: ... - // ``` - if self.at(TokenKind::Rpar) { - break; - } - - let parsed_with_item = self.parse_with_item(state); - - if parsed_with_item.item.context_expr.is_generator_expr() - && parsed_with_item.used_ambiguous_lpar - { - // For generator expressions, it's a bit tricky. We need to check if parsing - // a generator expression has used the ambiguous `(` token. This is the case - // for a parenthesized generator expression which is using the ambiguous `(` - // as the start of the generator expression. For example: - // - // ```python - // with (x for x in range(10)): ... - // # ^ - // # Consumed by `parse_with_item` - // ``` - // - // This is only allowed if it's the first with item which is made sure by the - // `with_item_parsing` state. - with_item_kind = WithItemKind::SingleParenthesizedGeneratorExpression; - parsed_with_items.push(parsed_with_item); - break; - } + let mut has_optional_vars = false; + // test_err with_items_parenthesized_missing_comma + // with (item1 item2): ... + // with (item1 as f1 item2): ... + // with (item1, item2 item3, item4): ... + // with (item1, item2 as f1 item3, item4): ... + // with (item1, item2: ... + self.parse_comma_separated_list(RecoveryContextKind::WithItems(with_item_kind), |p| { + let parsed_with_item = p.parse_with_item(WithItemParsingState::Speculative); has_optional_vars |= parsed_with_item.item.optional_vars.is_some(); - parsed_with_items.push(parsed_with_item); + }); - has_trailing_comma = self.eat(TokenKind::Comma); - if !has_trailing_comma { - break; - } - - // Update the with item parsing to indicate that we're no longer - // parsing the first with item, but we haven't yet found the `)` to - // the corresponding ambiguous `(`. - state = WithItemParsingState::AmbiguousLparRest; - } - - // Check if our assumption is incorrect and it's actually a parenthesized - // expression. - if !with_item_kind.is_parenthesized_expression() && self.at(TokenKind::Rpar) { - if has_optional_vars { - // If any of the with item has optional variables, then our assumption is - // correct and it is a parenthesized with items. Now, we need to restrict - // the grammar for a with item's context expression which is: + // Check if our assumption is incorrect and it's actually a parenthesized expression. + match self.current_token_kind() { + TokenKind::Rpar if has_optional_vars => { + // If any of the with item has optional variables, then our assumption is correct + // and it is a parenthesized with items. Now, we need to restrict the grammar for a + // with item's context expression which is: // // with_item: expression ... // // So, named, starred and yield expressions not allowed. for parsed_with_item in &parsed_with_items { - // Parentheses resets the precedence. if parsed_with_item.is_parenthesized { + // Parentheses resets the precedence. continue; } - let err = match parsed_with_item.item.context_expr { + let error = match parsed_with_item.item.context_expr { Expr::Named(_) => ParseErrorType::UnparenthesizedNamedExpression, Expr::Starred(_) => ParseErrorType::InvalidStarredExpressionUsage, Expr::Yield(_) | Expr::YieldFrom(_) => { @@ -2074,18 +2003,18 @@ impl<'src> Parser<'src> { } _ => continue, }; - self.add_error(err, &parsed_with_item.item.context_expr); + self.add_error(error, &parsed_with_item.item.context_expr); } - } else if self.peek() == TokenKind::Colon { - // Here, the parser is at a `)` followed by a `:`. + } + TokenKind::Rpar if self.peek() == TokenKind::Colon => { if parsed_with_items.is_empty() { - // No with items, treat it as a parenthesized expression to - // create an empty tuple expression. + // No with items, treat it as a parenthesized expression to create an empty + // tuple expression. with_item_kind = WithItemKind::ParenthesizedExpression; } else { - // These expressions, if unparenthesized, are only allowed if it's - // a parenthesized expression and none of the with items have an - // optional variable. + // These expressions, if unparenthesized, are only allowed if it's a + // parenthesized expression and none of the with items have an optional + // variable. if parsed_with_items.iter().any(|parsed_with_item| { !parsed_with_item.is_parenthesized && matches!( @@ -2099,17 +2028,18 @@ impl<'src> Parser<'src> { with_item_kind = WithItemKind::ParenthesizedExpression; } } - } else { - // For any other token followed by `)`, if any of the items has - // an optional variables (`as ...`), then our assumption is correct. - // Otherwise, treat it as a parenthesized expression. For example: + } + _ => { + // For any other token followed by `)`, if any of the items has an optional + // variables (`as ...`), then our assumption is correct. Otherwise, treat + // it as a parenthesized expression. For example: // // ```python // with (item1, item2 as f): ... // ``` // - // This also helps in raising the correct syntax error for the - // following case: + // This also helps in raising the correct syntax error for the following + // case: // ```python // with (item1, item2 as f) as x: ... // # ^^ @@ -2119,128 +2049,18 @@ impl<'src> Parser<'src> { } } - if with_item_kind == WithItemKind::Parenthesized && !self.at(TokenKind::Rpar) { - // test_err with_items_parenthesized_missing_comma - // with (item1 item2): ... - // with (item1 as f1 item2): ... - // with (item1, item2 item3, item4): ... - // with (item1, item2 as f1 item3, item4): ... - // with (item1, item2: ... - self.expect(TokenKind::Comma); - } - - // Transform the items if it's a parenthesized expression. - if with_item_kind.is_parenthesized_expression() { - // The generator expression has already consumed the `)`, so avoid - // expecting it again. - if with_item_kind != WithItemKind::SingleParenthesizedGeneratorExpression { - self.expect(TokenKind::Rpar); - } - - let mut lhs = if parsed_with_items.len() == 1 && !has_trailing_comma { - // SAFETY: We've checked that `items` has only one item. - let expr = parsed_with_items.pop().unwrap().item.context_expr; - - // Here, we know that it's a parenthesized expression so the expression - // should be checked against the grammar rule which is: - // - // group: (yield_expr | named_expression) - // - // So, no starred expression allowed. - if expr.is_starred_expr() { - self.add_error(ParseErrorType::InvalidStarredExpressionUsage, &expr); - } - expr - } else { - let mut elts = Vec::with_capacity(parsed_with_items.len()); - - // Here, we know that it's a tuple expression so each expression should - // be checked against the tuple element grammar rule which: - // - // tuple: '(' [ star_named_expression ',' [star_named_expressions] ] ')' - // - // So, no yield expressions allowed. - for expr in parsed_with_items - .drain(..) - .map(|parsed_with_item| parsed_with_item.item.context_expr) - { - if matches!(expr, Expr::Yield(_) | Expr::YieldFrom(_)) { - self.add_error(ParseErrorType::InvalidYieldExpressionUsage, &expr); - } - elts.push(expr); - } - - Expr::Tuple(ast::ExprTuple { - range: self.node_range(start), - elts, - ctx: ExprContext::Load, - parenthesized: true, - }) - }; - - // Remember that the expression is parenthesized and the parser has just - // consumed the `)` token. We need to check for any possible postfix - // expressions. For example: - // - // ```python - // with (foo)(): ... - // # ^ - // - // with (1, 2)[0]: ... - // # ^ - // - // with (foo.bar).baz: ... - // # ^ - // ``` - // - // The reason being that the opening parenthesis is ambiguous and isn't - // considered when parsing the with item in the case. So, the parser - // stops when it sees the `)` token and doesn't check for any postfix - // expressions. - lhs = self.parse_postfix_expression(lhs, start); - - let context_expr = if self.at(TokenKind::If) { - // test_ok ambiguous_lpar_with_items_if_expr - // with (x) if True else y: ... - // with (x for x in iter) if True else y: ... - // with (x async for x in iter) if True else y: ... - // with (x)[0] if True else y: ... - Expr::If(self.parse_if_expression(lhs, start)) - } else { - // test_ok ambiguous_lpar_with_items_binary_expr - // # It doesn't matter what's inside the parentheses, these tests need to make sure - // # all binary expressions parses correctly. - // with (a) and b: ... - // with (a) is not b: ... - // # Make sure precedence works - // with (a) or b and c: ... - // with (a) and b or c: ... - // with (a | b) << c | d: ... - // # Postfix should still be parsed first - // with (a)[0] + b * c: ... - self.parse_binary_expression_or_higher_recursive( - lhs.into(), - OperatorPrecedence::Initial, - ExpressionContext::default(), - start, - ) - .expr - }; - - let optional_vars = self - .at(TokenKind::As) - .then(|| Box::new(self.parse_with_item_optional_vars().expr)); - - items.push(ast::WithItem { - range: self.node_range(start), - context_expr, - optional_vars, - }); + if with_item_kind.is_parenthesized() { + Some( + parsed_with_items + .into_iter() + .map(|parsed_with_item| parsed_with_item.item) + .collect(), + ) } else { - items.extend(parsed_with_items.drain(..).map(|item| item.item)); - } + self.rewind(checkpoint); - with_item_kind + None + } } /// Parses a single `with` item. @@ -2249,93 +2069,33 @@ impl<'src> Parser<'src> { fn parse_with_item(&mut self, state: WithItemParsingState) -> ParsedWithItem { let start = self.node_start(); - let mut used_ambiguous_lpar = false; - // The grammar for the context expression of a with item depends on the state // of with item parsing. - let context_expr = if state.is_ambiguous_lpar() { - // If it's in an ambiguous state, the parenthesis (`(`) could be part of any - // of the following expression: - // - // Tuple expression - star_named_expression - // Generator expression - named_expression - // Parenthesized expression - (yield_expr | named_expression) - // Parenthesized with items - expression - // - // Here, the right side specifies the grammar for an element corresponding - // to the expression mentioned in the left side. - // - // So, the grammar used should be able to parse an element belonging to any - // of the above expression. At a later point, once the parser understands - // where the parenthesis belongs to, it'll validate and report errors for - // any invalid expression usage. - // - // Thus, we can conclude that the grammar used should be: - // (yield_expr | star_named_expression) - let parsed_expr = self - .parse_named_expression_or_higher(ExpressionContext::yield_or_starred_bitwise_or()); - - if matches!(self.current_token_kind(), TokenKind::Async | TokenKind::For) { - if parsed_expr.is_unparenthesized_starred_expr() { - self.add_error( - ParseErrorType::IterableUnpackingInComprehension, - &parsed_expr, - ); - } - - let generator_expr = - if let WithItemParsingState::AmbiguousLparFirstItem(lpar_start) = state { - // The parser is at the first with item after the ambiguous `(` token. - // For example: - // - // ```python - // with (x for x in range(10)): ... - // with (x for x in range(10)), item: ... - // ``` - let generator_expr = self.parse_generator_expression( - parsed_expr.expr, - GeneratorExpressionInParentheses::Maybe { - lpar_start, - expr_start: start, - }, - ); - used_ambiguous_lpar = generator_expr.parenthesized; - generator_expr - } else { - // For better error recovery. We would not take this path if the - // expression was parenthesized as it would be parsed as a generator - // expression by `parse_conditional_expression_or_higher`. - // - // ```python - // # This path will be taken for - // with (item, x for x in range(10)): ... - // - // # This path will not be taken for - // with (item, (x for x in range(10))): ... - // ``` - self.parse_generator_expression( - parsed_expr.expr, - GeneratorExpressionInParentheses::No(start), - ) - }; - - if !generator_expr.parenthesized { - self.add_error( - ParseErrorType::OtherError( - "Unparenthesized generator expression cannot be used here".to_string(), - ), - generator_expr.range(), - ); - } - - Expr::Generator(generator_expr).into() - } else { - parsed_expr + let context_expr = match state { + WithItemParsingState::Speculative => { + // If it's in a speculative state, the parenthesis (`(`) could be part of any of the + // following expression: + // + // Tuple expression - star_named_expression + // Generator expression - named_expression + // Parenthesized expression - (yield_expr | named_expression) + // Parenthesized with items - expression + // + // Here, the right side specifies the grammar for an element corresponding to the + // expression mentioned in the left side. + // + // So, the grammar used should be able to parse an element belonging to any of the + // above expression. At a later point, once the parser understands where the + // parenthesis belongs to, it'll validate and report errors for any invalid expression + // usage. + // + // Thus, we can conclude that the grammar used should be: + // (yield_expr | star_named_expression) + self.parse_named_expression_or_higher( + ExpressionContext::yield_or_starred_bitwise_or(), + ) } - } else { - // If it's not in an ambiguous state, then the grammar of the with item - // should be used which is `expression`. - self.parse_conditional_expression_or_higher() + WithItemParsingState::Regular => self.parse_conditional_expression_or_higher(), }; let optional_vars = self @@ -2344,7 +2104,6 @@ impl<'src> Parser<'src> { ParsedWithItem { is_parenthesized: context_expr.is_parenthesized, - used_ambiguous_lpar, item: ast::WithItem { range: self.node_range(start), context_expr: context_expr.expr, @@ -3768,46 +3527,19 @@ enum MatchTokenKind { #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum WithItemParsingState { - /// The parser is currently parsing a with item without any ambiguity. + /// Parsing the with items without any ambiguity. Regular, - /// The parser is currently parsing the first with item after an ambiguous - /// left parenthesis. The contained offset is the start of the left parenthesis. - /// - /// ```python - /// with (item1, item2): ... - /// ``` - /// - /// The parser is at the start of `item1`. - AmbiguousLparFirstItem(TextSize), - - /// The parser is currently parsing one of the with items after an ambiguous - /// left parenthesis, but not the first one. - /// - /// ```python - /// with (item1, item2, item3): ... - /// ``` - /// - /// The parser could be at the start of `item2` or `item3`, but not `item1`. - AmbiguousLparRest, -} - -impl WithItemParsingState { - const fn is_ambiguous_lpar(self) -> bool { - matches!( - self, - Self::AmbiguousLparFirstItem(_) | Self::AmbiguousLparRest - ) - } + /// Parsing the with items in a speculative mode. + Speculative, } +#[derive(Debug)] struct ParsedWithItem { /// The contained with item. item: WithItem, /// If the context expression of the item is parenthesized. is_parenthesized: bool, - /// If the parsing used the ambiguous left parenthesis. - used_ambiguous_lpar: bool, } #[derive(Debug, Copy, Clone)] diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__with__ambiguous_lpar_with_items.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__with__ambiguous_lpar_with_items.py.snap index 92b3a5916bf26d..11746d587b22ba 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__with__ambiguous_lpar_with_items.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__with__ambiguous_lpar_with_items.py.snap @@ -375,10 +375,10 @@ Module( is_async: false, items: [ WithItem { - range: 364..384, + range: 363..384, context_expr: Generator( ExprGenerator { - range: 364..384, + range: 363..384, elt: Name( ExprName { range: 364..365, @@ -426,7 +426,7 @@ Module( is_async: false, }, ], - parenthesized: false, + parenthesized: true, }, ), optional_vars: None, @@ -459,90 +459,89 @@ Module( ), With( StmtWith { - range: 397..435, + range: 397..410, is_async: false, items: [ WithItem { - range: 403..407, - context_expr: Name( - ExprName { - range: 403..407, - id: "item", + range: 402..410, + context_expr: Tuple( + ExprTuple { + range: 402..410, + elts: [ + Name( + ExprName { + range: 403..407, + id: "item", + ctx: Load, + }, + ), + Name( + ExprName { + range: 409..410, + id: "x", + ctx: Load, + }, + ), + ], ctx: Load, + parenthesized: true, }, ), optional_vars: None, }, - WithItem { - range: 409..429, - context_expr: Generator( - ExprGenerator { - range: 409..429, - elt: Name( - ExprName { - range: 409..410, - id: "x", - ctx: Load, - }, - ), - generators: [ - Comprehension { - range: 411..429, - target: Name( - ExprName { - range: 415..416, - id: "x", - ctx: Store, - }, - ), - iter: Call( - ExprCall { - range: 420..429, - func: Name( - ExprName { - range: 420..425, - id: "range", - ctx: Load, - }, - ), - arguments: Arguments { - range: 425..429, - args: [ - NumberLiteral( - ExprNumberLiteral { - range: 426..428, - value: Int( - 10, - ), - }, - ), - ], - keywords: [], - }, - }, - ), - ifs: [], - is_async: false, - }, - ], - parenthesized: false, + ], + body: [], + }, + ), + For( + StmtFor { + range: 411..429, + is_async: false, + target: Name( + ExprName { + range: 415..416, + id: "x", + ctx: Store, + }, + ), + iter: Call( + ExprCall { + range: 420..429, + func: Name( + ExprName { + range: 420..425, + id: "range", + ctx: Load, }, ), - optional_vars: None, - }, - ], - body: [ - Expr( - StmtExpr { - range: 432..435, - value: EllipsisLiteral( - ExprEllipsisLiteral { - range: 432..435, - }, - ), + arguments: Arguments { + range: 425..429, + args: [ + NumberLiteral( + ExprNumberLiteral { + range: 426..428, + value: Int( + 10, + ), + }, + ), + ], + keywords: [], }, - ), - ], + }, + ), + body: [], + orelse: [], + }, + ), + Expr( + StmtExpr { + range: 432..435, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 432..435, + }, + ), }, ), With( @@ -588,10 +587,10 @@ Module( is_async: false, items: [ WithItem { - range: 523..539, + range: 522..539, context_expr: Generator( ExprGenerator { - range: 523..539, + range: 522..539, elt: Starred( ExprStarred { range: 523..525, @@ -626,7 +625,7 @@ Module( is_async: false, }, ], - parenthesized: false, + parenthesized: true, }, ), optional_vars: None, @@ -659,88 +658,92 @@ Module( ), With( StmtWith { - range: 552..594, + range: 552..567, is_async: false, items: [ WithItem { - range: 558..563, - context_expr: Name( - ExprName { - range: 558..563, - id: "item1", - ctx: Load, - }, - ), - optional_vars: None, - }, - WithItem { - range: 565..581, - context_expr: Generator( - ExprGenerator { - range: 565..581, - elt: Starred( - ExprStarred { - range: 565..567, - value: Name( - ExprName { - range: 566..567, - id: "x", - ctx: Load, - }, - ), - ctx: Load, - }, - ), - generators: [ - Comprehension { - range: 568..581, - target: Name( - ExprName { - range: 572..573, - id: "x", - ctx: Store, - }, - ), - iter: Name( - ExprName { - range: 577..581, - id: "iter", - ctx: Load, - }, - ), - ifs: [], - is_async: false, - }, + range: 557..567, + context_expr: Tuple( + ExprTuple { + range: 557..567, + elts: [ + Name( + ExprName { + range: 558..563, + id: "item1", + ctx: Load, + }, + ), + Starred( + ExprStarred { + range: 565..567, + value: Name( + ExprName { + range: 566..567, + id: "x", + ctx: Load, + }, + ), + ctx: Load, + }, + ), ], - parenthesized: false, - }, - ), - optional_vars: None, - }, - WithItem { - range: 583..588, - context_expr: Name( - ExprName { - range: 583..588, - id: "item2", ctx: Load, + parenthesized: true, }, ), optional_vars: None, }, ], - body: [ - Expr( - StmtExpr { - range: 591..594, - value: EllipsisLiteral( - ExprEllipsisLiteral { - range: 591..594, + body: [], + }, + ), + For( + StmtFor { + range: 568..588, + is_async: false, + target: Name( + ExprName { + range: 572..573, + id: "x", + ctx: Store, + }, + ), + iter: Tuple( + ExprTuple { + range: 577..588, + elts: [ + Name( + ExprName { + range: 577..581, + id: "iter", + ctx: Load, }, ), - }, - ), - ], + Name( + ExprName { + range: 583..588, + id: "item2", + ctx: Load, + }, + ), + ], + ctx: Load, + parenthesized: false, + }, + ), + body: [], + orelse: [], + }, + ), + Expr( + StmtExpr { + range: 591..594, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 591..594, + }, + ), }, ), With( @@ -1095,10 +1098,10 @@ Module( is_async: false, items: [ WithItem { - range: 751..771, + range: 750..771, context_expr: Generator( ExprGenerator { - range: 751..766, + range: 750..766, elt: Name( ExprName { range: 751..752, @@ -1127,7 +1130,7 @@ Module( is_async: false, }, ], - parenthesized: false, + parenthesized: true, }, ), optional_vars: Some( @@ -1360,7 +1363,7 @@ Module( 2 | # These cases should raise the correct syntax error and recover properly. 3 | 4 | with (item1, item2),: ... - | ^ Syntax Error: Expected an expression + | ^ Syntax Error: Trailing comma not allowed 5 | with (item1, item2), as f: ... 6 | with (item1, item2), item3,: ... | @@ -1369,7 +1372,7 @@ Module( | 4 | with (item1, item2),: ... 5 | with (item1, item2), as f: ... - | ^^ Syntax Error: Expected an expression + | ^^ Syntax Error: Expected an expression or the end of the with item list 6 | with (item1, item2), item3,: ... 7 | with (*item): ... | @@ -1429,16 +1432,45 @@ Module( 9 | with (item := 10 as f): ... 10 | with (item1, item2 := 10 as f): ... 11 | with (x for x in range(10), item): ... - | ^^^^^^^^^^^^^^^^^^^^ Syntax Error: Unparenthesized generator expression cannot be used here + | ^ Syntax Error: Expected ')', found ',' 12 | with (item, x for x in range(10)): ... | | + 9 | with (item := 10 as f): ... 10 | with (item1, item2 := 10 as f): ... 11 | with (x for x in range(10), item): ... + | ^ Syntax Error: Expected ',', found ')' 12 | with (item, x for x in range(10)): ... - | ^^^^^^^^^^^^^^^^^^^^ Syntax Error: Unparenthesized generator expression cannot be used here + | + + + | +10 | with (item1, item2 := 10 as f): ... +11 | with (x for x in range(10), item): ... +12 | with (item, x for x in range(10)): ... + | ^^^ Syntax Error: Expected ',', found 'for' +13 | +14 | # Make sure the parser doesn't report the same error twice + | + + + | +10 | with (item1, item2 := 10 as f): ... +11 | with (x for x in range(10), item): ... +12 | with (item, x for x in range(10)): ... + | ^ Syntax Error: Expected ':', found ')' +13 | +14 | # Make sure the parser doesn't report the same error twice + | + + + | +10 | with (item1, item2 := 10 as f): ... +11 | with (x for x in range(10), item): ... +12 | with (item, x for x in range(10)): ... + | ^ Syntax Error: Expected a statement 13 | 14 | # Make sure the parser doesn't report the same error twice | @@ -1463,10 +1495,48 @@ Module( | + | +15 | with ((*item)): ... +16 | +17 | with (*x for x in iter, item): ... + | ^ Syntax Error: Expected ')', found ',' +18 | with (item1, *x for x in iter, item2): ... +19 | with (x as f, *y): ... + | + + + | +15 | with ((*item)): ... +16 | +17 | with (*x for x in iter, item): ... + | ^ Syntax Error: Expected ',', found ')' +18 | with (item1, *x for x in iter, item2): ... +19 | with (x as f, *y): ... + | + + + | +17 | with (*x for x in iter, item): ... +18 | with (item1, *x for x in iter, item2): ... + | ^^^ Syntax Error: Expected ',', found 'for' +19 | with (x as f, *y): ... +20 | with (*x, y as f): ... + | + + | 17 | with (*x for x in iter, item): ... 18 | with (item1, *x for x in iter, item2): ... - | ^^ Syntax Error: Iterable unpacking cannot be used in a comprehension + | ^ Syntax Error: Expected ':', found ')' +19 | with (x as f, *y): ... +20 | with (*x, y as f): ... + | + + + | +17 | with (*x for x in iter, item): ... +18 | with (item1, *x for x in iter, item2): ... + | ^ Syntax Error: Expected a statement 19 | with (x as f, *y): ... 20 | with (*x, y as f): ... | @@ -1535,7 +1605,17 @@ Module( 23 | with (x, yield from y): ... 24 | with (x as f, y) as f: ... 25 | with (x for x in iter as y): ... - | ^^^^^^^^^^^^^^^ Syntax Error: Unparenthesized generator expression cannot be used here + | ^^ Syntax Error: Expected ')', found 'as' +26 | +27 | # The inner `(...)` is parsed as parenthesized expression + | + + + | +23 | with (x, yield from y): ... +24 | with (x as f, y) as f: ... +25 | with (x for x in iter as y): ... + | ^ Syntax Error: Expected ',', found ')' 26 | 27 | # The inner `(...)` is parsed as parenthesized expression | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__with__unclosed_ambiguous_lpar.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__with__unclosed_ambiguous_lpar.py.snap index c9ee426b17ee34..cb8bddca0d2051 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__with__unclosed_ambiguous_lpar.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__with__unclosed_ambiguous_lpar.py.snap @@ -15,7 +15,7 @@ Module( is_async: false, items: [ WithItem { - range: 6..6, + range: 5..6, context_expr: Name( ExprName { range: 6..6, @@ -25,32 +25,34 @@ Module( ), optional_vars: None, }, - WithItem { - range: 9..14, - context_expr: BinOp( - ExprBinOp { - range: 9..14, - left: Name( - ExprName { - range: 9..10, - id: "x", - ctx: Load, - }, - ), - op: Add, - right: Name( - ExprName { - range: 13..14, - id: "y", - ctx: Load, - }, - ), - }, - ), - optional_vars: None, - }, ], - body: [], + body: [ + Expr( + StmtExpr { + range: 9..14, + value: BinOp( + ExprBinOp { + range: 9..14, + left: Name( + ExprName { + range: 9..10, + id: "x", + ctx: Load, + }, + ), + op: Add, + right: Name( + ExprName { + range: 13..14, + id: "y", + ctx: Load, + }, + ), + }, + ), + }, + ), + ], }, ), ], diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__with__unclosed_ambiguous_lpar_eof.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__with__unclosed_ambiguous_lpar_eof.py.snap index 64e5045ad08267..54267677e53e33 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__with__unclosed_ambiguous_lpar_eof.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__with__unclosed_ambiguous_lpar_eof.py.snap @@ -15,7 +15,7 @@ Module( is_async: false, items: [ WithItem { - range: 6..6, + range: 5..6, context_expr: Name( ExprName { range: 6..6, diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@with_items_parenthesized_missing_comma.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@with_items_parenthesized_missing_comma.py.snap index 53bb021c7bc76f..68009deba08375 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@with_items_parenthesized_missing_comma.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@with_items_parenthesized_missing_comma.py.snap @@ -239,42 +239,49 @@ Module( ), With( StmtWith { - range: 136..160, + range: 136..159, is_async: false, items: [ WithItem { - range: 142..147, - context_expr: Name( - ExprName { - range: 142..147, - id: "item1", - ctx: Load, - }, - ), - optional_vars: None, - }, - WithItem { - range: 149..154, - context_expr: Name( - ExprName { - range: 149..154, - id: "item2", + range: 141..154, + context_expr: Tuple( + ExprTuple { + range: 141..154, + elts: [ + Name( + ExprName { + range: 142..147, + id: "item1", + ctx: Load, + }, + ), + Name( + ExprName { + range: 149..154, + id: "item2", + ctx: Load, + }, + ), + ], ctx: Load, + parenthesized: true, }, ), optional_vars: None, }, - WithItem { - range: 156..159, - context_expr: EllipsisLiteral( - ExprEllipsisLiteral { - range: 156..159, - }, - ), - optional_vars: None, - }, ], - body: [], + body: [ + Expr( + StmtExpr { + range: 156..159, + value: EllipsisLiteral( + ExprEllipsisLiteral { + range: 156..159, + }, + ), + }, + ), + ], }, ), ],