Skip to content

Commit

Permalink
Disallow non-parenthesized lambda expr in f-string (#7263)
Browse files Browse the repository at this point in the history
## Summary

This PR updates the handling of disallowing non-parenthesized lambda
expr in f-strings.

Previously, the lexer was used to emit an empty `FStringMiddle` token in
certain cases for which there's no pattern in the parser to match. That would
then raise an unexpected token error while parsing.

This PR adds a new f-string error type `LambdaWithoutParentheses`. In
cases where the parser still can't detect the error, it's guaranteed to be
caught by the fact that there's no `FStringMiddle` token in the pattern.

## Test Plan

Add test cases wherever we throw the `LambdaWithoutParentheses` error.

## Benchmarks

As this is the final PR for the parser, I'm putting the parser benchmarks here:

```
group                         fstring-parser                         main
-----                         --------------                         ----
parser/large/dataset.py       1.00      4.7±0.24ms     8.7 MB/sec    1.03      4.8±0.25ms     8.4 MB/sec
parser/numpy/ctypeslib.py     1.03   921.8±39.00µs    18.1 MB/sec    1.00   897.6±39.03µs    18.6 MB/sec
parser/numpy/globals.py       1.01     90.4±5.23µs    32.6 MB/sec    1.00     89.6±6.24µs    32.9 MB/sec
parser/pydantic/types.py      1.00  1899.5±94.78µs    13.4 MB/sec    1.03  1954.4±105.88µs    13.0 MB/sec
parser/unicode/pypinyin.py    1.03   292.3±21.14µs    14.4 MB/sec    1.00   283.2±13.16µs    14.8 MB/sec
```
  • Loading branch information
dhruvmanila committed Sep 18, 2023
1 parent 4a37b53 commit fab6b9d
Show file tree
Hide file tree
Showing 11 changed files with 9,334 additions and 9,162 deletions.
6 changes: 6 additions & 0 deletions crates/ruff_python_ast/src/nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3254,6 +3254,12 @@ pub struct ParenthesizedExpr {
/// The underlying expression.
pub expr: Expr,
}
impl ParenthesizedExpr {
/// Returns `true` if the expression is may be parenthesized.
pub fn is_parenthesized(&self) -> bool {
self.range != self.expr.range()
}
}
impl Ranged for ParenthesizedExpr {
fn range(&self) -> TextRange {
self.range
Expand Down
33 changes: 8 additions & 25 deletions crates/ruff_python_parser/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,6 @@ impl<'source> Lexer<'source> {
let mut last_offset = self.offset();

let mut in_named_unicode = false;
let mut end_format_spec = false;

loop {
match self.cursor.first() {
Expand Down Expand Up @@ -618,7 +617,6 @@ impl<'source> Lexer<'source> {
self.cursor.bump(); // Skip the second `}`
last_offset = self.offset();
} else {
end_format_spec = fstring.is_in_format_spec(self.nesting);
break;
}
}
Expand All @@ -628,36 +626,20 @@ impl<'source> Lexer<'source> {
}
}
let range = self.token_range();

// Avoid emitting the empty `FStringMiddle` token for anything other than
// the closing curly braces (`}`).
if range.is_empty() && !end_format_spec {
if range.is_empty() {
return Ok(None);
}

let value = if range.is_empty() {
// Emit an empty `FStringMiddle` token for a special case to disallow
// lambda expressions without parenthesis. For example, in `f"{lambda x:{x}}"`
// the lexer wouldn't have emitted a `FStringMiddle` token.
String::new()
} else if normalized.is_empty() {
let value = if normalized.is_empty() {
self.source[range].to_string()
} else {
normalized.push_str(&self.source[TextRange::new(last_offset, self.offset())]);
normalized
};
let is_raw = fstring.is_raw_string();
if end_format_spec {
// We need to decrement the format spec depth to avoid going into infinite
// loop where the lexer keeps on emitting an empty `FStringMiddle` token.
// This is because the lexer still thinks that we're in a f-string expression
// but as we've encountered a `}` token, we need to decrement the depth so
// that the lexer can go forward with the next token.
//
// SAFETY: Safe because the function is only called when `self.fstrings` is not empty.
self.fstrings.current_mut().unwrap().end_format_spec();
}
Ok(Some(Tok::FStringMiddle { value, is_raw }))
Ok(Some(Tok::FStringMiddle {
value,
is_raw: fstring.is_raw_string(),
}))
}

/// Lex a string literal.
Expand Down Expand Up @@ -1103,13 +1085,14 @@ impl<'source> Lexer<'source> {
Tok::Lbrace
}
'}' => {
if let Some(fstring) = self.fstrings.current() {
if let Some(fstring) = self.fstrings.current_mut() {
if fstring.nesting() == self.nesting {
return Err(LexicalError {
error: LexicalErrorType::FStringError(FStringErrorType::SingleRbrace),
location: self.token_start(),
});
}
fstring.try_end_format_spec(self.nesting);
}
self.nesting = self.nesting.saturating_sub(1);
Tok::Rbrace
Expand Down
9 changes: 6 additions & 3 deletions crates/ruff_python_parser/src/lexer/fstring.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,12 @@ impl FStringContext {
}
}

/// Decrements the format spec depth unconditionally.
pub(crate) fn end_format_spec(&mut self) {
self.format_spec_depth = self.format_spec_depth.saturating_sub(1);
/// Decrements the format spec depth if the current f-string is in a format
/// spec.
pub(crate) fn try_end_format_spec(&mut self, current_nesting: u32) {
if self.is_in_format_spec(current_nesting) {
self.format_spec_depth = self.format_spec_depth.saturating_sub(1);
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion crates/ruff_python_parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@
//! return bool(i & 1)
//! "#;
//! let tokens = lex(python_source, Mode::Module);
//! let ast = parse_tokens(tokens, Mode::Module, "<embedded>");
//! let ast = parse_tokens(tokens, python_source, Mode::Module, "<embedded>");
//!
//! assert!(ast.is_ok());
//! ```
Expand Down
26 changes: 19 additions & 7 deletions crates/ruff_python_parser/src/python.lalrpop
Original file line number Diff line number Diff line change
Expand Up @@ -1351,7 +1351,13 @@ NamedExpression: ast::ParenthesizedExpr = {
};

LambdaDef: ast::ParenthesizedExpr = {
<location:@L> "lambda" <location_args:@L> <parameters:ParameterList<UntypedParameter, StarUntypedParameter, StarUntypedParameter>?> <end_location_args:@R> ":" <body:Test<"all">> <end_location:@R> =>? {
<location:@L> "lambda" <location_args:@L> <parameters:ParameterList<UntypedParameter, StarUntypedParameter, StarUntypedParameter>?> <end_location_args:@R> ":" <fstring_middle:fstring_middle?> <body:Test<"all">> <end_location:@R> =>? {
if fstring_middle.is_some() {
return Err(LexicalError {
error: LexicalErrorType::FStringError(FStringErrorType::LambdaWithoutParentheses),
location,
})?;
}
parameters.as_ref().map(validate_arguments).transpose()?;

Ok(ast::ExprLambda {
Expand Down Expand Up @@ -1589,23 +1595,29 @@ StringLiteral: StringType = {
FStringExpr: StringType = {
<location:@L> FStringStart <values:FStringMiddlePattern*> FStringEnd <end_location:@R> => {
StringType::FString(ast::ExprFString {
values: values.into_iter().flatten().collect(),
values,
implicit_concatenated: false,
range: (location..end_location).into()
})
}
};

FStringMiddlePattern: Option<ast::Expr> = {
FStringMiddlePattern: ast::Expr = {
FStringReplacementField,
<start_location:@L> <fstring_middle:fstring_middle> =>? {
let (source, is_raw) = fstring_middle;
Ok(parse_fstring_middle(&source, is_raw, start_location)?)
}
};

FStringReplacementField: Option<ast::Expr> = {
<location:@L> "{" <value:TestListOrYieldExpr> <debug:"="?> <conversion:FStringConversion?> <format_spec:FStringFormatSpecSuffix?> "}" <end_location:@R> => {
FStringReplacementField: ast::Expr = {
<location:@L> "{" <value:TestListOrYieldExpr> <debug:"="?> <conversion:FStringConversion?> <format_spec:FStringFormatSpecSuffix?> "}" <end_location:@R> =>? {
if value.expr.is_lambda_expr() && !value.is_parenthesized() {
return Err(LexicalError {
error: LexicalErrorType::FStringError(FStringErrorType::LambdaWithoutParentheses),
location: value.start(),
})?;
}
let debug_text = debug.map(|_| {
let start_offset = location + "{".text_len();
let end_offset = if let Some((conversion_start, _)) = conversion {
Expand All @@ -1621,7 +1633,7 @@ FStringReplacementField: Option<ast::Expr> = {
trailing: source_code[TextRange::new(value.range().end(), end_offset)].to_string(),
}
});
Some(
Ok(
ast::ExprFormattedValue {
value: Box::new(value.into()),
debug_text,
Expand All @@ -1643,7 +1655,7 @@ FStringFormatSpecSuffix: ast::Expr = {
FStringFormatSpec: ast::Expr = {
<location:@L> <values:FStringMiddlePattern*> <end_location:@R> => {
ast::ExprFString {
values: values.into_iter().flatten().collect(),
values,
implicit_concatenated: false,
range: (location..end_location).into()
}.into()
Expand Down
Loading

0 comments on commit fab6b9d

Please sign in to comment.