From c361239963de034b38a3922b678c567aede73d5f Mon Sep 17 00:00:00 2001
From: Dhruv Manilawala
Date: Wed, 29 May 2024 11:43:02 +0530
Subject: [PATCH] Implement `TokenFlags` stored on each `Token` (#11578)

## Summary

This PR implements `TokenFlags`, which is stored on each `Token`, with certain
flags set depending on the token kind. Currently, it's equivalent to
`AnyStringFlags`, but in the future it will help provide additional
information about certain tokens, such as unterminated strings or number
kinds.

The main motivation for adding `TokenFlags` is to store information related to
a token that downstream tools can then use. Currently, the information is only
related to string tokens.

Downstream tools should not be allowed to access the flags directly; they're
an implementation detail. Instead, methods will be provided on `Token` to
query certain information. An example can be seen in the follow-up PR
(https://github.com/astral-sh/ruff/pull/11592). (A self-contained sketch of
this pattern is included after the diff below.)

For example, the `Stylist` and `Indexer` use the string flags stored on the
`String`/`FStringStart` token to get certain information. They will be updated
to use these flags instead, thus removing the need for `Tok` completely.

Prior art in TypeScript:
https://github.com/microsoft/TypeScript/blob/16beff101ae1dae0600820ebf22632ac8a40cfc8/src/compiler/types.ts#L2788-L2827

---
 crates/ruff_python_ast/src/str_prefix.rs      |  39 ---
 crates/ruff_python_parser/src/lexer.rs        | 275 +++++++++++++-----
 .../ruff_python_parser/src/lexer/fstring.rs   |  14 +-
 .../src/parser/expression.rs                  |  26 +-
 crates/ruff_python_parser/src/token_source.rs |  13 +-
 5 files changed, 232 insertions(+), 135 deletions(-)

diff --git a/crates/ruff_python_ast/src/str_prefix.rs b/crates/ruff_python_ast/src/str_prefix.rs
index e6784d2604840..b2da865d1772f 100644
--- a/crates/ruff_python_ast/src/str_prefix.rs
+++ b/crates/ruff_python_ast/src/str_prefix.rs
@@ -150,45 +150,6 @@ impl AnyStringPrefix {
     }
 }
 
-impl TryFrom<char> for AnyStringPrefix {
-    type Error = String;
-
-    fn try_from(value: char) -> Result<Self, Self::Error> {
-        let result = match value {
-            'r' => Self::Regular(StringLiteralPrefix::Raw { uppercase: false }),
-            'R' => Self::Regular(StringLiteralPrefix::Raw { uppercase: true }),
-            'u' | 'U' => Self::Regular(StringLiteralPrefix::Unicode),
-            'b' | 'B' => Self::Bytes(ByteStringPrefix::Regular),
-            'f' | 'F' => Self::Format(FStringPrefix::Regular),
-            _ => return Err(format!("Unexpected prefix '{value}'")),
-        };
-        Ok(result)
-    }
-}
-
-impl TryFrom<[char; 2]> for AnyStringPrefix {
-    type Error = String;
-
-    fn try_from(value: [char; 2]) -> Result<Self, Self::Error> {
-        let result = match value {
-            ['r', 'f' | 'F'] | ['f' | 'F', 'r'] => {
-                Self::Format(FStringPrefix::Raw { uppercase_r: false })
-            }
-            ['R', 'f' | 'F'] | ['f' | 'F', 'R'] => {
-                Self::Format(FStringPrefix::Raw { uppercase_r: true })
-            }
-            ['r', 'b' | 'B'] | ['b' | 'B', 'r'] => {
-                Self::Bytes(ByteStringPrefix::Raw { uppercase_r: false })
-            }
-            ['R', 'b' | 'B'] | ['b' | 'B', 'R'] => {
-                Self::Bytes(ByteStringPrefix::Raw { uppercase_r: true })
-            }
-            _ => return Err(format!("Unexpected prefix '{}{}'", value[0], value[1])),
-        };
-        Ok(result)
-    }
-}
-
 impl fmt::Display for AnyStringPrefix {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         f.write_str(self.as_str())
diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs
index e57f88f9a9f80..d7b825e38457f 100644
--- a/crates/ruff_python_parser/src/lexer.rs
+++ b/crates/ruff_python_parser/src/lexer.rs
@@ -13,14 +13,15 @@
 use std::{char, cmp::Ordering, str::FromStr};
 
+use bitflags::bitflags;
+use ruff_python_ast::str::Quote;
+use ruff_python_ast::str_prefix::{
+    AnyStringPrefix, ByteStringPrefix, FStringPrefix, StringLiteralPrefix,
+};
 use unicode_ident::{is_xid_continue, is_xid_start};
 use unicode_normalization::UnicodeNormalization;
 
-use ruff_python_ast::{
-    str::Quote,
-    str_prefix::{AnyStringPrefix, FStringPrefix},
-    AnyStringFlags, Int, IpyEscapeKind, StringFlags,
-};
+use ruff_python_ast::{AnyStringFlags, Int, IpyEscapeKind, StringFlags};
 use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
 
 use crate::error::FStringErrorType;
@@ -35,6 +36,8 @@ mod indentation;
 #[deprecated]
 pub fn lex(_source: &str, _mode: Mode) {}
+#[deprecated]
+pub fn lex_starts_at(_source: &str, _mode: Mode, _offset: TextSize) {}
 
 /// A lexer for Python source code.
 #[derive(Debug)]
 pub struct Lexer<'src> {
@@ -54,6 +57,9 @@ pub struct Lexer<'src> {
     /// The value of the current token.
     current_value: TokenValue,
 
+    /// Flags for the current token.
+    current_flags: TokenFlags,
+
     /// Lexer state.
     state: State,
 
@@ -94,6 +100,7 @@ impl<'src> Lexer<'src> {
             current_kind: TokenKind::EndOfFile,
             current_range: TextRange::empty(start_offset),
             current_value: TokenValue::None,
+            current_flags: TokenFlags::empty(),
             nesting: 0,
             indentations: Indentations::default(),
             pending_indentation: None,
@@ -123,44 +130,73 @@ impl<'src> Lexer<'src> {
         self.current_range
     }
 
+    /// Returns the flags for the current token.
+    pub(crate) fn current_flags(&self) -> TokenFlags {
+        self.current_flags
+    }
+
     /// Helper function to push the given error and return the [`TokenKind::Unknown`] token.
     fn push_error(&mut self, error: LexicalError) -> TokenKind {
         self.errors.push(error);
         TokenKind::Unknown
     }
 
-    /// Lex an identifier. Also used for keywords and string/bytes literals with a prefix.
-    fn lex_identifier(&mut self, first: char) -> TokenKind {
-        // Detect potential string like rb'' b'' f'' u'' r''
-        match (first, self.cursor.first()) {
-            ('f' | 'F', quote @ ('\'' | '"')) => {
-                self.cursor.bump();
-                return self.lex_fstring_start(quote, FStringPrefix::Regular);
+    /// Try lexing the single character string prefix, updating the token flags accordingly.
+    /// Returns `true` if it matches.
+    fn try_single_char_prefix(&mut self, first: char) -> bool {
+        match first {
+            'f' | 'F' => self.current_flags |= TokenFlags::F_STRING,
+            'u' | 'U' => self.current_flags |= TokenFlags::UNICODE_STRING,
+            'b' | 'B' => self.current_flags |= TokenFlags::BYTE_STRING,
+            'r' => self.current_flags |= TokenFlags::RAW_STRING_LOWERCASE,
+            'R' => self.current_flags |= TokenFlags::RAW_STRING_UPPERCASE,
+            _ => return false,
+        }
+        true
+    }
+
+    /// Try lexing the double character string prefix, updating the token flags accordingly.
+    /// Returns `true` if it matches.
+    fn try_double_char_prefix(&mut self, value: [char; 2]) -> bool {
+        match value {
+            ['r', 'f' | 'F'] | ['f' | 'F', 'r'] => {
+                self.current_flags |= TokenFlags::F_STRING | TokenFlags::RAW_STRING_LOWERCASE;
             }
-            ('r', 'f' | 'F') | ('f' | 'F', 'r') if is_quote(self.cursor.second()) => {
-                self.cursor.bump();
-                let quote = self.cursor.bump().unwrap();
-                return self.lex_fstring_start(quote, FStringPrefix::Raw { uppercase_r: false });
+            ['R', 'f' | 'F'] | ['f' | 'F', 'R'] => {
+                self.current_flags |= TokenFlags::F_STRING | TokenFlags::RAW_STRING_UPPERCASE;
             }
-            ('R', 'f' | 'F') | ('f' | 'F', 'R') if is_quote(self.cursor.second()) => {
-                self.cursor.bump();
-                let quote = self.cursor.bump().unwrap();
-                return self.lex_fstring_start(quote, FStringPrefix::Raw { uppercase_r: true });
+            ['r', 'b' | 'B'] | ['b' | 'B', 'r'] => {
+                self.current_flags |= TokenFlags::BYTE_STRING | TokenFlags::RAW_STRING_LOWERCASE;
             }
-            (_, quote @ ('\'' | '"')) => {
-                if let Ok(prefix) = AnyStringPrefix::try_from(first) {
+            ['R', 'b' | 'B'] | ['b' | 'B', 'R'] => {
+                self.current_flags |= TokenFlags::BYTE_STRING | TokenFlags::RAW_STRING_UPPERCASE;
+            }
+            _ => return false,
+        }
+        true
+    }
+
+    /// Lex an identifier. Also used for keywords and string/bytes literals with a prefix.
+    fn lex_identifier(&mut self, first: char) -> TokenKind {
+        // Detect potential string like rb'' b'' f'' u'' r''
+        let quote = match (first, self.cursor.first()) {
+            (_, quote @ ('\'' | '"')) => self.try_single_char_prefix(first).then_some(quote),
+            (_, second) if is_quote(self.cursor.second()) => {
+                self.try_double_char_prefix([first, second]).then(|| {
                     self.cursor.bump();
-                    return self.lex_string(prefix, quote);
-                }
+                    // SAFETY: Safe because of the `is_quote` check in this match arm's guard
+                    self.cursor.bump().unwrap()
+                })
             }
-            (_, second @ ('r' | 'R' | 'b' | 'B')) if is_quote(self.cursor.second()) => {
-                self.cursor.bump();
-                if let Ok(prefix) = AnyStringPrefix::try_from([first, second]) {
-                    let quote = self.cursor.bump().unwrap();
-                    return self.lex_string(prefix, quote);
-                }
+            _ => None,
+        };
+
+        if let Some(quote) = quote {
+            if self.current_flags.is_f_string() {
+                return self.lex_fstring_start(quote);
             }
-            _ => {}
+
+            return self.lex_string(quote);
         }
 
         // Keep track of whether the identifier is ASCII-only or not.
@@ -528,25 +564,21 @@ impl<'src> Lexer<'src> {
     }
 
     /// Lex a f-string start token.
-    fn lex_fstring_start(&mut self, quote: char, prefix: FStringPrefix) -> TokenKind {
+    fn lex_fstring_start(&mut self, quote: char) -> TokenKind {
         #[cfg(debug_assertions)]
         debug_assert_eq!(self.cursor.previous(), quote);
 
-        let mut flags = AnyStringFlags::default()
-            .with_prefix(AnyStringPrefix::Format(prefix))
-            .with_quote_style(if quote == '"' {
-                Quote::Double
-            } else {
-                Quote::Single
-            });
+        if quote == '"' {
+            self.current_flags |= TokenFlags::DOUBLE_QUOTES;
+        }
 
         if self.cursor.eat_char2(quote, quote) {
-            flags = flags.with_triple_quotes();
+            self.current_flags |= TokenFlags::TRIPLE_QUOTED_STRING;
         }
 
-        self.fstrings.push(FStringContext::new(flags, self.nesting));
+        self.fstrings
+            .push(FStringContext::new(self.current_flags, self.nesting));
 
-        self.current_value = TokenValue::FStringStart(flags);
         TokenKind::FStringStart
     }
@@ -555,6 +587,9 @@
         // SAFETY: Safe because the function is only called when `self.fstrings` is not empty.
         let fstring = self.fstrings.current().unwrap();
 
+        // Keep the current flags in sync throughout the f-string context.
+        self.current_flags = fstring.flags();
+
         // Check if we're at the end of the f-string.
         if fstring.is_triple_quoted() {
             let quote_char = fstring.quote_char();
@@ -682,35 +717,30 @@ impl<'src> Lexer<'src> {
         self.current_value = TokenValue::FStringMiddle {
             value: value.into_boxed_str(),
-            flags: fstring.flags(),
         };
 
         Some(TokenKind::FStringMiddle)
     }
 
     /// Lex a string literal.
-    fn lex_string(&mut self, prefix: AnyStringPrefix, quote: char) -> TokenKind {
+    fn lex_string(&mut self, quote: char) -> TokenKind {
         #[cfg(debug_assertions)]
         debug_assert_eq!(self.cursor.previous(), quote);
 
-        let mut flags = AnyStringFlags::default()
-            .with_prefix(prefix)
-            .with_quote_style(if quote == '"' {
-                Quote::Double
-            } else {
-                Quote::Single
-            });
+        if quote == '"' {
+            self.current_flags |= TokenFlags::DOUBLE_QUOTES;
+        }
 
         // If the next two characters are also the quote character, then we have a triple-quoted
         // string; consume those two characters and ensure that we require a triple-quote to close
         if self.cursor.eat_char2(quote, quote) {
-            flags = flags.with_triple_quotes();
+            self.current_flags |= TokenFlags::TRIPLE_QUOTED_STRING;
         }
 
         let value_start = self.offset();
 
         let quote_byte = u8::try_from(quote).expect("char that fits in u8");
-        let value_end = if flags.is_triple_quoted() {
+        let value_end = if self.current_flags.is_triple_quoted() {
             // For triple-quoted strings, scan until we find the closing quote (ignoring escaped
             // quotes) or the end of the file.
             loop {
@@ -798,7 +828,6 @@ impl<'src> Lexer<'src> {
             value: self.source[TextRange::new(value_start, value_end)]
                 .to_string()
                 .into_boxed_str(),
-            flags,
         };
 
         TokenKind::String
@@ -808,6 +837,7 @@
     pub(crate) fn next_token(&mut self) -> TokenKind {
         self.cursor.start_token();
         self.current_value = TokenValue::None;
+        self.current_flags = TokenFlags::empty();
         self.current_kind = self.lex_token();
         self.current_range = self.token_range();
         self.current_kind
@@ -1026,7 +1056,7 @@ impl<'src> Lexer<'src> {
             c if is_ascii_identifier_start(c) => self.lex_identifier(c),
             '0'..='9' => self.lex_number(c),
             '#' => return self.lex_comment(),
-            '\'' | '"' => self.lex_string(AnyStringPrefix::default(), c),
+            '\'' | '"' => self.lex_string(c),
             '=' => {
                 if self.cursor.eat_char('=') {
                     TokenKind::EqEqual
@@ -1310,6 +1340,7 @@ impl<'src> Lexer<'src> {
             value: self.current_value.clone(),
             current_kind: self.current_kind,
             current_range: self.current_range,
+            current_flags: self.current_flags,
             cursor: self.cursor.clone(),
             state: self.state,
             nesting: self.nesting,
@@ -1322,16 +1353,31 @@ impl<'src> Lexer<'src> {
 
     /// Restore the lexer to the given checkpoint.
     pub(crate) fn rewind(&mut self, checkpoint: LexerCheckpoint<'src>) {
-        self.current_value = checkpoint.value;
-        self.current_kind = checkpoint.current_kind;
-        self.current_range = checkpoint.current_range;
-        self.cursor = checkpoint.cursor;
-        self.state = checkpoint.state;
-        self.nesting = checkpoint.nesting;
-        self.indentations.rewind(checkpoint.indentations_checkpoint);
-        self.pending_indentation = checkpoint.pending_indentation;
-        self.fstrings.rewind(checkpoint.fstrings_checkpoint);
-        self.errors.truncate(checkpoint.errors_position);
+        let LexerCheckpoint {
+            value,
+            current_kind,
+            current_range,
+            current_flags,
+            cursor,
+            state,
+            nesting,
+            indentations_checkpoint,
+            pending_indentation,
+            fstrings_checkpoint,
+            errors_position,
+        } = checkpoint;
+
+        self.current_value = value;
+        self.current_kind = current_kind;
+        self.current_range = current_range;
+        self.current_flags = current_flags;
+        self.cursor = cursor;
+        self.state = state;
+        self.nesting = nesting;
+        self.indentations.rewind(indentations_checkpoint);
+        self.pending_indentation = pending_indentation;
+        self.fstrings.rewind(fstrings_checkpoint);
+        self.errors.truncate(errors_position);
     }
 
     pub fn finish(self) -> Vec<LexicalError> {
@@ -1339,17 +1385,102 @@ impl<'src> Lexer<'src> {
     }
 }
 
+bitflags! {
+    #[derive(Clone, Copy, Debug)]
+    pub(crate) struct TokenFlags: u8 {
+        /// The token is a string with double quotes (`"`).
+        const DOUBLE_QUOTES = 1 << 0;
+        /// The token is a triple-quoted string i.e., it starts and ends with three consecutive
+        /// quote characters (`"""` or `'''`).
+        const TRIPLE_QUOTED_STRING = 1 << 1;
+
+        /// The token is a unicode string i.e., prefixed with `u` or `U`
+        const UNICODE_STRING = 1 << 2;
+        /// The token is a byte string i.e., prefixed with `b` or `B`
+        const BYTE_STRING = 1 << 3;
+        /// The token is an f-string i.e., prefixed with `f` or `F`
+        const F_STRING = 1 << 4;
+        /// The token is a raw string and the prefix character is in lowercase.
+        const RAW_STRING_LOWERCASE = 1 << 5;
+        /// The token is a raw string and the prefix character is in uppercase.
+        const RAW_STRING_UPPERCASE = 1 << 6;
+
+        /// The token is a raw string i.e., prefixed with `r` or `R`
+        const RAW_STRING = Self::RAW_STRING_LOWERCASE.bits() | Self::RAW_STRING_UPPERCASE.bits();
+    }
+}
+
+impl StringFlags for TokenFlags {
+    fn quote_style(self) -> Quote {
+        if self.contains(TokenFlags::DOUBLE_QUOTES) {
+            Quote::Double
+        } else {
+            Quote::Single
+        }
+    }
+
+    fn is_triple_quoted(self) -> bool {
+        self.contains(TokenFlags::TRIPLE_QUOTED_STRING)
+    }
+
+    fn prefix(self) -> AnyStringPrefix {
+        if self.contains(TokenFlags::F_STRING) {
+            if self.contains(TokenFlags::RAW_STRING_LOWERCASE) {
+                AnyStringPrefix::Format(FStringPrefix::Raw { uppercase_r: false })
+            } else if self.contains(TokenFlags::RAW_STRING_UPPERCASE) {
+                AnyStringPrefix::Format(FStringPrefix::Raw { uppercase_r: true })
+            } else {
+                AnyStringPrefix::Format(FStringPrefix::Regular)
+            }
+        } else if self.contains(TokenFlags::BYTE_STRING) {
+            if self.contains(TokenFlags::RAW_STRING_LOWERCASE) {
+                AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: false })
+            } else if self.contains(TokenFlags::RAW_STRING_UPPERCASE) {
+                AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: true })
+            } else {
+                AnyStringPrefix::Bytes(ByteStringPrefix::Regular)
+            }
+        } else if self.contains(TokenFlags::RAW_STRING_LOWERCASE) {
+            AnyStringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: false })
+        } else if self.contains(TokenFlags::RAW_STRING_UPPERCASE) {
+            AnyStringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: true })
+        } else if self.contains(TokenFlags::UNICODE_STRING) {
+            AnyStringPrefix::Regular(StringLiteralPrefix::Unicode)
+        } else {
+            AnyStringPrefix::Regular(StringLiteralPrefix::Empty)
+        }
+    }
+}
+
+impl TokenFlags {
+    /// Returns `true` if the token is an f-string.
+    const fn is_f_string(self) -> bool {
+        self.contains(TokenFlags::F_STRING)
+    }
+
+    /// Returns `true` if the token is a raw string.
+    const fn is_raw_string(self) -> bool {
+        self.contains(TokenFlags::RAW_STRING)
+    }
+
+    pub(crate) fn as_any_string_flags(self) -> AnyStringFlags {
+        AnyStringFlags::new(self.prefix(), self.quote_style(), self.is_triple_quoted())
+    }
+}
+
 #[derive(Clone, Copy, Debug)]
 pub struct Token {
     /// The kind of the token.
     kind: TokenKind,
     /// The range of the token.
     range: TextRange,
+    /// The set of flags describing this token.
+    flags: TokenFlags,
 }
 
 impl Token {
-    pub(crate) fn new(kind: TokenKind, range: TextRange) -> Self {
-        Self { kind, range }
+    pub(crate) fn new(kind: TokenKind, range: TextRange, flags: TokenFlags) -> Token {
+        Self { kind, range, flags }
     }
 
     /// Returns the token kind.
@@ -1533,21 +1664,12 @@ pub(crate) enum TokenValue {
     String {
         /// The string value.
         value: Box<str>,
-        /// Flags that can be queried to determine the quote style
-        /// and prefixes of the string
-        flags: AnyStringFlags,
     },
-    /// Token value for the start of an f-string. This includes the `f`/`F`/`fr` prefix
-    /// and the opening quote(s).
-    FStringStart(AnyStringFlags),
     /// Token value that includes the portion of text inside the f-string that's not
     /// part of the expression part and isn't an opening or closing brace.
     FStringMiddle {
         /// The string value.
        value: Box<str>,
-        /// Flags that can be queried to determine the quote style
-        /// and prefixes of the string
-        flags: AnyStringFlags,
     },
     /// Token value for IPython escape commands. These are recognized by the lexer
     /// only when the mode is [`Mode::Ipython`].
@@ -1563,6 +1685,7 @@ pub(crate) struct LexerCheckpoint<'src> {
     value: TokenValue,
     current_kind: TokenKind,
     current_range: TextRange,
+    current_flags: TokenFlags,
     cursor: Cursor<'src>,
     state: State,
     nesting: u32,
diff --git a/crates/ruff_python_parser/src/lexer/fstring.rs b/crates/ruff_python_parser/src/lexer/fstring.rs
index 43fc932c1a0ea..7b702a77b7269 100644
--- a/crates/ruff_python_parser/src/lexer/fstring.rs
+++ b/crates/ruff_python_parser/src/lexer/fstring.rs
@@ -1,9 +1,11 @@
-use ruff_python_ast::{AnyStringFlags, StringFlags};
+use ruff_python_ast::StringFlags;
+
+use super::TokenFlags;
 
 /// The context representing the current f-string that the lexer is in.
 #[derive(Clone, Debug)]
 pub(crate) struct FStringContext {
-    flags: AnyStringFlags,
+    flags: TokenFlags,
 
     /// The level of nesting for the lexer when it entered the current f-string.
     /// The nesting level includes all kinds of parentheses i.e., round, square,
@@ -17,8 +19,9 @@ pub(crate) struct FStringContext {
 }
 
 impl FStringContext {
-    pub(crate) const fn new(flags: AnyStringFlags, nesting: u32) -> Self {
-        debug_assert!(flags.is_f_string());
+    pub(crate) const fn new(flags: TokenFlags, nesting: u32) -> Self {
+        assert!(flags.is_f_string());
+
         Self {
             flags,
             nesting,
@@ -26,8 +29,7 @@ impl FStringContext {
         }
     }
 
-    pub(crate) const fn flags(&self) -> AnyStringFlags {
-        debug_assert!(self.flags.is_f_string());
+    pub(crate) const fn flags(&self) -> TokenFlags {
         self.flags
     }
 
diff --git a/crates/ruff_python_parser/src/parser/expression.rs b/crates/ruff_python_parser/src/parser/expression.rs
index a1d656a4545da..b6ea1a59f6150 100644
--- a/crates/ruff_python_parser/src/parser/expression.rs
+++ b/crates/ruff_python_parser/src/parser/expression.rs
@@ -1255,7 +1255,9 @@ impl<'src> Parser<'src> {
     /// See:
     fn parse_string_or_byte_literal(&mut self) -> StringType {
         let range = self.current_token_range();
-        let TokenValue::String { value, flags } = self.bump_value(TokenKind::String) else {
+        let flags = self.tokens.current_flags().as_any_string_flags();
+
+        let TokenValue::String { value } = self.bump_value(TokenKind::String) else {
             unreachable!()
         };
 
@@ -1301,18 +1303,17 @@ impl<'src> Parser<'src> {
     /// See:
     fn parse_fstring(&mut self) -> ast::FString {
         let start = self.node_start();
+        let flags = self.tokens.current_flags().as_any_string_flags();
 
-        let TokenValue::FStringStart(kind) = self.bump_value(TokenKind::FStringStart) else {
-            unreachable!()
-        };
-        let elements = self.parse_fstring_elements();
+        self.bump(TokenKind::FStringStart);
+        let elements = self.parse_fstring_elements(flags);
 
         self.expect(TokenKind::FStringEnd);
 
         ast::FString {
             elements,
             range: self.node_range(start),
-            flags: kind.into(),
+            flags: ast::FStringFlags::from(flags),
         }
     }
 
@@ -1321,17 +1322,17 @@ impl<'src> Parser<'src> {
     /// # Panics
     ///
     /// If the parser isn't positioned at a `{` or `FStringMiddle` token.
-    fn parse_fstring_elements(&mut self) -> FStringElements {
+    fn parse_fstring_elements(&mut self, flags: ast::AnyStringFlags) -> FStringElements {
         let mut elements = vec![];
 
         self.parse_list(RecoveryContextKind::FStringElements, |parser| {
             let element = match parser.current_token_kind() {
                 TokenKind::Lbrace => {
-                    FStringElement::Expression(parser.parse_fstring_expression_element())
+                    FStringElement::Expression(parser.parse_fstring_expression_element(flags))
                 }
                 TokenKind::FStringMiddle => {
                     let range = parser.current_token_range();
-                    let TokenValue::FStringMiddle { value, flags, .. } =
+                    let TokenValue::FStringMiddle { value } =
                         parser.bump_value(TokenKind::FStringMiddle)
                     else {
                         unreachable!()
@@ -1382,7 +1383,10 @@ impl<'src> Parser<'src> {
     /// # Panics
     ///
     /// If the parser isn't positioned at a `{` token.
-    fn parse_fstring_expression_element(&mut self) -> ast::FStringExpressionElement {
+    fn parse_fstring_expression_element(
+        &mut self,
+        flags: ast::AnyStringFlags,
+    ) -> ast::FStringExpressionElement {
         let start = self.node_start();
         self.bump(TokenKind::Lbrace);
 
@@ -1458,7 +1462,7 @@ impl<'src> Parser<'src> {
         let format_spec = if self.eat(TokenKind::Colon) {
             let spec_start = self.node_start();
-            let elements = self.parse_fstring_elements();
+            let elements = self.parse_fstring_elements(flags);
             Some(Box::new(ast::FStringFormatSpec {
                 range: self.node_range(spec_start),
                 elements,
diff --git a/crates/ruff_python_parser/src/token_source.rs b/crates/ruff_python_parser/src/token_source.rs
index 7c9aec23e97b9..3cb52c878c803 100644
--- a/crates/ruff_python_parser/src/token_source.rs
+++ b/crates/ruff_python_parser/src/token_source.rs
@@ -1,7 +1,7 @@
 use ruff_python_trivia::CommentRanges;
 use ruff_text_size::{TextRange, TextSize};
 
-use crate::lexer::{Lexer, LexerCheckpoint, LexicalError, Token, TokenValue};
+use crate::lexer::{Lexer, LexerCheckpoint, LexicalError, Token, TokenFlags, TokenValue};
 use crate::{Mode, TokenKind};
 
 /// Token source for the parser that skips over any trivia tokens.
@@ -50,6 +50,11 @@ impl<'src> TokenSource<'src> {
         self.lexer.current_range()
     }
 
+    /// Returns the flags for the current token.
+    pub(crate) fn current_flags(&self) -> TokenFlags {
+        self.lexer.current_flags()
+    }
+
     /// Calls the underlying [`take_value`] method on the lexer. Refer to its documentation
     /// for more info.
     ///
@@ -83,7 +88,8 @@ impl<'src> TokenSource<'src> {
     ///
     /// It pushes the given kind to the token vector with the current token range.
     pub(crate) fn bump(&mut self, kind: TokenKind) {
-        self.tokens.push(Token::new(kind, self.current_range()));
+        self.tokens
+            .push(Token::new(kind, self.current_range(), self.current_flags()));
         self.do_bump();
    }
 
@@ -96,7 +102,8 @@ impl<'src> TokenSource<'src> {
             if kind == TokenKind::Comment {
                 self.comments.push(self.current_range());
             }
-            self.tokens.push(Token::new(kind, self.current_range()));
+            self.tokens
+                .push(Token::new(kind, self.current_range(), self.current_flags()));
             continue;
         }
         break;
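
---

A self-contained sketch of the pattern described in the summary, for reviewers
who want to see it in isolation. This is not ruff's actual API: the `Token` and
`TokenFlags` types below are simplified stand-ins, and the query methods are
hypothetical placeholders for the accessors the follow-up PR (#11592) adds;
only the flag names and the bitflags layout mirror this patch.

```rust
use bitflags::bitflags; // the same crate this patch adds to the lexer

bitflags! {
    // Subset of the patch's `TokenFlags`, reduced for illustration.
    #[derive(Clone, Copy, Debug)]
    struct TokenFlags: u8 {
        const DOUBLE_QUOTES = 1 << 0;
        const TRIPLE_QUOTED_STRING = 1 << 1;
        const F_STRING = 1 << 4;
    }
}

#[derive(Clone, Copy, Debug)]
struct Token {
    // The flags stay private; downstream tools go through query methods.
    flags: TokenFlags,
}

impl Token {
    // Hypothetical accessors of the kind the follow-up PR exposes.
    const fn is_f_string(self) -> bool {
        self.flags.contains(TokenFlags::F_STRING)
    }

    const fn is_triple_quoted(self) -> bool {
        self.flags.contains(TokenFlags::TRIPLE_QUOTED_STRING)
    }
}

fn main() {
    // The flags an `FStringStart` token for `f"""` would carry.
    let token = Token {
        flags: TokenFlags::F_STRING
            | TokenFlags::DOUBLE_QUOTES
            | TokenFlags::TRIPLE_QUOTED_STRING,
    };
    assert!(token.is_f_string());
    assert!(token.is_triple_quoted());
}
```

The design choice this illustrates: consumers never pattern-match on raw bits,
so the lexer is free to repack `TokenFlags` (e.g., to add unterminated-string
or number-kind flags later) without breaking downstream code.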