From 281c2fd5bf1604392bde16e32c7d57d2772406e6 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 31 Jul 2024 12:08:55 +1000 Subject: [PATCH 1/5] Inline and remove `parse_local_mk`. It has a single use. This makes the `let` handling case in `parse_stmt_without_recovery` more similar to the statement path and statement expression cases. --- compiler/rustc_parse/src/parser/stmt.rs | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/compiler/rustc_parse/src/parser/stmt.rs b/compiler/rustc_parse/src/parser/stmt.rs index 7b0daaa14335f..28122a3ae294a 100644 --- a/compiler/rustc_parse/src/parser/stmt.rs +++ b/compiler/rustc_parse/src/parser/stmt.rs @@ -66,7 +66,12 @@ impl<'a> Parser<'a> { } Ok(Some(if self.token.is_keyword(kw::Let) { - self.parse_local_mk(lo, attrs, capture_semi, force_collect)? + self.collect_tokens_trailing_token(attrs, force_collect, |this, attrs| { + this.expect_keyword(kw::Let)?; + let local = this.parse_local(attrs)?; + let trailing = capture_semi && this.token.kind == token::Semi; + Ok((this.mk_stmt(lo.to(this.prev_token.span), StmtKind::Let(local)), trailing)) + })? } else if self.is_kw_followed_by_ident(kw::Mut) && self.may_recover() { self.recover_stmt_local_after_let( lo, @@ -247,21 +252,6 @@ impl<'a> Parser<'a> { Ok(stmt) } - fn parse_local_mk( - &mut self, - lo: Span, - attrs: AttrWrapper, - capture_semi: bool, - force_collect: ForceCollect, - ) -> PResult<'a, Stmt> { - self.collect_tokens_trailing_token(attrs, force_collect, |this, attrs| { - this.expect_keyword(kw::Let)?; - let local = this.parse_local(attrs)?; - let trailing = capture_semi && this.token.kind == token::Semi; - Ok((this.mk_stmt(lo.to(this.prev_token.span), StmtKind::Let(local)), trailing)) - }) - } - /// Parses a local variable declaration. fn parse_local(&mut self, attrs: AttrVec) -> PResult<'a, P> { let lo = self.prev_token.span; From fe647f053882f5745dfb339f22f13435bf8c4ca5 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 31 Jul 2024 12:56:25 +1000 Subject: [PATCH 2/5] Remove `LhsExpr`. `parse_expr_assoc_with` has an awkward structure -- sometimes the lhs is already parsed. This commit splits the post-lhs part into a new method `parse_expr_assoc_rest_with`, which makes everything shorter and simpler. --- compiler/rustc_parse/src/parser/expr.rs | 57 ++++++++++--------------- compiler/rustc_parse/src/parser/pat.rs | 7 +-- compiler/rustc_parse/src/parser/stmt.rs | 6 +-- 3 files changed, 28 insertions(+), 42 deletions(-) diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index a242dc5cd582d..24e56c5bdee66 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -40,14 +40,6 @@ use super::{ }; use crate::{errors, maybe_recover_from_interpolated_ty_qpath}; -#[derive(Debug)] -pub(super) enum LhsExpr { - // Already parsed just the outer attributes. - Unparsed { attrs: AttrWrapper }, - // Already parsed the expression. - Parsed { expr: P, starts_statement: bool }, -} - #[derive(Debug)] enum DestructuredFloat { /// 1e2 @@ -113,30 +105,31 @@ impl<'a> Parser<'a> { r: Restrictions, attrs: AttrWrapper, ) -> PResult<'a, P> { - self.with_res(r, |this| this.parse_expr_assoc_with(0, LhsExpr::Unparsed { attrs })) + self.with_res(r, |this| this.parse_expr_assoc_with(0, attrs)) } /// Parses an associative expression with operators of at least `min_prec` precedence. pub(super) fn parse_expr_assoc_with( &mut self, min_prec: usize, - lhs: LhsExpr, + attrs: AttrWrapper, ) -> PResult<'a, P> { - let mut starts_stmt = false; - let mut lhs = match lhs { - LhsExpr::Parsed { expr, starts_statement } => { - starts_stmt = starts_statement; - expr - } - LhsExpr::Unparsed { attrs } => { - if self.token.is_range_separator() { - return self.parse_expr_prefix_range(attrs); - } else { - self.parse_expr_prefix(attrs)? - } - } + let lhs = if self.token.is_range_separator() { + return self.parse_expr_prefix_range(attrs); + } else { + self.parse_expr_prefix(attrs)? }; + self.parse_expr_assoc_rest_with(min_prec, false, lhs) + } + /// Parses the rest of an associative expression (i.e. the part after the lhs) with operators + /// of at least `min_prec` precedence. + pub(super) fn parse_expr_assoc_rest_with( + &mut self, + min_prec: usize, + starts_stmt: bool, + mut lhs: P, + ) -> PResult<'a, P> { if !self.should_continue_as_assoc_expr(&lhs) { return Ok(lhs); } @@ -272,7 +265,7 @@ impl<'a> Parser<'a> { }; let rhs = self.with_res(restrictions - Restrictions::STMT_EXPR, |this| { let attrs = this.parse_outer_attributes()?; - this.parse_expr_assoc_with(prec + prec_adjustment, LhsExpr::Unparsed { attrs }) + this.parse_expr_assoc_with(prec + prec_adjustment, attrs) })?; let span = self.mk_expr_sp(&lhs, lhs_span, rhs.span); @@ -447,7 +440,7 @@ impl<'a> Parser<'a> { let maybe_lt = self.token.clone(); let attrs = self.parse_outer_attributes()?; Some( - self.parse_expr_assoc_with(prec + 1, LhsExpr::Unparsed { attrs }) + self.parse_expr_assoc_with(prec + 1, attrs) .map_err(|err| self.maybe_err_dotdotlt_syntax(maybe_lt, err))?, ) } else { @@ -504,12 +497,9 @@ impl<'a> Parser<'a> { let (span, opt_end) = if this.is_at_start_of_range_notation_rhs() { // RHS must be parsed with more associativity than the dots. let attrs = this.parse_outer_attributes()?; - this.parse_expr_assoc_with( - op.unwrap().precedence() + 1, - LhsExpr::Unparsed { attrs }, - ) - .map(|x| (lo.to(x.span), Some(x))) - .map_err(|err| this.maybe_err_dotdotlt_syntax(maybe_lt, err))? + this.parse_expr_assoc_with(op.unwrap().precedence() + 1, attrs) + .map(|x| (lo.to(x.span), Some(x))) + .map_err(|err| this.maybe_err_dotdotlt_syntax(maybe_lt, err))? } else { (lo, None) }; @@ -2645,10 +2635,7 @@ impl<'a> Parser<'a> { self.expect(&token::Eq)?; } let attrs = self.parse_outer_attributes()?; - let expr = self.parse_expr_assoc_with( - 1 + prec_let_scrutinee_needs_par(), - LhsExpr::Unparsed { attrs }, - )?; + let expr = self.parse_expr_assoc_with(1 + prec_let_scrutinee_needs_par(), attrs)?; let span = lo.to(expr.span); Ok(self.mk_expr(span, ExprKind::Let(pat, expr, span, recovered))) } diff --git a/compiler/rustc_parse/src/parser/pat.rs b/compiler/rustc_parse/src/parser/pat.rs index b6f85cc90324c..5bfb8bdf776a0 100644 --- a/compiler/rustc_parse/src/parser/pat.rs +++ b/compiler/rustc_parse/src/parser/pat.rs @@ -25,7 +25,7 @@ use crate::errors::{ UnexpectedParenInRangePat, UnexpectedParenInRangePatSugg, UnexpectedVertVertBeforeFunctionParam, UnexpectedVertVertInPattern, WrapInParens, }; -use crate::parser::expr::{could_be_unclosed_char_literal, LhsExpr}; +use crate::parser::expr::could_be_unclosed_char_literal; use crate::{maybe_recover_from_interpolated_ty_qpath, maybe_whole}; #[derive(PartialEq, Copy, Clone)] @@ -403,8 +403,9 @@ impl<'a> Parser<'a> { // Parse an associative expression such as `+ expr`, `% expr`, ... // Assignements, ranges and `|` are disabled by [`Restrictions::IS_PAT`]. - let lhs = LhsExpr::Parsed { expr, starts_statement: false }; - if let Ok(expr) = snapshot.parse_expr_assoc_with(0, lhs).map_err(|err| err.cancel()) { + if let Ok(expr) = + snapshot.parse_expr_assoc_rest_with(0, false, expr).map_err(|err| err.cancel()) + { // We got a valid expression. self.restore_snapshot(snapshot); self.restrictions.remove(Restrictions::IS_PAT); diff --git a/compiler/rustc_parse/src/parser/stmt.rs b/compiler/rustc_parse/src/parser/stmt.rs index 28122a3ae294a..e0676b72634e7 100644 --- a/compiler/rustc_parse/src/parser/stmt.rs +++ b/compiler/rustc_parse/src/parser/stmt.rs @@ -17,7 +17,6 @@ use thin_vec::{thin_vec, ThinVec}; use super::attr::InnerAttrForbiddenReason; use super::diagnostics::AttemptLocalParseRecovery; -use super::expr::LhsExpr; use super::pat::{PatternLocation, RecoverComma}; use super::path::PathStyle; use super::{ @@ -178,7 +177,7 @@ impl<'a> Parser<'a> { // Perform this outside of the `collect_tokens_trailing_token` closure, // since our outer attributes do not apply to this part of the expression let expr = self.with_res(Restrictions::STMT_EXPR, |this| { - this.parse_expr_assoc_with(0, LhsExpr::Parsed { expr, starts_statement: true }) + this.parse_expr_assoc_rest_with(0, true, expr) })?; Ok(self.mk_stmt(lo.to(self.prev_token.span), StmtKind::Expr(expr))) } else { @@ -211,8 +210,7 @@ impl<'a> Parser<'a> { let e = self.mk_expr(lo.to(hi), ExprKind::MacCall(mac)); let e = self.maybe_recover_from_bad_qpath(e)?; let e = self.parse_expr_dot_or_call_with(attrs, e, lo)?; - let e = self - .parse_expr_assoc_with(0, LhsExpr::Parsed { expr: e, starts_statement: false })?; + let e = self.parse_expr_assoc_rest_with(0, false, e)?; StmtKind::Expr(e) }; Ok(self.mk_stmt(lo.to(hi), kind)) From 2eb2ef1684e4df67389432fe0c1cc2776c790cd7 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 31 Jul 2024 19:16:09 +1000 Subject: [PATCH 3/5] Streamline attribute stitching on AST nodes. It can be done more concisely. --- .../rustc_parse/src/parser/attr_wrapper.rs | 7 +++---- compiler/rustc_parse/src/parser/expr.rs | 18 +++++++----------- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index 611dbc0535c61..ba20bd9daf708 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -53,10 +53,9 @@ impl AttrWrapper { /// Prepend `self.attrs` to `attrs`. // FIXME: require passing an NT to prevent misuse of this method - pub(crate) fn prepend_to_nt_inner(self, attrs: &mut AttrVec) { - let mut self_attrs = self.attrs; - mem::swap(attrs, &mut self_attrs); - attrs.extend(self_attrs); + pub(crate) fn prepend_to_nt_inner(mut self, attrs: &mut AttrVec) { + mem::swap(attrs, &mut self.attrs); + attrs.extend(self.attrs); } pub fn is_empty(&self) -> bool { diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index 24e56c5bdee66..ae5c8af7c2d0d 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -877,7 +877,7 @@ impl<'a> Parser<'a> { mut e: P, lo: Span, ) -> PResult<'a, P> { - let res = ensure_sufficient_stack(|| { + let mut res = ensure_sufficient_stack(|| { loop { let has_question = if self.prev_token.kind == TokenKind::Ident(kw::Return, IdentIsRaw::No) { @@ -924,17 +924,13 @@ impl<'a> Parser<'a> { // Stitch the list of outer attributes onto the return value. A little // bit ugly, but the best way given the current code structure. - if attrs.is_empty() { - res - } else { - res.map(|expr| { - expr.map(|mut expr| { - attrs.extend(expr.attrs); - expr.attrs = attrs; - expr - }) - }) + if !attrs.is_empty() + && let Ok(expr) = &mut res + { + mem::swap(&mut expr.attrs, &mut attrs); + expr.attrs.extend(attrs) } + res } pub(super) fn parse_dot_suffix_expr( From 9d77d17f7127def53c6a3555ae9a6a1f271a8e37 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 1 Aug 2024 15:41:51 +1000 Subject: [PATCH 4/5] Move a comment to a better spot. --- compiler/rustc_parse/src/parser/stmt.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/compiler/rustc_parse/src/parser/stmt.rs b/compiler/rustc_parse/src/parser/stmt.rs index e0676b72634e7..b3efb87a4a26a 100644 --- a/compiler/rustc_parse/src/parser/stmt.rs +++ b/compiler/rustc_parse/src/parser/stmt.rs @@ -116,13 +116,12 @@ impl<'a> Parser<'a> { } } } else if let Some(item) = self.parse_item_common( - attrs.clone(), + attrs.clone(), // FIXME: unwanted clone of attrs false, true, FnParseMode { req_name: |_| true, req_body: true }, force_collect, )? { - // FIXME: Bad copy of attrs self.mk_stmt(lo.to(item.span), StmtKind::Item(P(item))) } else if self.eat(&token::Semi) { // Do not attempt to parse an expression if we're done here. From d1f05fd1848fc68ed89d17f7937e358dacd8aed4 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 1 Aug 2024 06:44:39 +1000 Subject: [PATCH 5/5] Distinguish the two kinds of token range. When collecting tokens there are two kinds of range: - a range relative to the parser's full token stream (which we get when we are parsing); - a range relative to a single AST node's token stream (which we use within `LazyAttrTokenStreamImpl` when replacing tokens). These are currently both represented with `Range` and it's easy to mix them up -- until now I hadn't properly understood the difference. This commit introduces `ParserRange` and `NodeRange` to distinguish them. This also requires splitting `ReplaceRange` in two, giving the new types `ParserReplacement` and `NodeReplacement`. (These latter two names reduce the overloading of the word "range".) The commit also rewrites some comments to be clearer. The end result is a little more verbose, but much clearer. --- compiler/rustc_builtin_macros/src/cfg_eval.rs | 2 +- compiler/rustc_parse/src/parser/attr.rs | 6 +- .../rustc_parse/src/parser/attr_wrapper.rs | 124 ++++++++++-------- compiler/rustc_parse/src/parser/mod.rs | 70 +++++++--- 4 files changed, 124 insertions(+), 78 deletions(-) diff --git a/compiler/rustc_builtin_macros/src/cfg_eval.rs b/compiler/rustc_builtin_macros/src/cfg_eval.rs index dc1874bfecbda..4b05c144d37d7 100644 --- a/compiler/rustc_builtin_macros/src/cfg_eval.rs +++ b/compiler/rustc_builtin_macros/src/cfg_eval.rs @@ -202,7 +202,7 @@ impl CfgEval<'_> { } // Now that we have our re-parsed `AttrTokenStream`, recursively configuring - // our attribute target will correctly the tokens as well. + // our attribute target will correctly configure the tokens as well. flat_map_annotatable(self, annotatable) } } diff --git a/compiler/rustc_parse/src/parser/attr.rs b/compiler/rustc_parse/src/parser/attr.rs index 12b9414d1f760..a98382efc3eba 100644 --- a/compiler/rustc_parse/src/parser/attr.rs +++ b/compiler/rustc_parse/src/parser/attr.rs @@ -8,7 +8,7 @@ use rustc_span::{sym, BytePos, Span}; use thin_vec::ThinVec; use tracing::debug; -use super::{AttrWrapper, Capturing, FnParseMode, ForceCollect, Parser, PathStyle}; +use super::{AttrWrapper, Capturing, FnParseMode, ForceCollect, Parser, ParserRange, PathStyle}; use crate::{errors, fluent_generated as fluent, maybe_whole}; // Public for rustfmt usage @@ -307,8 +307,8 @@ impl<'a> Parser<'a> { // inner attribute, for possible later processing in a `LazyAttrTokenStream`. if let Capturing::Yes = self.capture_state.capturing { let end_pos = self.num_bump_calls; - let range = start_pos..end_pos; - self.capture_state.inner_attr_ranges.insert(attr.id, range); + let parser_range = ParserRange(start_pos..end_pos); + self.capture_state.inner_attr_parser_ranges.insert(attr.id, parser_range); } attrs.push(attr); } else { diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index ba20bd9daf708..abf61036c2deb 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -10,7 +10,10 @@ use rustc_errors::PResult; use rustc_session::parse::ParseSess; use rustc_span::{sym, Span, DUMMY_SP}; -use super::{Capturing, FlatToken, ForceCollect, Parser, ReplaceRange, TokenCursor}; +use super::{ + Capturing, FlatToken, ForceCollect, NodeRange, NodeReplacement, Parser, ParserRange, + TokenCursor, +}; /// A wrapper type to ensure that the parser handles outer attributes correctly. /// When we parse outer attributes, we need to ensure that we capture tokens @@ -28,8 +31,8 @@ use super::{Capturing, FlatToken, ForceCollect, Parser, ReplaceRange, TokenCurso #[derive(Debug, Clone)] pub struct AttrWrapper { attrs: AttrVec, - // The start of the outer attributes in the token cursor. - // This allows us to create a `ReplaceRange` for the entire attribute + // The start of the outer attributes in the parser's token stream. + // This lets us create a `NodeReplacement` for the entire attribute // target, including outer attributes. start_pos: u32, } @@ -88,7 +91,7 @@ struct LazyAttrTokenStreamImpl { cursor_snapshot: TokenCursor, num_calls: u32, break_last_token: bool, - replace_ranges: Box<[ReplaceRange]>, + node_replacements: Box<[NodeReplacement]>, } impl ToAttrTokenStream for LazyAttrTokenStreamImpl { @@ -103,21 +106,24 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl { .chain(iter::repeat_with(|| FlatToken::Token(cursor_snapshot.next()))) .take(self.num_calls as usize); - if self.replace_ranges.is_empty() { + if self.node_replacements.is_empty() { make_attr_token_stream(tokens, self.break_last_token) } else { let mut tokens: Vec<_> = tokens.collect(); - let mut replace_ranges = self.replace_ranges.to_vec(); - replace_ranges.sort_by_key(|(range, _)| range.start); + let mut node_replacements = self.node_replacements.to_vec(); + node_replacements.sort_by_key(|(range, _)| range.0.start); #[cfg(debug_assertions)] - for [(range, tokens), (next_range, next_tokens)] in replace_ranges.array_windows() { + for [(node_range, tokens), (next_node_range, next_tokens)] in + node_replacements.array_windows() + { assert!( - range.end <= next_range.start || range.end >= next_range.end, - "Replace ranges should either be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})", - range, + node_range.0.end <= next_node_range.0.start + || node_range.0.end >= next_node_range.0.end, + "Node ranges should be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})", + node_range, tokens, - next_range, + next_node_range, next_tokens, ); } @@ -135,20 +141,23 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl { // start position, we ensure that any (outer) replace range which // encloses another (inner) replace range will fully overwrite the // inner range's replacement. - for (range, target) in replace_ranges.into_iter().rev() { - assert!(!range.is_empty(), "Cannot replace an empty range: {range:?}"); + for (node_range, target) in node_replacements.into_iter().rev() { + assert!( + !node_range.0.is_empty(), + "Cannot replace an empty node range: {:?}", + node_range.0 + ); // Replace the tokens in range with zero or one `FlatToken::AttrsTarget`s, plus // enough `FlatToken::Empty`s to fill up the rest of the range. This keeps the // total length of `tokens` constant throughout the replacement process, allowing - // us to use all of the `ReplaceRanges` entries without adjusting indices. + // us to do all replacements without adjusting indices. let target_len = target.is_some() as usize; tokens.splice( - (range.start as usize)..(range.end as usize), - target - .into_iter() - .map(|target| FlatToken::AttrsTarget(target)) - .chain(iter::repeat(FlatToken::Empty).take(range.len() - target_len)), + (node_range.0.start as usize)..(node_range.0.end as usize), + target.into_iter().map(|target| FlatToken::AttrsTarget(target)).chain( + iter::repeat(FlatToken::Empty).take(node_range.0.len() - target_len), + ), ); } make_attr_token_stream(tokens.into_iter(), self.break_last_token) @@ -215,7 +224,7 @@ impl<'a> Parser<'a> { let cursor_snapshot = self.token_cursor.clone(); let start_pos = self.num_bump_calls; let has_outer_attrs = !attrs.attrs.is_empty(); - let replace_ranges_start = self.capture_state.replace_ranges.len(); + let parser_replacements_start = self.capture_state.parser_replacements.len(); // We set and restore `Capturing::Yes` on either side of the call to // `f`, so we can distinguish the outermost call to @@ -270,7 +279,7 @@ impl<'a> Parser<'a> { return Ok(ret); } - let replace_ranges_end = self.capture_state.replace_ranges.len(); + let parser_replacements_end = self.capture_state.parser_replacements.len(); assert!( !(self.break_last_token && capture_trailing), @@ -287,15 +296,16 @@ impl<'a> Parser<'a> { let num_calls = end_pos - start_pos; - // Take the captured ranges for any inner attributes that we parsed in - // `Parser::parse_inner_attributes`, and pair them in a `ReplaceRange` - // with `None`, which means the relevant tokens will be removed. (More - // details below.) - let mut inner_attr_replace_ranges = Vec::new(); + // Take the captured `ParserRange`s for any inner attributes that we parsed in + // `Parser::parse_inner_attributes`, and pair them in a `ParserReplacement` with `None`, + // which means the relevant tokens will be removed. (More details below.) + let mut inner_attr_parser_replacements = Vec::new(); for attr in ret.attrs() { if attr.style == ast::AttrStyle::Inner { - if let Some(attr_range) = self.capture_state.inner_attr_ranges.remove(&attr.id) { - inner_attr_replace_ranges.push((attr_range, None)); + if let Some(inner_attr_parser_range) = + self.capture_state.inner_attr_parser_ranges.remove(&attr.id) + { + inner_attr_parser_replacements.push((inner_attr_parser_range, None)); } else { self.dcx().span_delayed_bug(attr.span, "Missing token range for attribute"); } @@ -304,37 +314,41 @@ impl<'a> Parser<'a> { // This is hot enough for `deep-vector` that checking the conditions for an empty iterator // is measurably faster than actually executing the iterator. - let replace_ranges: Box<[ReplaceRange]> = - if replace_ranges_start == replace_ranges_end && inner_attr_replace_ranges.is_empty() { - Box::new([]) - } else { - // Grab any replace ranges that occur *inside* the current AST node. We will - // perform the actual replacement only when we convert the `LazyAttrTokenStream` to - // an `AttrTokenStream`. - self.capture_state.replace_ranges[replace_ranges_start..replace_ranges_end] - .iter() - .cloned() - .chain(inner_attr_replace_ranges.iter().cloned()) - .map(|(range, data)| ((range.start - start_pos)..(range.end - start_pos), data)) - .collect() - }; + let node_replacements: Box<[_]> = if parser_replacements_start == parser_replacements_end + && inner_attr_parser_replacements.is_empty() + { + Box::new([]) + } else { + // Grab any replace ranges that occur *inside* the current AST node. Convert them + // from `ParserRange` form to `NodeRange` form. We will perform the actual + // replacement only when we convert the `LazyAttrTokenStream` to an + // `AttrTokenStream`. + self.capture_state.parser_replacements + [parser_replacements_start..parser_replacements_end] + .iter() + .cloned() + .chain(inner_attr_parser_replacements.iter().cloned()) + .map(|(parser_range, data)| (NodeRange::new(parser_range, start_pos), data)) + .collect() + }; // What is the status here when parsing the example code at the top of this method? // // When parsing `g`: // - `start_pos..end_pos` is `12..33` (`fn g { ... }`, excluding the outer attr). - // - `inner_attr_replace_ranges` has one entry (`5..15`, when counting from `fn`), to + // - `inner_attr_parser_replacements` has one entry (`ParserRange(17..27)`), to // delete the inner attr's tokens. - // - This entry is put into the lazy tokens for `g`, i.e. deleting the inner attr from - // those tokens (if they get evaluated). + // - This entry is converted to `NodeRange(5..15)` (relative to the `fn`) and put into + // the lazy tokens for `g`, i.e. deleting the inner attr from those tokens (if they get + // evaluated). // - Those lazy tokens are also put into an `AttrsTarget` that is appended to `self`'s // replace ranges at the bottom of this function, for processing when parsing `m`. - // - `replace_ranges_start..replace_ranges_end` is empty. + // - `parser_replacements_start..parser_replacements_end` is empty. // // When parsing `m`: // - `start_pos..end_pos` is `0..34` (`mod m`, excluding the `#[cfg_eval]` attribute). - // - `inner_attr_replace_ranges` is empty. - // - `replace_range_start..replace_ranges_end` has one entry. + // - `inner_attr_parser_replacements` is empty. + // - `parser_replacements_start..parser_replacements_end` has one entry. // - One `AttrsTarget` (added below when parsing `g`) to replace all of `g` (`3..33`, // including its outer attribute), with: // - `attrs`: includes the outer and the inner attr. @@ -345,7 +359,7 @@ impl<'a> Parser<'a> { num_calls, cursor_snapshot, break_last_token: self.break_last_token, - replace_ranges, + node_replacements, }); // If we support tokens and don't already have them, store the newly captured tokens. @@ -366,7 +380,7 @@ impl<'a> Parser<'a> { // What is the status here when parsing the example code at the top of this method? // // When parsing `g`, we add one entry: - // - The `start_pos..end_pos` (`3..33`) entry has a new `AttrsTarget` with: + // - The pushed entry (`ParserRange(3..33)`) has a new `AttrsTarget` with: // - `attrs`: includes the outer and the inner attr. // - `tokens`: lazy tokens for `g` (with its inner attr deleted). // @@ -377,12 +391,14 @@ impl<'a> Parser<'a> { // cfg-expand this AST node. let start_pos = if has_outer_attrs { attrs.start_pos } else { start_pos }; let target = AttrsTarget { attrs: ret.attrs().iter().cloned().collect(), tokens }; - self.capture_state.replace_ranges.push((start_pos..end_pos, Some(target))); + self.capture_state + .parser_replacements + .push((ParserRange(start_pos..end_pos), Some(target))); } else if matches!(self.capture_state.capturing, Capturing::No) { // Only clear the ranges once we've finished capturing entirely, i.e. we've finished // the outermost call to this method. - self.capture_state.replace_ranges.clear(); - self.capture_state.inner_attr_ranges.clear(); + self.capture_state.parser_replacements.clear(); + self.capture_state.inner_attr_parser_ranges.clear(); } Ok(ret) } diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 26ee5bfdee42c..722fb41cd81ec 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -192,24 +192,54 @@ struct ClosureSpans { body: Span, } -/// Indicates a range of tokens that should be replaced by -/// the tokens in the provided `AttrsTarget`. This is used in two -/// places during token collection: +/// A token range within a `Parser`'s full token stream. +#[derive(Clone, Debug)] +struct ParserRange(Range); + +/// A token range within an individual AST node's (lazy) token stream, i.e. +/// relative to that node's first token. Distinct from `ParserRange` so the two +/// kinds of range can't be mixed up. +#[derive(Clone, Debug)] +struct NodeRange(Range); + +/// Indicates a range of tokens that should be replaced by an `AttrsTarget` +/// (replacement) or be replaced by nothing (deletion). This is used in two +/// places during token collection. +/// +/// 1. Replacement. During the parsing of an AST node that may have a +/// `#[derive]` attribute, when we parse a nested AST node that has `#[cfg]` +/// or `#[cfg_attr]`, we replace the entire inner AST node with +/// `FlatToken::AttrsTarget`. This lets us perform eager cfg-expansion on an +/// `AttrTokenStream`. /// -/// 1. During the parsing of an AST node that may have a `#[derive]` -/// attribute, we parse a nested AST node that has `#[cfg]` or `#[cfg_attr]` -/// In this case, we use a `ReplaceRange` to replace the entire inner AST node -/// with `FlatToken::AttrsTarget`, allowing us to perform eager cfg-expansion -/// on an `AttrTokenStream`. +/// 2. Deletion. We delete inner attributes from all collected token streams, +/// and instead track them through the `attrs` field on the AST node. This +/// lets us manipulate them similarly to outer attributes. When we create a +/// `TokenStream`, the inner attributes are inserted into the proper place +/// in the token stream. /// -/// 2. When we parse an inner attribute while collecting tokens. We -/// remove inner attributes from the token stream entirely, and -/// instead track them through the `attrs` field on the AST node. -/// This allows us to easily manipulate them (for example, removing -/// the first macro inner attribute to invoke a proc-macro). -/// When create a `TokenStream`, the inner attributes get inserted -/// into the proper place in the token stream. -type ReplaceRange = (Range, Option); +/// Each replacement starts off in `ParserReplacement` form but is converted to +/// `NodeReplacement` form when it is attached to a single AST node, via +/// `LazyAttrTokenStreamImpl`. +type ParserReplacement = (ParserRange, Option); + +/// See the comment on `ParserReplacement`. +type NodeReplacement = (NodeRange, Option); + +impl NodeRange { + // Converts a range within a parser's tokens to a range within a + // node's tokens beginning at `start_pos`. + // + // For example, imagine a parser with 50 tokens in its token stream, a + // function that spans `ParserRange(20..40)` and an inner attribute within + // that function that spans `ParserRange(30..35)`. We would find the inner + // attribute's range within the function's tokens by subtracting 20, which + // is the position of the function's start token. This gives + // `NodeRange(10..15)`. + fn new(ParserRange(parser_range): ParserRange, start_pos: u32) -> NodeRange { + NodeRange((parser_range.start - start_pos)..(parser_range.end - start_pos)) + } +} /// Controls how we capture tokens. Capturing can be expensive, /// so we try to avoid performing capturing in cases where @@ -226,8 +256,8 @@ enum Capturing { #[derive(Clone, Debug)] struct CaptureState { capturing: Capturing, - replace_ranges: Vec, - inner_attr_ranges: FxHashMap>, + parser_replacements: Vec, + inner_attr_parser_ranges: FxHashMap, } /// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that @@ -417,8 +447,8 @@ impl<'a> Parser<'a> { subparser_name, capture_state: CaptureState { capturing: Capturing::No, - replace_ranges: Vec::new(), - inner_attr_ranges: Default::default(), + parser_replacements: Vec::new(), + inner_attr_parser_ranges: Default::default(), }, current_closure: None, recovery: Recovery::Allowed,