diff --git a/compiler/rustc_parse/src/parser/attr.rs b/compiler/rustc_parse/src/parser/attr.rs index a8fe35f45b31e..a2e40d3398a9a 100644 --- a/compiler/rustc_parse/src/parser/attr.rs +++ b/compiler/rustc_parse/src/parser/attr.rs @@ -303,17 +303,13 @@ impl<'a> Parser<'a> { None }; if let Some(attr) = attr { - let end_pos = self.num_bump_calls; - // If we are currently capturing tokens, mark the location of this inner attribute. - // If capturing ends up creating a `LazyAttrTokenStream`, we will include - // this replace range with it, removing the inner attribute from the final - // `AttrTokenStream`. Inner attributes are stored in the parsed AST note. - // During macro expansion, they are selectively inserted back into the - // token stream (the first inner attribute is removed each time we invoke the - // corresponding macro). - let range = start_pos..end_pos; + // If we are currently capturing tokens (i.e. we are within a call to + // `Parser::collect_tokens_trailing_tokens`) record the token positions of this + // inner attribute, for possible later processing in a `LazyAttrTokenStream`. if let Capturing::Yes = self.capture_state.capturing { - self.capture_state.inner_attr_ranges.insert(attr.id, (range, None)); + let end_pos = self.num_bump_calls; + let range = start_pos..end_pos; + self.capture_state.inner_attr_ranges.insert(attr.id, range); } attrs.push(attr); } else { @@ -463,7 +459,8 @@ impl<'a> Parser<'a> { } } -/// The attributes are complete if all attributes are either a doc comment or a builtin attribute other than `cfg_attr` +/// The attributes are complete if all attributes are either a doc comment or a +/// builtin attribute other than `cfg_attr`. pub fn is_complete(attrs: &[ast::Attribute]) -> bool { attrs.iter().all(|attr| { attr.is_doc_comment() diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index 7e56e92f87b31..dc5f98f7be8b6 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -17,12 +17,12 @@ use std::{iter, mem}; /// /// This wrapper prevents direct access to the underlying `ast::AttrVec`. /// Parsing code can only get access to the underlying attributes -/// by passing an `AttrWrapper` to `collect_tokens_trailing_tokens`. +/// by passing an `AttrWrapper` to `collect_tokens_trailing_token`. /// This makes it difficult to accidentally construct an AST node /// (which stores an `ast::AttrVec`) without first collecting tokens. /// /// This struct has its own module, to ensure that the parser code -/// cannot directly access the `attrs` field +/// cannot directly access the `attrs` field. #[derive(Debug, Clone)] pub struct AttrWrapper { attrs: AttrVec, @@ -76,14 +76,13 @@ fn has_cfg_or_cfg_attr(attrs: &[Attribute]) -> bool { }) } -// Produces a `TokenStream` on-demand. Using `cursor_snapshot` -// and `num_calls`, we can reconstruct the `TokenStream` seen -// by the callback. This allows us to avoid producing a `TokenStream` -// if it is never needed - for example, a captured `macro_rules!` -// argument that is never passed to a proc macro. -// In practice token stream creation happens rarely compared to -// calls to `collect_tokens` (see some statistics in #78736), -// so we are doing as little up-front work as possible. +// From a value of this type we can reconstruct the `TokenStream` seen by the +// `f` callback passed to a call to `Parser::collect_tokens_trailing_token`, by +// replaying the getting of the tokens. This saves us producing a `TokenStream` +// if it is never needed, e.g. a captured `macro_rules!` argument that is never +// passed to a proc macro. In practice, token stream creation happens rarely +// compared to calls to `collect_tokens` (see some statistics in #78736) so we +// are doing as little up-front work as possible. // // This also makes `Parser` very cheap to clone, since // there is no intermediate collection buffer to clone. @@ -163,46 +162,55 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl { } impl<'a> Parser<'a> { - /// Records all tokens consumed by the provided callback, - /// including the current token. These tokens are collected - /// into a `LazyAttrTokenStream`, and returned along with the first part of - /// the callback's result. The second (bool) part of the callback's result - /// indicates if an extra token should be captured, e.g. a comma or + /// Parses code with `f`. If appropriate, it records the tokens (in + /// `LazyAttrTokenStream` form) that were parsed in the result, accessible + /// via the `HasTokens` trait. The second (bool) part of the callback's + /// result indicates if an extra token should be captured, e.g. a comma or /// semicolon. /// /// The `attrs` passed in are in `AttrWrapper` form, which is opaque. The /// `AttrVec` within is passed to `f`. See the comment on `AttrWrapper` for /// details. /// - /// Note: If your callback consumes an opening delimiter - /// (including the case where you call `collect_tokens` - /// when the current token is an opening delimiter), - /// you must also consume the corresponding closing delimiter. + /// Note: If your callback consumes an opening delimiter (including the + /// case where `self.token` is an opening delimiter on entry to this + /// function), you must also consume the corresponding closing delimiter. + /// E.g. you can consume `something ([{ }])` or `([{}])`, but not `([{}]`. + /// This restriction isn't a problem in practice, because parsed AST items + /// always have matching delimiters. /// - /// That is, you can consume - /// `something ([{ }])` or `([{}])`, but not `([{}]` - /// - /// This restriction shouldn't be an issue in practice, - /// since this function is used to record the tokens for - /// a parsed AST item, which always has matching delimiters. + /// The following example code will be used to explain things in comments + /// below. It has an outer attribute and an inner attribute. Parsing it + /// involves two calls to this method, one of which is indirectly + /// recursive. + /// ```ignore (fake attributes) + /// #[cfg_eval] // token pos + /// mod m { // 0.. 3 + /// #[cfg_attr(cond1, attr1)] // 3..12 + /// fn g() { // 12..17 + /// #![cfg_attr(cond2, attr2)] // 17..27 + /// let _x = 3; // 27..32 + /// } // 32..33 + /// } // 33..34 + /// ``` pub fn collect_tokens_trailing_token( &mut self, attrs: AttrWrapper, force_collect: ForceCollect, f: impl FnOnce(&mut Self, ast::AttrVec) -> PResult<'a, (R, bool)>, ) -> PResult<'a, R> { - // We only bail out when nothing could possibly observe the collected tokens: - // 1. We cannot be force collecting tokens (since force-collecting requires tokens - // by definition + // Skip collection when nothing could observe the collected tokens, i.e. + // all of the following conditions hold. + // - We are not force collecting tokens (because force collection + // requires tokens by definition). if matches!(force_collect, ForceCollect::No) - // None of our outer attributes can require tokens (e.g. a proc-macro) + // - None of our outer attributes require tokens. && attrs.is_complete() - // If our target supports custom inner attributes, then we cannot bail - // out early, since we may need to capture tokens for a custom inner attribute - // invocation. + // - Our target doesn't support custom inner attributes (custom + // inner attribute invocation might require token capturing). && !R::SUPPORTS_CUSTOM_INNER_ATTRS - // Never bail out early in `capture_cfg` mode, since there might be `#[cfg]` - // or `#[cfg_attr]` attributes. + // - We are not in `capture_cfg` mode (which requires tokens if + // the parsed node has `#[cfg]` or `#[cfg_attr]` attributes). && !self.capture_cfg { return Ok(f(self, attrs.attrs)?.0); @@ -214,6 +222,12 @@ impl<'a> Parser<'a> { let has_outer_attrs = !attrs.attrs.is_empty(); let replace_ranges_start = self.capture_state.replace_ranges.len(); + // We set and restore `Capturing::Yes` on either side of the call to + // `f`, so we can distinguish the outermost call to + // `collect_tokens_trailing_token` (e.g. parsing `m` in the example + // above) from any inner (indirectly recursive) calls (e.g. parsing `g` + // in the example above). This distinction is used below and in + // `Parser::parse_inner_attributes`. let (mut ret, capture_trailing) = { let prev_capturing = mem::replace(&mut self.capture_state.capturing, Capturing::Yes); let ret_and_trailing = f(self, attrs.attrs); @@ -221,51 +235,46 @@ impl<'a> Parser<'a> { ret_and_trailing? }; - // When we're not in `capture-cfg` mode, then bail out early if: - // 1. Our target doesn't support tokens at all (e.g we're parsing an `NtIdent`) - // so there's nothing for us to do. - // 2. Our target already has tokens set (e.g. we've parsed something - // like `#[my_attr] $item`). The actual parsing code takes care of - // prepending any attributes to the nonterminal, so we don't need to - // modify the already captured tokens. - // Note that this check is independent of `force_collect`- if we already - // have tokens, or can't even store them, then there's never a need to - // force collection of new tokens. + // When we're not in `capture_cfg` mode, then skip collecting and + // return early if either of the following conditions hold. + // - `None`: Our target doesn't support tokens at all (e.g. `NtIdent`). + // - `Some(Some(_))`: Our target already has tokens set (e.g. we've + // parsed something like `#[my_attr] $item`). The actual parsing code + // takes care of prepending any attributes to the nonterminal, so we + // don't need to modify the already captured tokens. + // + // Note that this check is independent of `force_collect`. There's no + // need to collect tokens when we don't support tokens or already have + // tokens. if !self.capture_cfg && matches!(ret.tokens_mut(), None | Some(Some(_))) { return Ok(ret); } - // This is very similar to the bail out check at the start of this function. - // Now that we've parsed an AST node, we have more information available. + // This is similar to the "skip collection" check at the start of this + // function, but now that we've parsed an AST node we have more + // information available. (If we return early here that means the + // setup, such as cloning the token cursor, was unnecessary. That's + // hard to avoid.) + // + // Skip collection when nothing could observe the collected tokens, i.e. + // all of the following conditions hold. + // - We are not force collecting tokens. if matches!(force_collect, ForceCollect::No) - // We now have inner attributes available, so this check is more precise - // than `attrs.is_complete()` at the start of the function. - // As a result, we don't need to check `R::SUPPORTS_CUSTOM_INNER_ATTRS` + // - None of our outer *or* inner attributes require tokens. + // (`attrs` was just outer attributes, but `ret.attrs()` is outer + // and inner attributes. That makes this check more precise than + // `attrs.is_complete()` at the start of the function, and we can + // skip the subsequent check on `R::SUPPORTS_CUSTOM_INNER_ATTRS`. && crate::parser::attr::is_complete(ret.attrs()) - // Subtle: We call `has_cfg_or_cfg_attr` with the attrs from `ret`. - // This ensures that we consider inner attributes (e.g. `#![cfg]`), - // which require us to have tokens available - // We also call `has_cfg_or_cfg_attr` at the beginning of this function, - // but we only bail out if there's no possibility of inner attributes - // (!R::SUPPORTS_CUSTOM_INNER_ATTRS) - // We only capture about `#[cfg]` or `#[cfg_attr]` in `capture_cfg` - // mode - during normal parsing, we don't need any special capturing - // for those attributes, since they're builtin. - && !(self.capture_cfg && has_cfg_or_cfg_attr(ret.attrs())) + // - We are not in `capture_cfg` mode, or we are but there are no + // `#[cfg]` or `#[cfg_attr]` attributes. (During normal + // non-`capture_cfg` parsing, we don't need any special capturing + // for those attributes, because they're builtin.) + && (!self.capture_cfg || !has_cfg_or_cfg_attr(ret.attrs())) { return Ok(ret); } - let mut inner_attr_replace_ranges = Vec::new(); - // Take the captured ranges for any inner attributes that we parsed. - for inner_attr in ret.attrs().iter().filter(|a| a.style == ast::AttrStyle::Inner) { - if let Some(attr_range) = self.capture_state.inner_attr_ranges.remove(&inner_attr.id) { - inner_attr_replace_ranges.push(attr_range); - } else { - self.dcx().span_delayed_bug(inner_attr.span, "Missing token range for attribute"); - } - } - let replace_ranges_end = self.capture_state.replace_ranges.len(); assert!( @@ -283,15 +292,28 @@ impl<'a> Parser<'a> { let num_calls = end_pos - start_pos; + // Take the captured ranges for any inner attributes that we parsed in + // `Parser::parse_inner_attributes`, and pair them in a `ReplaceRange` + // with `None`, which means the relevant tokens will be removed. (More + // details below.) + let mut inner_attr_replace_ranges = Vec::new(); + for inner_attr in ret.attrs().iter().filter(|a| a.style == ast::AttrStyle::Inner) { + if let Some(attr_range) = self.capture_state.inner_attr_ranges.remove(&inner_attr.id) { + inner_attr_replace_ranges.push((attr_range, None)); + } else { + self.dcx().span_delayed_bug(inner_attr.span, "Missing token range for attribute"); + } + } + // This is hot enough for `deep-vector` that checking the conditions for an empty iterator // is measurably faster than actually executing the iterator. let replace_ranges: Box<[ReplaceRange]> = if replace_ranges_start == replace_ranges_end && inner_attr_replace_ranges.is_empty() { Box::new([]) } else { - // Grab any replace ranges that occur *inside* the current AST node. - // We will perform the actual replacement when we convert the `LazyAttrTokenStream` - // to an `AttrTokenStream`. + // Grab any replace ranges that occur *inside* the current AST node. We will + // perform the actual replacement only when we convert the `LazyAttrTokenStream` to + // an `AttrTokenStream`. self.capture_state.replace_ranges[replace_ranges_start..replace_ranges_end] .iter() .cloned() @@ -300,6 +322,28 @@ impl<'a> Parser<'a> { .collect() }; + // What is the status here when parsing the example code at the top of this method? + // + // When parsing `g`: + // - `start_pos..end_pos` is `12..33` (`fn g { ... }`, excluding the outer attr). + // - `inner_attr_replace_ranges` has one entry (`5..15`, when counting from `fn`), to + // delete the inner attr's tokens. + // - This entry is put into the lazy tokens for `g`, i.e. deleting the inner attr from + // those tokens (if they get evaluated). + // - Those lazy tokens are also put into an `AttrsTarget` that is appended to `self`'s + // replace ranges at the bottom of this function, for processing when parsing `m`. + // - `replace_ranges_start..replace_ranges_end` is empty. + // + // When parsing `m`: + // - `start_pos..end_pos` is `0..34` (`mod m`, excluding the `#[cfg_eval]` attribute). + // - `inner_attr_replace_ranges` is empty. + // - `replace_range_start..replace_ranges_end` has two entries. + // - One to delete the inner attribute (`17..27`), obtained when parsing `g` (see above). + // - One `AttrsTarget` (added below when parsing `g`) to replace all of `g` (`3..33`, + // including its outer attribute), with: + // - `attrs`: includes the outer and the inner attr. + // - `tokens`: lazy tokens for `g` (with its inner attr deleted). + let tokens = LazyAttrTokenStream::new(LazyAttrTokenStreamImpl { start_token, num_calls, @@ -313,27 +357,37 @@ impl<'a> Parser<'a> { *target_tokens = Some(tokens.clone()); } - let final_attrs = ret.attrs(); - // If `capture_cfg` is set and we're inside a recursive call to // `collect_tokens_trailing_token`, then we need to register a replace range // if we have `#[cfg]` or `#[cfg_attr]`. This allows us to run eager cfg-expansion // on the captured token stream. if self.capture_cfg && matches!(self.capture_state.capturing, Capturing::Yes) - && has_cfg_or_cfg_attr(final_attrs) + && has_cfg_or_cfg_attr(ret.attrs()) { assert!(!self.break_last_token, "Should not have unglued last token with cfg attr"); - // Replace the entire AST node that we just parsed, including attributes, with - // `target`. If this AST node is inside an item that has `#[derive]`, then this will - // allow us to cfg-expand this AST node. + // What is the status here when parsing the example code at the top of this method? + // + // When parsing `g`, we add two entries: + // - The `start_pos..end_pos` (`3..33`) entry has a new `AttrsTarget` with: + // - `attrs`: includes the outer and the inner attr. + // - `tokens`: lazy tokens for `g` (with its inner attr deleted). + // - `inner_attr_replace_ranges` contains the one entry to delete the inner attr's + // tokens (`17..27`). + // + // When parsing `m`, we do nothing here. + + // Set things up so that the entire AST node that we just parsed, including attributes, + // will be replaced with `target` in the lazy token stream. This will allow us to + // cfg-expand this AST node. let start_pos = if has_outer_attrs { attrs.start_pos } else { start_pos }; - let target = AttrsTarget { attrs: final_attrs.iter().cloned().collect(), tokens }; + let target = AttrsTarget { attrs: ret.attrs().iter().cloned().collect(), tokens }; self.capture_state.replace_ranges.push((start_pos..end_pos, Some(target))); self.capture_state.replace_ranges.extend(inner_attr_replace_ranges); } else if matches!(self.capture_state.capturing, Capturing::No) { - // Only clear the ranges once we've finished capturing entirely. + // Only clear the ranges once we've finished capturing entirely, i.e. we've finished + // the outermost call to this method. self.capture_state.replace_ranges.clear(); self.capture_state.inner_attr_ranges.clear(); } diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index c03527acb2cfe..06545e85dd161 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -221,11 +221,12 @@ enum Capturing { Yes, } +// This state is used by `Parser::collect_tokens_trailing_token`. #[derive(Clone, Debug)] struct CaptureState { capturing: Capturing, replace_ranges: Vec, - inner_attr_ranges: FxHashMap, + inner_attr_ranges: FxHashMap>, } /// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that @@ -425,6 +426,11 @@ impl<'a> Parser<'a> { // Make parser point to the first token. parser.bump(); + // Change this from 1 back to 0 after the bump. This eases debugging of + // `Parser::collect_tokens_trailing_token` nicer because it makes the + // token positions 0-indexed which is nicer than 1-indexed. + parser.num_bump_calls = 0; + parser } diff --git a/compiler/rustc_parse/src/parser/tests.rs b/compiler/rustc_parse/src/parser/tests.rs index cf791d332a2fc..491aa71155af2 100644 --- a/compiler/rustc_parse/src/parser/tests.rs +++ b/compiler/rustc_parse/src/parser/tests.rs @@ -1522,7 +1522,7 @@ fn debug_lookahead() { }, }, tokens: [], - approx_token_stream_pos: 1, + approx_token_stream_pos: 0, .. }" ); @@ -1566,7 +1566,7 @@ fn debug_lookahead() { Parenthesis, ), ], - approx_token_stream_pos: 1, + approx_token_stream_pos: 0, .. }" ); @@ -1631,7 +1631,7 @@ fn debug_lookahead() { Semi, Eof, ], - approx_token_stream_pos: 1, + approx_token_stream_pos: 0, .. }" ); @@ -1663,7 +1663,7 @@ fn debug_lookahead() { No, ), ], - approx_token_stream_pos: 9, + approx_token_stream_pos: 8, .. }" ); @@ -1701,7 +1701,7 @@ fn debug_lookahead() { No, ), ], - approx_token_stream_pos: 9, + approx_token_stream_pos: 8, .. }" ); @@ -1728,7 +1728,7 @@ fn debug_lookahead() { tokens: [ Eof, ], - approx_token_stream_pos: 15, + approx_token_stream_pos: 14, .. }" );