Auto merge of #81286 - Aaron1011:collect-tokens-attrs, r=petrochenkov
Require passing an `AttrWrapper` to `collect_tokens_trailing_token`

This is a pure refactoring split out from #80689.
It represents the most invasive part of that PR, requiring changes in
every caller of `parse_outer_attributes`.

In order to eagerly expand `#[cfg]` attributes while preserving the
original `TokenStream`, we need to know the range of tokens that
corresponds to every attribute target. This is accomplished by making
`parse_outer_attributes` return an opaque `AttrWrapper` struct. An
`AttrWrapper` must be converted to a plain `AttrVec` by passing it to
`collect_tokens_trailing_token`. This makes it difficult to accidentally
construct an AST node with attributes without calling `collect_tokens_trailing_token`,
since AST nodes store an `AttrVec`, not an `AttrWrapper`.
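
In miniature, the gating pattern looks like the standalone sketch below
(names simplified; `String` stands in for `ast::Attribute`, and the real
entry point lives on `Parser` and also captures tokens):

    mod attr_wrapper {
        // The field is private to this module, so parsing code outside
        // it cannot reach the attributes directly.
        pub struct AttrWrapper {
            attrs: Vec<String>,
        }

        impl AttrWrapper {
            pub fn new(attrs: Vec<String>) -> AttrWrapper {
                AttrWrapper { attrs }
            }
        }

        // The single sanctioned exit point: token collection (elided
        // here) would happen around the callback that receives the
        // plain attributes.
        pub fn collect_tokens_trailing_token<R>(
            attrs: AttrWrapper,
            f: impl FnOnce(Vec<String>) -> R,
        ) -> R {
            // ...record the token range for the attribute target here...
            f(attrs.attrs)
        }
    }

    fn main() {
        let attrs = attr_wrapper::AttrWrapper::new(vec!["#[inline]".into()]);
        // Building a node that holds attributes must go through the
        // collecting function; `attrs.attrs` would not compile out here.
        let node = attr_wrapper::collect_tokens_trailing_token(attrs, |attrs| attrs.len());
        assert_eq!(node, 1);
    }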

As a result, we now call `collect_tokens_trailing_token` for attribute
targets which only support inert attributes, such as generic arguments
and struct fields. Currently, the constructed `LazyTokenStream` is
simply discarded. Future PRs will record the token range corresponding
to the attribute target, allowing those tokens to be removed from an
enclosing `collect_tokens_trailing_token` call if necessary.
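
The discard case can be sketched the same way: inert-only targets still
flow through token collection, but their `finalize_tokens` impl simply
drops the captured stream (a standalone sketch with stub types,
mirroring the `derive_has_attrs_no_tokens!` impls in the diff below):

    // Stub stand-ins for the real rustc types.
    struct LazyTokenStream;

    trait HasTokens {
        fn finalize_tokens(&mut self, tokens: LazyTokenStream);
    }

    struct GenericParamStub; // stand-in for ast::GenericParam

    // An inert-only target accepts the captured tokens and discards
    // them: nothing can observe the tokens, so they are not stored.
    impl HasTokens for GenericParamStub {
        fn finalize_tokens(&mut self, _tokens: LazyTokenStream) {}
    }

    fn main() {
        let mut param = GenericParamStub;
        // The collection machinery calls finalize_tokens uniformly;
        // for this target the LazyTokenStream is simply dropped.
        param.finalize_tokens(LazyTokenStream);
    }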
bors committed Feb 14, 2021
2 parents b86674e + 3321d70 commit 5fa22fe
Showing 10 changed files with 777 additions and 541 deletions.
15 changes: 15 additions & 0 deletions compiler/rustc_ast/src/ast.rs
@@ -2975,3 +2975,18 @@ macro_rules! derive_has_tokens {
 derive_has_tokens! {
     Item, Expr, Ty, AttrItem, Visibility, Path, Block, Pat
 }
+
+macro_rules! derive_has_attrs_no_tokens {
+    ($($ty:path),*) => { $(
+        impl HasTokens for $ty {
+            fn finalize_tokens(&mut self, _tokens: LazyTokenStream) {}
+        }
+    )* }
+}
+
+// These ast nodes only support inert attributes, so they don't
+// store tokens (since nothing can observe them)
+derive_has_attrs_no_tokens! {
+    StructField, Arm,
+    Field, FieldPat, Variant, Param, GenericParam
+}
12 changes: 7 additions & 5 deletions compiler/rustc_parse/src/parser/attr.rs
@@ -1,4 +1,4 @@
-use super::{Parser, PathStyle};
+use super::{AttrWrapper, Parser, PathStyle};
 use rustc_ast as ast;
 use rustc_ast::attr;
 use rustc_ast::token::{self, Nonterminal};
@@ -26,7 +26,7 @@ pub(super) const DEFAULT_INNER_ATTR_FORBIDDEN: InnerAttrPolicy<'_> = InnerAttrPo

 impl<'a> Parser<'a> {
     /// Parses attributes that appear before an item.
-    pub(super) fn parse_outer_attributes(&mut self) -> PResult<'a, Vec<ast::Attribute>> {
+    pub(super) fn parse_outer_attributes(&mut self) -> PResult<'a, AttrWrapper> {
         let mut attrs: Vec<ast::Attribute> = Vec::new();
         let mut just_parsed_doc_comment = false;
         loop {
@@ -74,7 +74,7 @@ impl<'a> Parser<'a> {
                 break;
             }
         }
-        Ok(attrs)
+        Ok(AttrWrapper::new(attrs))
     }

     /// Matches `attribute = # ! [ meta_item ]`.
@@ -89,7 +89,8 @@ impl<'a> Parser<'a> {
             inner_parse_policy, self.token
         );
         let lo = self.token.span;
-        self.collect_tokens(|this| {
+        // Attributes can't have attributes of their own.
+        self.collect_tokens_no_attrs(|this| {
             if this.eat(&token::Pound) {
                 let style = if this.eat(&token::Not) {
                     ast::AttrStyle::Inner
@@ -163,7 +164,8 @@ impl<'a> Parser<'a> {
             let args = this.parse_attr_args()?;
             Ok(ast::AttrItem { path, args, tokens: None })
         };
-        if capture_tokens { self.collect_tokens(do_parse) } else { do_parse(self) }?
+        // Attr items don't have attributes.
+        if capture_tokens { self.collect_tokens_no_attrs(do_parse) } else { do_parse(self) }?
         })
     }
185 changes: 185 additions & 0 deletions compiler/rustc_parse/src/parser/attr_wrapper.rs
@@ -0,0 +1,185 @@
use super::attr;
use super::{ForceCollect, Parser, TokenCursor, TrailingToken};
use rustc_ast::token::{self, Token, TokenKind};
use rustc_ast::tokenstream::{CreateTokenStream, TokenStream, TokenTree, TreeAndSpacing};
use rustc_ast::tokenstream::{DelimSpan, LazyTokenStream, Spacing};
use rustc_ast::HasTokens;
use rustc_ast::{self as ast};
use rustc_errors::PResult;
use rustc_span::{Span, DUMMY_SP};

/// A wrapper type to ensure that the parser handles outer attributes correctly.
/// When we parse outer attributes, we need to ensure that we capture tokens
/// for the attribute target. This allows us to perform cfg-expansion on
/// a token stream before we invoke a derive proc-macro.
///
/// This wrapper prevents direct access to the underlying `Vec<ast::Attribute>`.
/// Parsing code can only get access to the underlying attributes
/// by passing an `AttrWrapper` to `collect_tokens_trailing_token`.
/// This makes it difficult to accidentally construct an AST node
/// (which stores a `Vec<ast::Attribute>`) without first collecting tokens.
///
/// This struct has its own module, to ensure that the parser code
/// cannot directly access the `attrs` field.
#[derive(Debug, Clone)]
pub struct AttrWrapper {
    attrs: Vec<ast::Attribute>,
}

impl AttrWrapper {
    pub fn empty() -> AttrWrapper {
        AttrWrapper { attrs: vec![] }
    }
    pub fn new(attrs: Vec<ast::Attribute>) -> AttrWrapper {
        AttrWrapper { attrs }
    }
    // FIXME: Delay span bug here?
    pub(crate) fn take_for_recovery(self) -> Vec<ast::Attribute> {
        self.attrs
    }
    pub fn is_empty(&self) -> bool {
        self.attrs.is_empty()
    }
}

impl<'a> Parser<'a> {
    /// Records all tokens consumed by the provided callback,
    /// including the current token. These tokens are collected
    /// into a `LazyTokenStream`, and returned along with the result
    /// of the callback.
    ///
    /// Note: If your callback consumes an opening delimiter
    /// (including the case where you call `collect_tokens`
    /// when the current token is an opening delimiter),
    /// you must also consume the corresponding closing delimiter.
    ///
    /// That is, you can consume
    /// `something ([{ }])` or `([{}])`, but not `([{}]`.
    ///
    /// This restriction shouldn't be an issue in practice,
    /// since this function is used to record the tokens for
    /// a parsed AST item, which always has matching delimiters.
    pub fn collect_tokens_trailing_token<R: HasTokens>(
        &mut self,
        attrs: AttrWrapper,
        force_collect: ForceCollect,
        f: impl FnOnce(&mut Self, Vec<ast::Attribute>) -> PResult<'a, (R, TrailingToken)>,
    ) -> PResult<'a, R> {
        if matches!(force_collect, ForceCollect::No) && !attr::maybe_needs_tokens(&attrs.attrs) {
            return Ok(f(self, attrs.attrs)?.0);
        }
        let start_token = (self.token.clone(), self.token_spacing);
        let cursor_snapshot = self.token_cursor.clone();

        let (mut ret, trailing_token) = f(self, attrs.attrs)?;

        // Produces a `TokenStream` on-demand. Using `cursor_snapshot`
        // and `num_calls`, we can reconstruct the `TokenStream` seen
        // by the callback. This allows us to avoid producing a `TokenStream`
        // if it is never needed - for example, a captured `macro_rules!`
        // argument that is never passed to a proc macro.
        // In practice, token stream creation happens rarely compared to
        // calls to `collect_tokens` (see some statistics in #78736),
        // so we are doing as little up-front work as possible.
        //
        // This also makes `Parser` very cheap to clone, since
        // there is no intermediate collection buffer to clone.
        #[derive(Clone)]
        struct LazyTokenStreamImpl {
            start_token: (Token, Spacing),
            cursor_snapshot: TokenCursor,
            num_calls: usize,
            desugar_doc_comments: bool,
            append_unglued_token: Option<TreeAndSpacing>,
        }
        impl CreateTokenStream for LazyTokenStreamImpl {
            fn create_token_stream(&self) -> TokenStream {
                // The token produced by the final call to `next` or `next_desugared`
                // was not actually consumed by the callback. The combination
                // of chaining the initial token and using `take` produces the desired
                // result - we produce an empty `TokenStream` if no calls were made,
                // and omit the final token otherwise.
                let mut cursor_snapshot = self.cursor_snapshot.clone();
                let tokens = std::iter::once(self.start_token.clone())
                    .chain((0..self.num_calls).map(|_| {
                        if self.desugar_doc_comments {
                            cursor_snapshot.next_desugared()
                        } else {
                            cursor_snapshot.next()
                        }
                    }))
                    .take(self.num_calls);

                make_token_stream(tokens, self.append_unglued_token.clone())
            }
        }

        let mut num_calls = self.token_cursor.num_next_calls - cursor_snapshot.num_next_calls;
        match trailing_token {
            TrailingToken::None => {}
            TrailingToken::Semi => {
                assert_eq!(self.token.kind, token::Semi);
                num_calls += 1;
            }
            TrailingToken::MaybeComma => {
                if self.token.kind == token::Comma {
                    num_calls += 1;
                }
            }
        }

        let lazy_impl = LazyTokenStreamImpl {
            start_token,
            num_calls,
            cursor_snapshot,
            desugar_doc_comments: self.desugar_doc_comments,
            append_unglued_token: self.token_cursor.append_unglued_token.clone(),
        };
        ret.finalize_tokens(LazyTokenStream::new(lazy_impl));
        Ok(ret)
    }
}

/// Converts a flattened iterator of tokens (including open and close delimiter tokens)
/// into a `TokenStream`, creating a `TokenTree::Delimited` for each matching pair
/// of open and close delims.
fn make_token_stream(
    tokens: impl Iterator<Item = (Token, Spacing)>,
    append_unglued_token: Option<TreeAndSpacing>,
) -> TokenStream {
    #[derive(Debug)]
    struct FrameData {
        open: Span,
        inner: Vec<(TokenTree, Spacing)>,
    }
    let mut stack = vec![FrameData { open: DUMMY_SP, inner: vec![] }];
    for (token, spacing) in tokens {
        match token {
            Token { kind: TokenKind::OpenDelim(_), span } => {
                stack.push(FrameData { open: span, inner: vec![] });
            }
            Token { kind: TokenKind::CloseDelim(delim), span } => {
                let frame_data = stack.pop().expect("Token stack was empty!");
                let dspan = DelimSpan::from_pair(frame_data.open, span);
                let stream = TokenStream::new(frame_data.inner);
                let delimited = TokenTree::Delimited(dspan, delim, stream);
                stack
                    .last_mut()
                    .unwrap_or_else(|| panic!("Bottom token frame is missing for tokens!"))
                    .inner
                    .push((delimited, Spacing::Alone));
            }
            token => {
                stack
                    .last_mut()
                    .expect("Bottom token frame is missing!")
                    .inner
                    .push((TokenTree::Token(token), spacing));
            }
        }
    }
    let mut final_buf = stack.pop().expect("Missing final buf!");
    final_buf.inner.extend(append_unglued_token);
    assert!(stack.is_empty(), "Stack should be empty: final_buf={:?} stack={:?}", final_buf, stack);
    TokenStream::new(final_buf.inner)
}