Auto merge of #81286 - Aaron1011:collect-tokens-attrs, r=petrochenkov
Require passing an `AttrWrapper` to `collect_tokens_trailing_token`

This is a pure refactoring split out from #80689.
It represents the most invasive part of that PR, requiring changes in
every caller of `parse_outer_attributes`.

In order to eagerly expand `#[cfg]` attributes while preserving the
original `TokenStream`, we need to know the range of tokens that
corresponds to every attribute target. This is accomplished by making
`parse_outer_attributes` return an opaque `AttrWrapper` struct. An
`AttrWrapper` must be converted to a plain `AttrVec` by passing it to
`collect_tokens_trailing_token`. This makes it difficult to accidentally
construct an AST node with attributes without calling `collect_tokens_trailing_token`,
since AST nodes store an `AttrVec`, not an `AttrWrapper`.
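
In miniature, the gating pattern looks like the standalone sketch below
(names simplified; `String` stands in for `ast::Attribute`, and the real
entry point lives on `Parser` and also captures tokens):

    mod attr_wrapper {
        // The field is private to this module, so parsing code outside
        // it cannot reach the attributes directly.
        pub struct AttrWrapper {
            attrs: Vec<String>,
        }

        impl AttrWrapper {
            pub fn new(attrs: Vec<String>) -> AttrWrapper {
                AttrWrapper { attrs }
            }
        }

        // The single sanctioned exit point: token collection (elided
        // here) would happen around the callback that receives the
        // plain attributes.
        pub fn collect_tokens_trailing_token<R>(
            attrs: AttrWrapper,
            f: impl FnOnce(Vec<String>) -> R,
        ) -> R {
            // ...record the token range for the attribute target here...
            f(attrs.attrs)
        }
    }

    fn main() {
        let attrs = attr_wrapper::AttrWrapper::new(vec!["#[inline]".into()]);
        // Building a node that holds attributes must go through the
        // collecting function; `attrs.attrs` would not compile out here.
        let node = attr_wrapper::collect_tokens_trailing_token(attrs, |attrs| attrs.len());
        assert_eq!(node, 1);
    }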

As a result, we now call `collect_tokens_trailing_token` for attribute
targets which only support inert attributes, such as generic arguments
and struct fields. Currently, the constructed `LazyTokenStream` is
simply discarded. Future PRs will record the token range corresponding
to the attribute target, allowing those tokens to be removed from an
enclosing `collect_tokens_trailing_token` call if necessary.
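
The discard case can be sketched the same way: inert-only targets still
flow through token collection, but their `finalize_tokens` impl simply
drops the captured stream (a standalone sketch with stub types,
mirroring the `derive_has_attrs_no_tokens!` impls in the diff below):

    // Stub stand-ins for the real rustc types.
    struct LazyTokenStream;

    trait HasTokens {
        fn finalize_tokens(&mut self, tokens: LazyTokenStream);
    }

    struct GenericParamStub; // stand-in for ast::GenericParam

    // An inert-only target accepts the captured tokens and discards
    // them: nothing can observe the tokens, so they are not stored.
    impl HasTokens for GenericParamStub {
        fn finalize_tokens(&mut self, _tokens: LazyTokenStream) {}
    }

    fn main() {
        let mut param = GenericParamStub;
        // The collection machinery calls finalize_tokens uniformly;
        // for this target the LazyTokenStream is simply dropped.
        param.finalize_tokens(LazyTokenStream);
    }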
bors committed Feb 14, 2021
2 parents b86674e + 3321d70 commit 5fa22fe
Showing 10 changed files with 777 additions and 541 deletions.
15 changes: 15 additions & 0 deletions compiler/rustc_ast/src/ast.rs
@@ -2975,3 +2975,18 @@ macro_rules! derive_has_tokens {
 derive_has_tokens! {
     Item, Expr, Ty, AttrItem, Visibility, Path, Block, Pat
 }
+
+macro_rules! derive_has_attrs_no_tokens {
+    ($($ty:path),*) => { $(
+        impl HasTokens for $ty {
+            fn finalize_tokens(&mut self, _tokens: LazyTokenStream) {}
+        }
+    )* }
+}
+
+// These ast nodes only support inert attributes, so they don't
+// store tokens (since nothing can observe them)
+derive_has_attrs_no_tokens! {
+    StructField, Arm,
+    Field, FieldPat, Variant, Param, GenericParam
+}
12 changes: 7 additions & 5 deletions compiler/rustc_parse/src/parser/attr.rs
@@ -1,4 +1,4 @@
-use super::{Parser, PathStyle};
+use super::{AttrWrapper, Parser, PathStyle};
 use rustc_ast as ast;
 use rustc_ast::attr;
 use rustc_ast::token::{self, Nonterminal};
@@ -26,7 +26,7 @@ pub(super) const DEFAULT_INNER_ATTR_FORBIDDEN: InnerAttrPolicy<'_> = InnerAttrPo

 impl<'a> Parser<'a> {
     /// Parses attributes that appear before an item.
-    pub(super) fn parse_outer_attributes(&mut self) -> PResult<'a, Vec<ast::Attribute>> {
+    pub(super) fn parse_outer_attributes(&mut self) -> PResult<'a, AttrWrapper> {
         let mut attrs: Vec<ast::Attribute> = Vec::new();
         let mut just_parsed_doc_comment = false;
         loop {
@@ -74,7 +74,7 @@ impl<'a> Parser<'a> {
                 break;
             }
         }
-        Ok(attrs)
+        Ok(AttrWrapper::new(attrs))
     }

     /// Matches `attribute = # ! [ meta_item ]`.
@@ -89,7 +89,8 @@ impl<'a> Parser<'a> {
             inner_parse_policy, self.token
         );
         let lo = self.token.span;
-        self.collect_tokens(|this| {
+        // Attributes can't have attributes of their own.
+        self.collect_tokens_no_attrs(|this| {
             if this.eat(&token::Pound) {
                 let style = if this.eat(&token::Not) {
                     ast::AttrStyle::Inner
@@ -163,7 +164,8 @@ impl<'a> Parser<'a> {
             let args = this.parse_attr_args()?;
             Ok(ast::AttrItem { path, args, tokens: None })
         };
-        if capture_tokens { self.collect_tokens(do_parse) } else { do_parse(self) }?
+        // Attr items don't have attributes.
+        if capture_tokens { self.collect_tokens_no_attrs(do_parse) } else { do_parse(self) }?
         })
     }
185 changes: 185 additions & 0 deletions compiler/rustc_parse/src/parser/attr_wrapper.rs
@@ -0,0 +1,185 @@
use super::attr;
use super::{ForceCollect, Parser, TokenCursor, TrailingToken};
use rustc_ast::token::{self, Token, TokenKind};
use rustc_ast::tokenstream::{CreateTokenStream, TokenStream, TokenTree, TreeAndSpacing};
use rustc_ast::tokenstream::{DelimSpan, LazyTokenStream, Spacing};
use rustc_ast::HasTokens;
use rustc_ast::{self as ast};
use rustc_errors::PResult;
use rustc_span::{Span, DUMMY_SP};

/// A wrapper type to ensure that the parser handles outer attributes correctly.
/// When we parse outer attributes, we need to ensure that we capture tokens
/// for the attribute target. This allows us to perform cfg-expansion on
/// a token stream before we invoke a derive proc-macro.
///
/// This wrapper prevents direct access to the underlying `Vec<ast::Attribute>`.
/// Parsing code can only get access to the underlying attributes
/// by passing an `AttrWrapper` to `collect_tokens_trailing_token`.
/// This makes it difficult to accidentally construct an AST node
/// (which stores a `Vec<ast::Attribute>`) without first collecting tokens.
///
/// This struct has its own module, to ensure that the parser code
/// cannot directly access the `attrs` field.
#[derive(Debug, Clone)]
pub struct AttrWrapper {
    attrs: Vec<ast::Attribute>,
}

impl AttrWrapper {
    pub fn empty() -> AttrWrapper {
        AttrWrapper { attrs: vec![] }
    }
    pub fn new(attrs: Vec<ast::Attribute>) -> AttrWrapper {
        AttrWrapper { attrs }
    }
    // FIXME: Delay span bug here?
    pub(crate) fn take_for_recovery(self) -> Vec<ast::Attribute> {
        self.attrs
    }
    pub fn is_empty(&self) -> bool {
        self.attrs.is_empty()
    }
}

impl<'a> Parser<'a> {
    /// Records all tokens consumed by the provided callback,
    /// including the current token. These tokens are collected
    /// into a `LazyTokenStream`, and returned along with the result
    /// of the callback.
    ///
    /// Note: If your callback consumes an opening delimiter
    /// (including the case where you call `collect_tokens`
    /// when the current token is an opening delimiter),
    /// you must also consume the corresponding closing delimiter.
    ///
    /// That is, you can consume
    /// `something ([{ }])` or `([{}])`, but not `([{}]`.
    ///
    /// This restriction shouldn't be an issue in practice,
    /// since this function is used to record the tokens for
    /// a parsed AST item, which always has matching delimiters.
    pub fn collect_tokens_trailing_token<R: HasTokens>(
        &mut self,
        attrs: AttrWrapper,
        force_collect: ForceCollect,
        f: impl FnOnce(&mut Self, Vec<ast::Attribute>) -> PResult<'a, (R, TrailingToken)>,
    ) -> PResult<'a, R> {
        if matches!(force_collect, ForceCollect::No) && !attr::maybe_needs_tokens(&attrs.attrs) {
            return Ok(f(self, attrs.attrs)?.0);
        }
        let start_token = (self.token.clone(), self.token_spacing);
        let cursor_snapshot = self.token_cursor.clone();

        let (mut ret, trailing_token) = f(self, attrs.attrs)?;

        // Produces a `TokenStream` on-demand. Using `cursor_snapshot`
        // and `num_calls`, we can reconstruct the `TokenStream` seen
        // by the callback. This allows us to avoid producing a `TokenStream`
        // if it is never needed - for example, a captured `macro_rules!`
        // argument that is never passed to a proc macro.
        // In practice, token stream creation happens rarely compared to
        // calls to `collect_tokens` (see some statistics in #78736),
        // so we are doing as little up-front work as possible.
        //
        // This also makes `Parser` very cheap to clone, since
        // there is no intermediate collection buffer to clone.
        #[derive(Clone)]
        struct LazyTokenStreamImpl {
            start_token: (Token, Spacing),
            cursor_snapshot: TokenCursor,
            num_calls: usize,
            desugar_doc_comments: bool,
            append_unglued_token: Option<TreeAndSpacing>,
        }
        impl CreateTokenStream for LazyTokenStreamImpl {
            fn create_token_stream(&self) -> TokenStream {
                // The token produced by the final call to `next` or `next_desugared`
                // was not actually consumed by the callback. The combination
                // of chaining the initial token and using `take` produces the desired
                // result - we produce an empty `TokenStream` if no calls were made,
                // and omit the final token otherwise.
                let mut cursor_snapshot = self.cursor_snapshot.clone();
                let tokens = std::iter::once(self.start_token.clone())
                    .chain((0..self.num_calls).map(|_| {
                        if self.desugar_doc_comments {
                            cursor_snapshot.next_desugared()
                        } else {
                            cursor_snapshot.next()
                        }
                    }))
                    .take(self.num_calls);

                make_token_stream(tokens, self.append_unglued_token.clone())
            }
        }

        let mut num_calls = self.token_cursor.num_next_calls - cursor_snapshot.num_next_calls;
        match trailing_token {
            TrailingToken::None => {}
            TrailingToken::Semi => {
                assert_eq!(self.token.kind, token::Semi);
                num_calls += 1;
            }
            TrailingToken::MaybeComma => {
                if self.token.kind == token::Comma {
                    num_calls += 1;
                }
            }
        }

        let lazy_impl = LazyTokenStreamImpl {
            start_token,
            num_calls,
            cursor_snapshot,
            desugar_doc_comments: self.desugar_doc_comments,
            append_unglued_token: self.token_cursor.append_unglued_token.clone(),
        };
        ret.finalize_tokens(LazyTokenStream::new(lazy_impl));
        Ok(ret)
    }
}

/// Converts a flattened iterator of tokens (including open and close delimiter tokens)
/// into a `TokenStream`, creating a `TokenTree::Delimited` for each matching pair
/// of open and close delims.
fn make_token_stream(
    tokens: impl Iterator<Item = (Token, Spacing)>,
    append_unglued_token: Option<TreeAndSpacing>,
) -> TokenStream {
    #[derive(Debug)]
    struct FrameData {
        open: Span,
        inner: Vec<(TokenTree, Spacing)>,
    }
    let mut stack = vec![FrameData { open: DUMMY_SP, inner: vec![] }];
    for (token, spacing) in tokens {
        match token {
            Token { kind: TokenKind::OpenDelim(_), span } => {
                stack.push(FrameData { open: span, inner: vec![] });
            }
            Token { kind: TokenKind::CloseDelim(delim), span } => {
                let frame_data = stack.pop().expect("Token stack was empty!");
                let dspan = DelimSpan::from_pair(frame_data.open, span);
                let stream = TokenStream::new(frame_data.inner);
                let delimited = TokenTree::Delimited(dspan, delim, stream);
                stack
                    .last_mut()
                    .unwrap_or_else(|| panic!("Bottom token frame is missing for tokens!"))
                    .inner
                    .push((delimited, Spacing::Alone));
            }
            token => {
                stack
                    .last_mut()
                    .expect("Bottom token frame is missing!")
                    .inner
                    .push((TokenTree::Token(token), spacing));
            }
        }
    }
    let mut final_buf = stack.pop().expect("Missing final buf!");
    final_buf.inner.extend(append_unglued_token);
    assert!(stack.is_empty(), "Stack should be empty: final_buf={:?} stack={:?}", final_buf, stack);
    TokenStream::new(final_buf.inner)
}