Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rewrite Parser::collect_tokens #72393

Merged
merged 1 commit into from
May 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
186 changes: 113 additions & 73 deletions src/librustc_parse/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ impl<'a> Drop for Parser<'a> {
struct TokenCursor {
frame: TokenCursorFrame,
stack: Vec<TokenCursorFrame>,
cur_token: Option<TreeAndJoint>,
collecting: Option<Collecting>,
}

#[derive(Clone)]
Expand All @@ -127,30 +129,24 @@ struct TokenCursorFrame {
open_delim: bool,
tree_cursor: tokenstream::Cursor,
close_delim: bool,
last_token: LastToken,
}

/// This is used in `TokenCursorFrame` above to track tokens that are consumed
/// by the parser, and then that's transitively used to record the tokens that
/// each parse AST item is created with.
///
/// Right now this has two states, either collecting tokens or not collecting
/// tokens. If we're collecting tokens we just save everything off into a local
/// `Vec`. This should eventually though likely save tokens from the original
/// token stream and just use slicing of token streams to avoid creation of a
/// whole new vector.
///
/// The second state is where we're passively not recording tokens, but the last
/// token is still tracked for when we want to start recording tokens. This
/// "last token" means that when we start recording tokens we'll want to ensure
/// that this, the first token, is included in the output.
///
/// You can find some more example usage of this in the `collect_tokens` method
/// on the parser.
#[derive(Clone)]
enum LastToken {
Collecting(Vec<TreeAndJoint>),
Was(Option<TreeAndJoint>),
/// Used to track additional state needed by `collect_tokens`
#[derive(Clone, Debug)]
struct Collecting {
/// Holds the current tokens captured during the most
/// recent call to `collect_tokens`
buf: Vec<TreeAndJoint>,
/// The depth of the `TokenCursor` stack at the time
/// collection was started. When we encounter a `TokenTree::Delimited`,
/// we want to record the `TokenTree::Delimited` itself,
/// but *not* any of the inner tokens while we are inside
/// the new frame (this would cause us to record duplicate tokens).
///
/// This `depth` fields tracks stack depth we are recording tokens.
/// Only tokens encountered at this depth will be recorded. See
/// `TokenCursor::next` for more details.
depth: usize,
}

impl TokenCursorFrame {
Expand All @@ -161,7 +157,6 @@ impl TokenCursorFrame {
open_delim: delim == token::NoDelim,
tree_cursor: tts.clone().into_trees(),
close_delim: delim == token::NoDelim,
last_token: LastToken::Was(None),
}
}
}
Expand All @@ -171,25 +166,38 @@ impl TokenCursor {
loop {
let tree = if !self.frame.open_delim {
self.frame.open_delim = true;
TokenTree::open_tt(self.frame.span, self.frame.delim)
} else if let Some(tree) = self.frame.tree_cursor.next() {
TokenTree::open_tt(self.frame.span, self.frame.delim).into()
} else if let Some(tree) = self.frame.tree_cursor.next_with_joint() {
tree
} else if !self.frame.close_delim {
self.frame.close_delim = true;
TokenTree::close_tt(self.frame.span, self.frame.delim)
TokenTree::close_tt(self.frame.span, self.frame.delim).into()
} else if let Some(frame) = self.stack.pop() {
self.frame = frame;
continue;
} else {
return Token::new(token::Eof, DUMMY_SP);
};

match self.frame.last_token {
LastToken::Collecting(ref mut v) => v.push(tree.clone().into()),
LastToken::Was(ref mut t) => *t = Some(tree.clone().into()),
// Don't set an open delimiter as our current token - we want
// to leave it as the full `TokenTree::Delimited` from the previous
// iteration of this loop
if !matches!(tree.0, TokenTree::Token(Token { kind: TokenKind::OpenDelim(_), .. })) {
self.cur_token = Some(tree.clone());
}

if let Some(collecting) = &mut self.collecting {
if collecting.depth == self.stack.len() {
debug!(
"TokenCursor::next(): collected {:?} at depth {:?}",
tree,
self.stack.len()
);
collecting.buf.push(tree.clone().into())
Aaron1011 marked this conversation as resolved.
Show resolved Hide resolved
}
}

match tree {
match tree.0 {
TokenTree::Token(token) => return token,
TokenTree::Delimited(sp, delim, tts) => {
let frame = TokenCursorFrame::new(sp, delim, &tts);
Expand Down Expand Up @@ -350,6 +358,8 @@ impl<'a> Parser<'a> {
token_cursor: TokenCursor {
frame: TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, &tokens),
stack: Vec::new(),
cur_token: None,
collecting: None,
},
desugar_doc_comments,
unmatched_angle_bracket_count: 0,
Expand Down Expand Up @@ -1105,65 +1115,95 @@ impl<'a> Parser<'a> {
}
}

/// Records all tokens consumed by the provided callback,
/// including the current token. These tokens are collected
/// into a `TokenStream`, and returned along with the result
/// of the callback.
///
/// Note: If your callback consumes an opening delimiter
/// (including the case where you call `collect_tokens`
/// when the current token is an opening delimeter),
/// you must also consume the corresponding closing delimiter.
///
/// That is, you can consume
/// `something ([{ }])` or `([{}])`, but not `([{}]`
///
/// This restriction shouldn't be an issue in practice,
/// since this function is used to record the tokens for
/// a parsed AST item, which always has matching delimiters.
fn collect_tokens<R>(
&mut self,
f: impl FnOnce(&mut Self) -> PResult<'a, R>,
) -> PResult<'a, (R, TokenStream)> {
// Record all tokens we parse when parsing this item.
let mut tokens = Vec::new();
let prev_collecting = match self.token_cursor.frame.last_token {
LastToken::Collecting(ref mut list) => Some(mem::take(list)),
LastToken::Was(ref mut last) => {
tokens.extend(last.take());
None
}
};
self.token_cursor.frame.last_token = LastToken::Collecting(tokens);
let prev = self.token_cursor.stack.len();
let tokens: Vec<TreeAndJoint> = self.token_cursor.cur_token.clone().into_iter().collect();
debug!("collect_tokens: starting with {:?}", tokens);

// We need special handling for the case where `collect_tokens` is called
// on an opening delimeter (e.g. '('). At this point, we have already pushed
// a new frame - however, we want to record the original `TokenTree::Delimited`,
// for consistency with the case where we start recording one token earlier.
// See `TokenCursor::next` to see how `cur_token` is set up.
let prev_depth =
if matches!(self.token_cursor.cur_token, Some((TokenTree::Delimited(..), _))) {
if self.token_cursor.stack.is_empty() {
// There is nothing below us in the stack that
// the function could consume, so the only thing it can legally
// capture is the entire contents of the current frame.
return Ok((f(self)?, TokenStream::new(tokens)));
}
// We have already recorded the full `TokenTree::Delimited` when we created
// our `tokens` vector at the start of this function. We are now inside
// a new frame corresponding to the `TokenTree::Delimited` we already recoreded.
// We don't want to record any of the tokens inside this frame, since they
// will be duplicates of the tokens nested inside the `TokenTree::Delimited`.
// Therefore, we set our recording depth to the *previous* frame. This allows
// us to record a sequence like: `(foo).bar()`: the `(foo)` will be recored
// as our initial `cur_token`, while the `.bar()` will be recored after we
// pop the `(foo)` frame.
self.token_cursor.stack.len() - 1
} else {
self.token_cursor.stack.len()
};
let prev_collecting =
self.token_cursor.collecting.replace(Collecting { buf: tokens, depth: prev_depth });

let ret = f(self);
let last_token = if self.token_cursor.stack.len() == prev {
&mut self.token_cursor.frame.last_token
} else if self.token_cursor.stack.get(prev).is_none() {
// This can happen due to a bad interaction of two unrelated recovery mechanisms with
// mismatched delimiters *and* recovery lookahead on the likely typo `pub ident(`
// (#62881).
return Ok((ret?, TokenStream::default()));

let mut collected_tokens = if let Some(collecting) = self.token_cursor.collecting.take() {
collecting.buf
} else {
&mut self.token_cursor.stack[prev].last_token
let msg = format!("our vector went away?");
debug!("collect_tokens: {}", msg);
self.sess.span_diagnostic.delay_span_bug(self.token.span, &msg);
// This can happen due to a bad interaction of two unrelated recovery mechanisms
// with mismatched delimiters *and* recovery lookahead on the likely typo
// `pub ident(` (#62895, different but similar to the case above).
return Ok((ret?, TokenStream::default()));
};

// Pull out the tokens that we've collected from the call to `f` above.
let mut collected_tokens = match *last_token {
LastToken::Collecting(ref mut v) => mem::take(v),
LastToken::Was(ref was) => {
let msg = format!("our vector went away? - found Was({:?})", was);
debug!("collect_tokens: {}", msg);
self.sess.span_diagnostic.delay_span_bug(self.token.span, &msg);
// This can happen due to a bad interaction of two unrelated recovery mechanisms
// with mismatched delimiters *and* recovery lookahead on the likely typo
// `pub ident(` (#62895, different but similar to the case above).
return Ok((ret?, TokenStream::default()));
}
};
debug!("collect_tokens: got raw tokens {:?}", collected_tokens);

// If we're not at EOF our current token wasn't actually consumed by
// `f`, but it'll still be in our list that we pulled out. In that case
// put it back.
let extra_token = if self.token != token::Eof { collected_tokens.pop() } else { None };

// If we were previously collecting tokens, then this was a recursive
// call. In that case we need to record all the tokens we collected in
// our parent list as well. To do that we push a clone of our stream
// onto the previous list.
match prev_collecting {
Some(mut list) => {
list.extend(collected_tokens.iter().cloned());
list.extend(extra_token);
*last_token = LastToken::Collecting(list);
}
None => {
*last_token = LastToken::Was(extra_token);
if let Some(mut collecting) = prev_collecting {
// If we were previously collecting at the same depth,
// then the previous call to `collect_tokens` needs to see
// the tokens we just recorded.
//
// If we were previously recording at an lower `depth`,
// then the previous `collect_tokens` call already recorded
// this entire frame in the form of a `TokenTree::Delimited`,
// so there is nothing else for us to do.
if collecting.depth == prev_depth {
collecting.buf.extend(collected_tokens.iter().cloned());
collecting.buf.extend(extra_token);
debug!("collect_tokens: updating previous buf to {:?}", collecting);
}
self.token_cursor.collecting = Some(collecting)
}

Ok((ret?, TokenStream::new(collected_tokens)))
Expand Down
2 changes: 1 addition & 1 deletion src/test/ui/ast-json/ast-json-noexpand-output.stdout
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"module":{"inner":{"lo":0,"hi":0},"items":[{"attrs":[],"id":0,"span":{"lo":0,"hi":0},"vis":{"node":"Inherited","span":{"lo":0,"hi":0}},"ident":{"name":"core","span":{"lo":0,"hi":0}},"kind":{"variant":"ExternCrate","fields":[null]},"tokens":{"_field0":[[{"variant":"Token","fields":[{"kind":{"variant":"Ident","fields":["extern",false]},"span":{"lo":0,"hi":0}}]},"NonJoint"],[{"variant":"Token","fields":[{"kind":{"variant":"Ident","fields":["crate",false]},"span":{"lo":0,"hi":0}}]},"NonJoint"],[{"variant":"Token","fields":[{"kind":{"variant":"Ident","fields":["core",false]},"span":{"lo":0,"hi":0}}]},"NonJoint"],[{"variant":"Token","fields":[{"kind":"Semi","span":{"lo":0,"hi":0}}]},"NonJoint"]]}}],"inline":true},"attrs":[{"kind":{"variant":"Normal","fields":[{"path":{"span":{"lo":0,"hi":0},"segments":[{"ident":{"name":"crate_type","span":{"lo":0,"hi":0}},"id":0,"args":null}]},"args":{"variant":"Eq","fields":[{"lo":0,"hi":0},{"_field0":[[{"variant":"Token","fields":[{"kind":{"variant":"Literal","fields":[{"kind":"Str","symbol":"lib","suffix":null}]},"span":{"lo":0,"hi":0}}]},"NonJoint"]]}]}}]},"id":null,"style":"Inner","span":{"lo":0,"hi":0}}],"span":{"lo":0,"hi":0},"proc_macros":[]}
{"module":{"inner":{"lo":0,"hi":0},"items":[{"attrs":[],"id":0,"span":{"lo":0,"hi":0},"vis":{"node":"Inherited","span":{"lo":0,"hi":0}},"ident":{"name":"core","span":{"lo":0,"hi":0}},"kind":{"variant":"ExternCrate","fields":[null]},"tokens":{"_field0":[[{"variant":"Token","fields":[{"kind":{"variant":"Ident","fields":["extern",false]},"span":{"lo":0,"hi":0}}]},"NonJoint"],[{"variant":"Token","fields":[{"kind":{"variant":"Ident","fields":["crate",false]},"span":{"lo":0,"hi":0}}]},"NonJoint"],[{"variant":"Token","fields":[{"kind":{"variant":"Ident","fields":["core",false]},"span":{"lo":0,"hi":0}}]},"Joint"],[{"variant":"Token","fields":[{"kind":"Semi","span":{"lo":0,"hi":0}}]},"NonJoint"]]}}],"inline":true},"attrs":[{"kind":{"variant":"Normal","fields":[{"path":{"span":{"lo":0,"hi":0},"segments":[{"ident":{"name":"crate_type","span":{"lo":0,"hi":0}},"id":0,"args":null}]},"args":{"variant":"Eq","fields":[{"lo":0,"hi":0},{"_field0":[[{"variant":"Token","fields":[{"kind":{"variant":"Literal","fields":[{"kind":"Str","symbol":"lib","suffix":null}]},"span":{"lo":0,"hi":0}}]},"NonJoint"]]}]}}]},"id":null,"style":"Inner","span":{"lo":0,"hi":0}}],"span":{"lo":0,"hi":0},"proc_macros":[]}
2 changes: 1 addition & 1 deletion src/test/ui/ast-json/ast-json-output.stdout
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"module":{"inner":{"lo":0,"hi":0},"items":[{"attrs":[{"kind":{"variant":"Normal","fields":[{"path":{"span":{"lo":0,"hi":0},"segments":[{"ident":{"name":"prelude_import","span":{"lo":0,"hi":0}},"id":0,"args":null}]},"args":"Empty"}]},"id":null,"style":"Outer","span":{"lo":0,"hi":0}}],"id":0,"span":{"lo":0,"hi":0},"vis":{"node":"Inherited","span":{"lo":0,"hi":0}},"ident":{"name":"","span":{"lo":0,"hi":0}},"kind":{"variant":"Use","fields":[{"prefix":{"span":{"lo":0,"hi":0},"segments":[{"ident":{"name":"{{root}}","span":{"lo":0,"hi":0}},"id":0,"args":null},{"ident":{"name":"std","span":{"lo":0,"hi":0}},"id":0,"args":null},{"ident":{"name":"prelude","span":{"lo":0,"hi":0}},"id":0,"args":null},{"ident":{"name":"v1","span":{"lo":0,"hi":0}},"id":0,"args":null}]},"kind":"Glob","span":{"lo":0,"hi":0}}]},"tokens":null},{"attrs":[{"kind":{"variant":"Normal","fields":[{"path":{"span":{"lo":0,"hi":0},"segments":[{"ident":{"name":"macro_use","span":{"lo":0,"hi":0}},"id":0,"args":null}]},"args":"Empty"}]},"id":null,"style":"Outer","span":{"lo":0,"hi":0}}],"id":0,"span":{"lo":0,"hi":0},"vis":{"node":"Inherited","span":{"lo":0,"hi":0}},"ident":{"name":"std","span":{"lo":0,"hi":0}},"kind":{"variant":"ExternCrate","fields":[null]},"tokens":null},{"attrs":[],"id":0,"span":{"lo":0,"hi":0},"vis":{"node":"Inherited","span":{"lo":0,"hi":0}},"ident":{"name":"core","span":{"lo":0,"hi":0}},"kind":{"variant":"ExternCrate","fields":[null]},"tokens":{"_field0":[[{"variant":"Token","fields":[{"kind":{"variant":"Ident","fields":["extern",false]},"span":{"lo":0,"hi":0}}]},"NonJoint"],[{"variant":"Token","fields":[{"kind":{"variant":"Ident","fields":["crate",false]},"span":{"lo":0,"hi":0}}]},"NonJoint"],[{"variant":"Token","fields":[{"kind":{"variant":"Ident","fields":["core",false]},"span":{"lo":0,"hi":0}}]},"NonJoint"],[{"variant":"Token","fields":[{"kind":"Semi","span":{"lo":0,"hi":0}}]},"NonJoint"]]}}],"inline":true},"attrs":[{"kind":{"variant":"Normal","fields":[{"path":{"span":{"lo":0,"hi":0},"segments":[{"ident":{"name":"crate_type","span":{"lo":0,"hi":0}},"id":0,"args":null}]},"args":{"variant":"Eq","fields":[{"lo":0,"hi":0},{"_field0":[[{"variant":"Token","fields":[{"kind":{"variant":"Literal","fields":[{"kind":"Str","symbol":"lib","suffix":null}]},"span":{"lo":0,"hi":0}}]},"NonJoint"]]}]}}]},"id":null,"style":"Inner","span":{"lo":0,"hi":0}}],"span":{"lo":0,"hi":0},"proc_macros":[]}
{"module":{"inner":{"lo":0,"hi":0},"items":[{"attrs":[{"kind":{"variant":"Normal","fields":[{"path":{"span":{"lo":0,"hi":0},"segments":[{"ident":{"name":"prelude_import","span":{"lo":0,"hi":0}},"id":0,"args":null}]},"args":"Empty"}]},"id":null,"style":"Outer","span":{"lo":0,"hi":0}}],"id":0,"span":{"lo":0,"hi":0},"vis":{"node":"Inherited","span":{"lo":0,"hi":0}},"ident":{"name":"","span":{"lo":0,"hi":0}},"kind":{"variant":"Use","fields":[{"prefix":{"span":{"lo":0,"hi":0},"segments":[{"ident":{"name":"{{root}}","span":{"lo":0,"hi":0}},"id":0,"args":null},{"ident":{"name":"std","span":{"lo":0,"hi":0}},"id":0,"args":null},{"ident":{"name":"prelude","span":{"lo":0,"hi":0}},"id":0,"args":null},{"ident":{"name":"v1","span":{"lo":0,"hi":0}},"id":0,"args":null}]},"kind":"Glob","span":{"lo":0,"hi":0}}]},"tokens":null},{"attrs":[{"kind":{"variant":"Normal","fields":[{"path":{"span":{"lo":0,"hi":0},"segments":[{"ident":{"name":"macro_use","span":{"lo":0,"hi":0}},"id":0,"args":null}]},"args":"Empty"}]},"id":null,"style":"Outer","span":{"lo":0,"hi":0}}],"id":0,"span":{"lo":0,"hi":0},"vis":{"node":"Inherited","span":{"lo":0,"hi":0}},"ident":{"name":"std","span":{"lo":0,"hi":0}},"kind":{"variant":"ExternCrate","fields":[null]},"tokens":null},{"attrs":[],"id":0,"span":{"lo":0,"hi":0},"vis":{"node":"Inherited","span":{"lo":0,"hi":0}},"ident":{"name":"core","span":{"lo":0,"hi":0}},"kind":{"variant":"ExternCrate","fields":[null]},"tokens":{"_field0":[[{"variant":"Token","fields":[{"kind":{"variant":"Ident","fields":["extern",false]},"span":{"lo":0,"hi":0}}]},"NonJoint"],[{"variant":"Token","fields":[{"kind":{"variant":"Ident","fields":["crate",false]},"span":{"lo":0,"hi":0}}]},"NonJoint"],[{"variant":"Token","fields":[{"kind":{"variant":"Ident","fields":["core",false]},"span":{"lo":0,"hi":0}}]},"Joint"],[{"variant":"Token","fields":[{"kind":"Semi","span":{"lo":0,"hi":0}}]},"NonJoint"]]}}],"inline":true},"attrs":[{"kind":{"variant":"Normal","fields":[{"path":{"span":{"lo":0,"hi":0},"segments":[{"ident":{"name":"crate_type","span":{"lo":0,"hi":0}},"id":0,"args":null}]},"args":{"variant":"Eq","fields":[{"lo":0,"hi":0},{"_field0":[[{"variant":"Token","fields":[{"kind":{"variant":"Literal","fields":[{"kind":"Str","symbol":"lib","suffix":null}]},"span":{"lo":0,"hi":0}}]},"NonJoint"]]}]}}]},"id":null,"style":"Inner","span":{"lo":0,"hi":0}}],"span":{"lo":0,"hi":0},"proc_macros":[]}