Auto merge of rust-lang#79472 - Aaron1011:new-remove-pretty-print-hack, r=petrochenkov

Replace pretty-print/compare/retokenize hack with targeted workarounds

Based on rust-lang#78296
cc rust-lang#43081

The 'pretty-print/compare/retokenize' hack is used to try to avoid passing an outdated `TokenStream` to a proc-macro when the underlying AST is modified in some way (e.g. cfg-stripping before derives). Unfortunately, retokenizing throws away spans (including hygiene information), which causes issues of its own. Every improvement to the accuracy of the pretty-print/retokenize comparison has resulted in non-trivial ecosystem breakage due to hygiene changes. In extreme cases, users deliberately wrote unhygienic `macro_rules!` macros (likely because they did not realize that the compiler's behavior was a bug).
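
To make the shape of the removed heuristic concrete, here is a deliberately simplified, self-contained model of it (plain strings stand in for rustc's real token types; the function and helper names are invented for illustration and are not the compiler's API):

```rust
/// Stand-in for a real token: in rustc, tokens carry spans and hygiene marks;
/// the re-lexed stream below has neither.
#[derive(Clone, Debug, PartialEq)]
struct Tok(String);

/// Sketch of the removed heuristic: pretty-print the (possibly modified) AST,
/// re-lex the output, and keep the original tokens only when the two streams
/// look the same. Any mismatch silently falls back to the freshly lexed
/// stream, which has dummy spans and no hygiene information.
fn old_nt_to_tokenstream(original: &[Tok], pretty_printed: &str) -> Vec<Tok> {
    // "Re-lexing" the pretty-printed text, modeled here as whitespace splitting.
    let retokenized: Vec<Tok> =
        pretty_printed.split_whitespace().map(|t| Tok(t.to_string())).collect();

    if original == retokenized.as_slice() {
        original.to_vec() // streams match: keep the real spans and hygiene
    } else {
        retokenized // AST changed (or the comparison was too strict): hygiene is lost
    }
}
```

The new approach drops this comparison entirely and instead records, while the AST is being modified, whether the cached `TokenStream` is still valid.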

Additionally, the comparison between the original and pretty-printed/retokenized token streams comes at a non-trivial runtime cost, as shown by rust-lang#79338.

This PR removes the pretty-print/compare/retokenize logic from `nt_to_tokenstream`. We only discard the original `TokenStream` under two circumstances:
* Inner attributes are used (detected by examining the AST)
* `cfg`/`cfg_attr` processing modifies the AST. This is detected by making the visitor update a flag when it performs a modification, instead of trying to detect the modification after the fact. Note that a 'matching' `cfg` (e.g. `#[cfg(not(FALSE))]`) does not actually get removed from the AST, allowing us to preserve the original `TokenStream`.

In all other cases, we preserve the original `TokenStream`.
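
As a concrete illustration of the two cases above (a minimal sketch; `MyDerive` stands for an arbitrary proc-macro derive and is not defined here):

```rust
// Case 1: cfg-stripping removes a field, so the visitor records a
// modification and the cached `TokenStream` for the item is discarded;
// fake tokens are synthesized when the derive input is built.
#[derive(MyDerive)]
struct Stripped {
    #[cfg(FALSE)]
    stripped_field: u32, // removed before `MyDerive` ever runs
    kept_field: u32,
}

// Case 2: the `cfg` matches, nothing is removed from the AST, and the
// original `TokenStream` (with its real spans and hygiene data) is handed
// to the derive unchanged.
#[derive(MyDerive)]
struct Preserved {
    #[cfg(not(FALSE))]
    kept_field: u32,
}
```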

This could use a bit of refactoring/renaming - opening for a Crater run.

r? `@ghost`
bors committed Dec 30, 2020
2 parents f3eead1 + 530a629 commit b9c403b
Showing 8 changed files with 184 additions and 408 deletions.
6 changes: 6 additions & 0 deletions compiler/rustc_ast/src/tokenstream.rs
@@ -44,6 +44,12 @@ pub enum TokenTree {
Delimited(DelimSpan, DelimToken, TokenStream),
}

#[derive(Copy, Clone)]
pub enum CanSynthesizeMissingTokens {
Yes,
No,
}

// Ensure all fields of `TokenTree` are `Send` and `Sync`.
#[cfg(parallel_compiler)]
fn _dummy()
113 changes: 82 additions & 31 deletions compiler/rustc_ast_lowering/src/lib.rs
@@ -37,7 +37,7 @@

use rustc_ast::node_id::NodeMap;
use rustc_ast::token::{self, DelimToken, Nonterminal, Token};
use rustc_ast::tokenstream::{DelimSpan, TokenStream, TokenTree};
use rustc_ast::tokenstream::{CanSynthesizeMissingTokens, DelimSpan, TokenStream, TokenTree};
use rustc_ast::visit::{self, AssocCtxt, Visitor};
use rustc_ast::walk_list;
use rustc_ast::{self as ast, *};
@@ -206,7 +206,8 @@ pub trait ResolverAstLowering {
) -> LocalDefId;
}

type NtToTokenstream = fn(&Nonterminal, &ParseSess, Span) -> TokenStream;
type NtToTokenstream =
fn(&Nonterminal, &ParseSess, Span, CanSynthesizeMissingTokens) -> TokenStream;

/// Context of `impl Trait` in code, which determines whether it is allowed in an HIR subtree,
/// and if so, what meaning it has.
@@ -393,6 +394,47 @@ enum AnonymousLifetimeMode {
PassThrough,
}

struct TokenStreamLowering<'a> {
parse_sess: &'a ParseSess,
synthesize_tokens: CanSynthesizeMissingTokens,
nt_to_tokenstream: NtToTokenstream,
}

impl<'a> TokenStreamLowering<'a> {
fn lower_token_stream(&mut self, tokens: TokenStream) -> TokenStream {
tokens.into_trees().flat_map(|tree| self.lower_token_tree(tree).into_trees()).collect()
}

fn lower_token_tree(&mut self, tree: TokenTree) -> TokenStream {
match tree {
TokenTree::Token(token) => self.lower_token(token),
TokenTree::Delimited(span, delim, tts) => {
TokenTree::Delimited(span, delim, self.lower_token_stream(tts)).into()
}
}
}

fn lower_token(&mut self, token: Token) -> TokenStream {
match token.kind {
token::Interpolated(nt) => {
let tts = (self.nt_to_tokenstream)(
&nt,
self.parse_sess,
token.span,
self.synthesize_tokens,
);
TokenTree::Delimited(
DelimSpan::from_single(token.span),
DelimToken::NoDelim,
self.lower_token_stream(tts),
)
.into()
}
_ => TokenTree::Token(token).into(),
}
}
}

struct ImplTraitTypeIdVisitor<'a> {
ids: &'a mut SmallVec<[NodeId; 1]>,
}
@@ -955,40 +997,49 @@ impl<'a, 'hir> LoweringContext<'a, 'hir> {
match *args {
MacArgs::Empty => MacArgs::Empty,
MacArgs::Delimited(dspan, delim, ref tokens) => {
MacArgs::Delimited(dspan, delim, self.lower_token_stream(tokens.clone()))
}
MacArgs::Eq(eq_span, ref tokens) => {
MacArgs::Eq(eq_span, self.lower_token_stream(tokens.clone()))
}
}
}

fn lower_token_stream(&mut self, tokens: TokenStream) -> TokenStream {
tokens.into_trees().flat_map(|tree| self.lower_token_tree(tree).into_trees()).collect()
}

fn lower_token_tree(&mut self, tree: TokenTree) -> TokenStream {
match tree {
TokenTree::Token(token) => self.lower_token(token),
TokenTree::Delimited(span, delim, tts) => {
TokenTree::Delimited(span, delim, self.lower_token_stream(tts)).into()
// This is either a non-key-value attribute, or a `macro_rules!` body.
// We either have no nonterminals present (in the case of an attribute),
// or have tokens available for all nonterminals in the case of a nested
// `macro_rules!`, e.g.:
//
// ```rust
// macro_rules! outer {
// ($e:expr) => {
// macro_rules! inner {
// () => { $e }
// }
// }
// }
// ```
//
// In both cases, we don't want to synthesize any tokens
MacArgs::Delimited(
dspan,
delim,
self.lower_token_stream(tokens.clone(), CanSynthesizeMissingTokens::No),
)
}
// This is an inert key-value attribute - it will never be visible to macros
// after it gets lowered to HIR. Therefore, we can synthesize tokens with fake
// spans to handle nonterminals in `#[doc]` (e.g. `#[doc = $e]`).
MacArgs::Eq(eq_span, ref tokens) => MacArgs::Eq(
eq_span,
self.lower_token_stream(tokens.clone(), CanSynthesizeMissingTokens::Yes),
),
}
}

fn lower_token(&mut self, token: Token) -> TokenStream {
match token.kind {
token::Interpolated(nt) => {
let tts = (self.nt_to_tokenstream)(&nt, &self.sess.parse_sess, token.span);
TokenTree::Delimited(
DelimSpan::from_single(token.span),
DelimToken::NoDelim,
self.lower_token_stream(tts),
)
.into()
}
_ => TokenTree::Token(token).into(),
fn lower_token_stream(
&self,
tokens: TokenStream,
synthesize_tokens: CanSynthesizeMissingTokens,
) -> TokenStream {
TokenStreamLowering {
parse_sess: &self.sess.parse_sess,
synthesize_tokens,
nt_to_tokenstream: self.nt_to_tokenstream,
}
.lower_token_stream(tokens)
}

/// Given an associated type constraint like one of these:
15 changes: 9 additions & 6 deletions compiler/rustc_expand/src/base.rs
@@ -2,8 +2,8 @@ use crate::expand::{self, AstFragment, Invocation};
use crate::module::DirectoryOwnership;

use rustc_ast::ptr::P;
use rustc_ast::token;
use rustc_ast::tokenstream::TokenStream;
use rustc_ast::token::{self, Nonterminal};
use rustc_ast::tokenstream::{CanSynthesizeMissingTokens, TokenStream};
use rustc_ast::visit::{AssocCtxt, Visitor};
use rustc_ast::{self as ast, Attribute, NodeId, PatKind};
use rustc_attr::{self as attr, Deprecation, HasAttrs, Stability};
@@ -119,8 +119,8 @@ impl Annotatable {
}
}

crate fn into_tokens(self, sess: &ParseSess) -> TokenStream {
let nt = match self {
crate fn into_nonterminal(self) -> Nonterminal {
match self {
Annotatable::Item(item) => token::NtItem(item),
Annotatable::TraitItem(item) | Annotatable::ImplItem(item) => {
token::NtItem(P(item.and_then(ast::AssocItem::into_item)))
@@ -137,8 +137,11 @@ impl Annotatable {
| Annotatable::Param(..)
| Annotatable::StructField(..)
| Annotatable::Variant(..) => panic!("unexpected annotatable"),
};
nt_to_tokenstream(&nt, sess, DUMMY_SP)
}
}

crate fn into_tokens(self, sess: &ParseSess) -> TokenStream {
nt_to_tokenstream(&self.into_nonterminal(), sess, DUMMY_SP, CanSynthesizeMissingTokens::No)
}

pub fn expect_item(self) -> P<ast::Item> {
13 changes: 11 additions & 2 deletions compiler/rustc_expand/src/config.rs
@@ -29,6 +29,7 @@ use smallvec::SmallVec;
pub struct StripUnconfigured<'a> {
pub sess: &'a Session,
pub features: Option<&'a Features>,
pub modified: bool,
}

fn get_features(
@@ -199,7 +200,7 @@

// `cfg_attr`-process the crate's attributes and compute the crate's features.
pub fn features(sess: &Session, mut krate: ast::Crate) -> (ast::Crate, Features) {
let mut strip_unconfigured = StripUnconfigured { sess, features: None };
let mut strip_unconfigured = StripUnconfigured { sess, features: None, modified: false };

let unconfigured_attrs = krate.attrs.clone();
let diag = &sess.parse_sess.span_diagnostic;
@@ -243,7 +244,12 @@ const CFG_ATTR_NOTE_REF: &str = "for more information, visit \
impl<'a> StripUnconfigured<'a> {
pub fn configure<T: HasAttrs>(&mut self, mut node: T) -> Option<T> {
self.process_cfg_attrs(&mut node);
self.in_cfg(node.attrs()).then_some(node)
if self.in_cfg(node.attrs()) {
Some(node)
} else {
self.modified = true;
None
}
}

/// Parse and expand all `cfg_attr` attributes into a list of attributes
@@ -270,6 +276,9 @@ impl<'a> StripUnconfigured<'a> {
return vec![attr];
}

// A `#[cfg_attr]` either gets removed, or replaced with a new attribute
self.modified = true;

let (cfg_predicate, expanded_attrs) = match self.parse_cfg_attr(&attr) {
None => return vec![],
Some(r) => r,
41 changes: 35 additions & 6 deletions compiler/rustc_expand/src/expand.rs
@@ -12,7 +12,7 @@ use rustc_ast::ptr::P;
use rustc_ast::token;
use rustc_ast::tokenstream::TokenStream;
use rustc_ast::visit::{self, AssocCtxt, Visitor};
use rustc_ast::{self as ast, AttrItem, Block, LitKind, NodeId, PatKind, Path};
use rustc_ast::{self as ast, AttrItem, AttrStyle, Block, LitKind, NodeId, PatKind, Path};
use rustc_ast::{ItemKind, MacArgs, MacCallStmt, MacStmtStyle, StmtKind, Unsafe};
use rustc_ast_pretty::pprust;
use rustc_attr::{self as attr, is_builtin_attr, HasAttrs};
@@ -522,12 +522,29 @@ impl<'a, 'b> MacroExpander<'a, 'b> {
item.visit_attrs(|attrs| attrs.retain(|a| !a.has_name(sym::derive)));
(item, Vec::new())
} else {
let mut item = StripUnconfigured {
let mut visitor = StripUnconfigured {
sess: self.cx.sess,
features: self.cx.ecfg.features,
}
.fully_configure(item);
modified: false,
};
let mut item = visitor.fully_configure(item);
item.visit_attrs(|attrs| attrs.retain(|a| !a.has_name(sym::derive)));
if visitor.modified && !derives.is_empty() {
// Erase the tokens if cfg-stripping modified the item
// This will cause us to synthesize fake tokens
// when `nt_to_tokenstream` is called on this item.
match &mut item {
Annotatable::Item(item) => item.tokens = None,
Annotatable::Stmt(stmt) => {
if let StmtKind::Item(item) = &mut stmt.kind {
item.tokens = None
} else {
panic!("Unexpected stmt {:?}", stmt);
}
}
_ => panic!("Unexpected annotatable {:?}", item),
}
}

invocations.reserve(derives.len());
let derive_placeholders = derives
@@ -622,7 +639,11 @@ impl<'a, 'b> MacroExpander<'a, 'b> {

let invocations = {
let mut collector = InvocationCollector {
cfg: StripUnconfigured { sess: &self.cx.sess, features: self.cx.ecfg.features },
cfg: StripUnconfigured {
sess: &self.cx.sess,
features: self.cx.ecfg.features,
modified: false,
},
cx: self.cx,
invocations: Vec::new(),
monotonic: self.monotonic,
@@ -716,7 +737,15 @@ impl<'a, 'b> MacroExpander<'a, 'b> {
SyntaxExtensionKind::Attr(expander) => {
self.gate_proc_macro_input(&item);
self.gate_proc_macro_attr_item(span, &item);
let tokens = item.into_tokens(&self.cx.sess.parse_sess);
let tokens = match attr.style {
AttrStyle::Outer => item.into_tokens(&self.cx.sess.parse_sess),
// FIXME: Properly collect tokens for inner attributes
AttrStyle::Inner => rustc_parse::fake_token_stream(
&self.cx.sess.parse_sess,
&item.into_nonterminal(),
span,
),
};
let attr_item = attr.unwrap_normal_item();
if let MacArgs::Eq(..) = attr_item.args {
self.cx.span_err(span, "key-value macro attributes are not supported");
9 changes: 7 additions & 2 deletions compiler/rustc_expand/src/proc_macro.rs
@@ -3,7 +3,7 @@ use crate::proc_macro_server;

use rustc_ast::ptr::P;
use rustc_ast::token;
use rustc_ast::tokenstream::{TokenStream, TokenTree};
use rustc_ast::tokenstream::{CanSynthesizeMissingTokens, TokenStream, TokenTree};
use rustc_ast::{self as ast, *};
use rustc_data_structures::sync::Lrc;
use rustc_errors::{struct_span_err, Applicability, ErrorReported};
@@ -94,7 +94,12 @@ impl MultiItemModifier for ProcMacroDerive {
let input = if item.pretty_printing_compatibility_hack() {
TokenTree::token(token::Interpolated(Lrc::new(item)), DUMMY_SP).into()
} else {
nt_to_tokenstream(&item, &ecx.sess.parse_sess, DUMMY_SP)
nt_to_tokenstream(
&item,
&ecx.sess.parse_sess,
DUMMY_SP,
CanSynthesizeMissingTokens::Yes,
)
};

let server = proc_macro_server::Rustc::new(ecx);
5 changes: 3 additions & 2 deletions compiler/rustc_expand/src/proc_macro_server.rs
@@ -2,7 +2,8 @@ use crate::base::ExtCtxt;

use rustc_ast as ast;
use rustc_ast::token;
use rustc_ast::tokenstream::{self, DelimSpan, Spacing::*, TokenStream, TreeAndSpacing};
use rustc_ast::tokenstream::{self, CanSynthesizeMissingTokens};
use rustc_ast::tokenstream::{DelimSpan, Spacing::*, TokenStream, TreeAndSpacing};
use rustc_ast_pretty::pprust;
use rustc_data_structures::sync::Lrc;
use rustc_errors::Diagnostic;
@@ -178,7 +179,7 @@ impl FromInternal<(TreeAndSpacing, &'_ ParseSess, &'_ mut Vec<Self>)>
{
TokenTree::Ident(Ident::new(sess, name.name, is_raw, name.span))
} else {
let stream = nt_to_tokenstream(&nt, sess, span);
let stream = nt_to_tokenstream(&nt, sess, span, CanSynthesizeMissingTokens::No);
TokenTree::Group(Group {
delimiter: Delimiter::None,
stream,