From 17e96879d8ea81ff9b4194ad6529df1a5784a70c Mon Sep 17 00:00:00 2001 From: Nika Layzell Date: Sun, 3 Jul 2022 00:53:20 -0400 Subject: [PATCH 1/5] proc_macro: use fxhash within the proc_macro crate Unfortunately, as it is difficult to depend on crates from within proc_macro, this is done by vendoring a copy of the hasher as a module rather than depending on the rustc_hash crate. This probably doesn't have a substantial impact up-front, however will be more relevant once symbols are interned within the proc_macro client. --- library/proc_macro/src/bridge/fxhash.rs | 117 ++++++++++++++++++++++++ library/proc_macro/src/bridge/handle.rs | 26 ++---- library/proc_macro/src/bridge/mod.rs | 2 + 3 files changed, 125 insertions(+), 20 deletions(-) create mode 100644 library/proc_macro/src/bridge/fxhash.rs diff --git a/library/proc_macro/src/bridge/fxhash.rs b/library/proc_macro/src/bridge/fxhash.rs new file mode 100644 index 0000000000000..4b1e412e24b70 --- /dev/null +++ b/library/proc_macro/src/bridge/fxhash.rs @@ -0,0 +1,117 @@ +//! This is a copy of the `rustc_hash` crate, adapted to work as a module. +//! +//! If in the future it becomes more reasonable to add dependencies to +//! `proc_macro`, this module should be removed and replaced with a dependency +//! on the `rustc_hash` crate. + +use std::collections::HashMap; +use std::convert::TryInto; +use std::default::Default; +use std::hash::BuildHasherDefault; +use std::hash::Hasher; +use std::mem::size_of; +use std::ops::BitXor; + +/// Type alias for a hashmap using the `fx` hash algorithm. +pub type FxHashMap = HashMap>; + +/// A speedy hash algorithm for use within rustc. The hashmap in liballoc +/// by default uses SipHash which isn't quite as speedy as we want. In the +/// compiler we're not really worried about DOS attempts, so we use a fast +/// non-cryptographic hash. 
+/// +/// This is the same as the algorithm used by Firefox -- which is a homespun +/// one not based on any widely-known algorithm -- though modified to produce +/// 64-bit hash values instead of 32-bit hash values. It consistently +/// out-performs an FNV-based hash within rustc itself -- the collision rate is +/// similar or slightly worse than FNV, but the speed of the hash function +/// itself is much higher because it works on up to 8 bytes at a time. +pub struct FxHasher { + hash: usize, +} + +#[cfg(target_pointer_width = "32")] +const K: usize = 0x9e3779b9; +#[cfg(target_pointer_width = "64")] +const K: usize = 0x517cc1b727220a95; + +impl Default for FxHasher { + #[inline] + fn default() -> FxHasher { + FxHasher { hash: 0 } + } +} + +impl FxHasher { + #[inline] + fn add_to_hash(&mut self, i: usize) { + self.hash = self.hash.rotate_left(5).bitxor(i).wrapping_mul(K); + } +} + +impl Hasher for FxHasher { + #[inline] + fn write(&mut self, mut bytes: &[u8]) { + #[cfg(target_pointer_width = "32")] + let read_usize = |bytes: &[u8]| u32::from_ne_bytes(bytes[..4].try_into().unwrap()); + #[cfg(target_pointer_width = "64")] + let read_usize = |bytes: &[u8]| u64::from_ne_bytes(bytes[..8].try_into().unwrap()); + + let mut hash = FxHasher { hash: self.hash }; + assert!(size_of::() <= 8); + while bytes.len() >= size_of::() { + hash.add_to_hash(read_usize(bytes) as usize); + bytes = &bytes[size_of::()..]; + } + if (size_of::() > 4) && (bytes.len() >= 4) { + hash.add_to_hash(u32::from_ne_bytes(bytes[..4].try_into().unwrap()) as usize); + bytes = &bytes[4..]; + } + if (size_of::() > 2) && bytes.len() >= 2 { + hash.add_to_hash(u16::from_ne_bytes(bytes[..2].try_into().unwrap()) as usize); + bytes = &bytes[2..]; + } + if (size_of::() > 1) && bytes.len() >= 1 { + hash.add_to_hash(bytes[0] as usize); + } + self.hash = hash.hash; + } + + #[inline] + fn write_u8(&mut self, i: u8) { + self.add_to_hash(i as usize); + } + + #[inline] + fn write_u16(&mut self, i: u16) { + 
self.add_to_hash(i as usize); + } + + #[inline] + fn write_u32(&mut self, i: u32) { + self.add_to_hash(i as usize); + } + + #[cfg(target_pointer_width = "32")] + #[inline] + fn write_u64(&mut self, i: u64) { + self.add_to_hash(i as usize); + self.add_to_hash((i >> 32) as usize); + } + + #[cfg(target_pointer_width = "64")] + #[inline] + fn write_u64(&mut self, i: u64) { + self.add_to_hash(i as usize); + } + + #[inline] + fn write_usize(&mut self, i: usize) { + self.add_to_hash(i); + } + + #[inline] + fn finish(&self) -> u64 { + self.hash as u64 + } +} diff --git a/library/proc_macro/src/bridge/handle.rs b/library/proc_macro/src/bridge/handle.rs index c219a9465d39f..00954107b7769 100644 --- a/library/proc_macro/src/bridge/handle.rs +++ b/library/proc_macro/src/bridge/handle.rs @@ -1,11 +1,13 @@ //! Server-side handles and storage for per-handle data. -use std::collections::{BTreeMap, HashMap}; -use std::hash::{BuildHasher, Hash}; +use std::collections::BTreeMap; +use std::hash::Hash; use std::num::NonZeroU32; use std::ops::{Index, IndexMut}; use std::sync::atomic::{AtomicUsize, Ordering}; +use super::fxhash::FxHashMap; + pub(super) type Handle = NonZeroU32; /// A store that associates values of type `T` with numeric handles. A value can @@ -51,31 +53,15 @@ impl IndexMut for OwnedStore { } } -// HACK(eddyb) deterministic `std::collections::hash_map::RandomState` replacement -// that doesn't require adding any dependencies to `proc_macro` (like `rustc-hash`). -#[derive(Clone)] -struct NonRandomState; - -impl BuildHasher for NonRandomState { - type Hasher = std::collections::hash_map::DefaultHasher; - #[inline] - fn build_hasher(&self) -> Self::Hasher { - Self::Hasher::new() - } -} - /// Like `OwnedStore`, but avoids storing any value more than once. 
pub(super) struct InternedStore { owned: OwnedStore, - interner: HashMap, + interner: FxHashMap, } impl InternedStore { pub(super) fn new(counter: &'static AtomicUsize) -> Self { - InternedStore { - owned: OwnedStore::new(counter), - interner: HashMap::with_hasher(NonRandomState), - } + InternedStore { owned: OwnedStore::new(counter), interner: FxHashMap::default() } } pub(super) fn alloc(&mut self, x: T) -> Handle { diff --git a/library/proc_macro/src/bridge/mod.rs b/library/proc_macro/src/bridge/mod.rs index 048ba3a8fdb79..c97d1099ecc28 100644 --- a/library/proc_macro/src/bridge/mod.rs +++ b/library/proc_macro/src/bridge/mod.rs @@ -177,6 +177,8 @@ pub mod client; #[allow(unsafe_code)] mod closure; #[forbid(unsafe_code)] +mod fxhash; +#[forbid(unsafe_code)] mod handle; #[macro_use] #[forbid(unsafe_code)] From e0dce6ec8df23adbe7c45c07ed080caf3d81a14b Mon Sep 17 00:00:00 2001 From: Nika Layzell Date: Sun, 3 Jul 2022 00:54:17 -0400 Subject: [PATCH 2/5] proc_macro: Specialize Punct::to_string This was removed in a previous part, however it should be specialized for to_string performance and consistency. --- library/proc_macro/src/lib.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/library/proc_macro/src/lib.rs b/library/proc_macro/src/lib.rs index 9ab5061c66807..80f28a5029658 100644 --- a/library/proc_macro/src/lib.rs +++ b/library/proc_macro/src/lib.rs @@ -1004,6 +1004,13 @@ impl Punct { } } +#[stable(feature = "proc_macro_lib2", since = "1.29.0")] +impl ToString for Punct { + fn to_string(&self) -> String { + self.as_char().to_string() + } +} + /// Prints the punctuation character as a string that should be losslessly convertible /// back into the same character. 
#[stable(feature = "proc_macro_lib2", since = "1.29.0")] From 491fccfbe3561b0674a7dd13ac9f00820662aa59 Mon Sep 17 00:00:00 2001 From: Nika Layzell Date: Thu, 30 Jun 2022 21:05:46 -0400 Subject: [PATCH 3/5] proc_macro: stop using a remote object handle for Ident Doing this for all unicode identifiers would require a dependency on `unicode-normalization` and `rustc_lexer`, which is currently not possible for `proc_macro` due to it being built concurrently with `std` and `core`. Instead, ASCII identifiers are validated locally, and an RPC message is used to validate unicode identifiers when needed. String values are interned on both the server and client when deserializing, to avoid unnecessary copies and keep Ident cheap to copy and move. This appears to be important for performance. The client-side interner is based roughly on the one from rustc_span, and uses an arena inspired by rustc_arena. RPC messages passing symbols always include the full value. This could potentially be optimized in the future if it is revealed to be a performance bottleneck. Despite now having a relevant implementation of Display for Ident, ToString is still specialized, as it is a hot-path for this object. The symbol infrastructure will also be used for literals in the next part. 
--- .../rustc_expand/src/proc_macro_server.rs | 91 +++----- library/proc_macro/src/bridge/arena.rs | 113 ++++++++++ library/proc_macro/src/bridge/client.rs | 10 +- library/proc_macro/src/bridge/mod.rs | 33 ++- library/proc_macro/src/bridge/server.rs | 16 +- library/proc_macro/src/bridge/symbol.rs | 205 ++++++++++++++++++ library/proc_macro/src/lib.rs | 47 ++-- .../ui/proc-macro/invalid-punct-ident-2.rs | 18 +- .../proc-macro/invalid-punct-ident-2.stderr | 2 +- .../ui/proc-macro/invalid-punct-ident-3.rs | 18 +- .../proc-macro/invalid-punct-ident-3.stderr | 2 +- 11 files changed, 441 insertions(+), 114 deletions(-) create mode 100644 library/proc_macro/src/bridge/arena.rs create mode 100644 library/proc_macro/src/bridge/symbol.rs diff --git a/compiler/rustc_expand/src/proc_macro_server.rs b/compiler/rustc_expand/src/proc_macro_server.rs index 411d6be9c44f4..ffd5e8763e36b 100644 --- a/compiler/rustc_expand/src/proc_macro_server.rs +++ b/compiler/rustc_expand/src/proc_macro_server.rs @@ -11,13 +11,13 @@ use rustc_parse::lexer::nfc_normalize; use rustc_parse::parse_stream_from_source_str; use rustc_session::parse::ParseSess; use rustc_span::def_id::CrateNum; -use rustc_span::symbol::{self, kw, sym, Symbol}; +use rustc_span::symbol::{self, sym, Symbol}; use rustc_span::{BytePos, FileName, Pos, SourceFile, Span}; -use pm::bridge::{server, DelimSpan, ExpnGlobals, Group, Punct, TokenTree}; +use pm::bridge::{server, DelimSpan, ExpnGlobals, Group, Ident, Punct, TokenTree}; use pm::{Delimiter, Level, LineColumn}; +use std::ascii; use std::ops::Bound; -use std::{ascii, panic}; trait FromInternal { fn from_internal(x: T) -> Self; @@ -50,7 +50,7 @@ impl ToInternal for Delimiter { } impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)> - for Vec> + for Vec> { fn from_internal((stream, rustc): (TokenStream, &mut Rustc<'_, '_>)) -> Self { use rustc_ast::token::*; @@ -135,13 +135,12 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)> Question => op("?"), SingleQuote => op("'"), - 
Ident(name, false) if name == kw::DollarCrate => trees.push(TokenTree::Ident(Ident::dollar_crate(span))), - Ident(name, is_raw) => trees.push(TokenTree::Ident(Ident::new(rustc.sess(), name, is_raw, span))), + Ident(sym, is_raw) => trees.push(TokenTree::Ident(Ident { sym, is_raw, span })), Lifetime(name) => { let ident = symbol::Ident::new(name, span).without_first_quote(); trees.extend([ TokenTree::Punct(Punct { ch: b'\'', joint: true, span }), - TokenTree::Ident(Ident::new(rustc.sess(), ident.name, false, span)), + TokenTree::Ident(Ident { sym: ident.name, is_raw: false, span }), ]); } Literal(lit) => trees.push(TokenTree::Literal(self::Literal { lit, span })), @@ -170,7 +169,7 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)> } Interpolated(nt) if let NtIdent(ident, is_raw) = *nt => { - trees.push(TokenTree::Ident(Ident::new(rustc.sess(), ident.name, is_raw, ident.span))) + trees.push(TokenTree::Ident(Ident { sym: ident.name, is_raw, span: ident.span })) } Interpolated(nt) => { @@ -200,11 +199,14 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)> } } -impl ToInternal for TokenTree { +impl ToInternal + for (TokenTree, &mut Rustc<'_, '_>) +{ fn to_internal(self) -> TokenStream { use rustc_ast::token::*; - let (ch, joint, span) = match self { + let (tree, rustc) = self; + let (ch, joint, span) = match tree { TokenTree::Punct(Punct { ch, joint, span }) => (ch, joint, span), TokenTree::Group(Group { delimiter, stream, span: DelimSpan { open, close, .. 
} }) => { return tokenstream::TokenTree::Delimited( @@ -215,6 +217,7 @@ impl ToInternal for TokenTree { .into(); } TokenTree::Ident(self::Ident { sym, is_raw, span }) => { + rustc.sess().symbol_gallery.insert(sym, span); return tokenstream::TokenTree::token(Ident(sym, is_raw), span).into(); } TokenTree::Literal(self::Literal { @@ -289,33 +292,6 @@ impl ToInternal for Level { pub struct FreeFunctions; -#[derive(Copy, Clone, PartialEq, Eq, Hash)] -pub struct Ident { - sym: Symbol, - is_raw: bool, - span: Span, -} - -impl Ident { - fn new(sess: &ParseSess, sym: Symbol, is_raw: bool, span: Span) -> Ident { - let sym = nfc_normalize(sym.as_str()); - let string = sym.as_str(); - if !rustc_lexer::is_ident(string) { - panic!("`{:?}` is not a valid identifier", string) - } - if is_raw && !sym.can_be_raw() { - panic!("`{}` cannot be a raw identifier", string); - } - sess.symbol_gallery.insert(sym, span); - Ident { sym, is_raw, span } - } - - fn dollar_crate(span: Span) -> Ident { - // `$crate` is accepted as an ident only if it comes from the compiler. - Ident { sym: kw::DollarCrate, is_raw: false, span } - } -} - // FIXME(eddyb) `Literal` should not expose internal `Debug` impls. 
#[derive(Clone, Debug)] pub struct Literal { @@ -357,12 +333,12 @@ impl<'a, 'b> Rustc<'a, 'b> { impl server::Types for Rustc<'_, '_> { type FreeFunctions = FreeFunctions; type TokenStream = TokenStream; - type Ident = Ident; type Literal = Literal; type SourceFile = Lrc; type MultiSpan = Vec; type Diagnostic = Diagnostic; type Span = Span; + type Symbol = Symbol; } impl server::FreeFunctions for Rustc<'_, '_> { @@ -453,22 +429,22 @@ impl server::TokenStream for Rustc<'_, '_> { fn from_token_tree( &mut self, - tree: TokenTree, + tree: TokenTree, ) -> Self::TokenStream { - tree.to_internal() + (tree, &mut *self).to_internal() } fn concat_trees( &mut self, base: Option, - trees: Vec>, + trees: Vec>, ) -> Self::TokenStream { let mut builder = tokenstream::TokenStreamBuilder::new(); if let Some(base) = base { builder.push(base); } for tree in trees { - builder.push(tree.to_internal()); + builder.push((tree, &mut *self).to_internal()); } builder.build() } @@ -491,25 +467,11 @@ impl server::TokenStream for Rustc<'_, '_> { fn into_trees( &mut self, stream: Self::TokenStream, - ) -> Vec> { + ) -> Vec> { FromInternal::from_internal((stream, self)) } } -impl server::Ident for Rustc<'_, '_> { - fn new(&mut self, string: &str, span: Self::Span, is_raw: bool) -> Self::Ident { - Ident::new(self.sess(), Symbol::intern(string), is_raw, span) - } - - fn span(&mut self, ident: Self::Ident) -> Self::Span { - ident.span - } - - fn with_span(&mut self, ident: Self::Ident, span: Self::Span) -> Self::Ident { - Ident { span, ..ident } - } -} - impl server::Literal for Rustc<'_, '_> { fn from_str(&mut self, s: &str) -> Result { let name = FileName::proc_macro_source_code(s); @@ -812,6 +774,13 @@ impl server::Span for Rustc<'_, '_> { } } +impl server::Symbol for Rustc<'_, '_> { + fn normalize_and_validate_ident(&mut self, string: &str) -> Result { + let sym = nfc_normalize(string); + if rustc_lexer::is_ident(sym.as_str()) { Ok(sym) } else { Err(()) } + } +} + impl server::Server for 
Rustc<'_, '_> { fn globals(&mut self) -> ExpnGlobals { ExpnGlobals { @@ -820,4 +789,12 @@ impl server::Server for Rustc<'_, '_> { mixed_site: self.mixed_site, } } + + fn intern_symbol(string: &str) -> Self::Symbol { + Symbol::intern(string) + } + + fn with_symbol_string(symbol: &Self::Symbol, f: impl FnOnce(&str)) { + f(&symbol.as_str()) + } } diff --git a/library/proc_macro/src/bridge/arena.rs b/library/proc_macro/src/bridge/arena.rs new file mode 100644 index 0000000000000..fa72d2816ebfe --- /dev/null +++ b/library/proc_macro/src/bridge/arena.rs @@ -0,0 +1,113 @@ +//! A minimal arena allocator inspired by `rustc_arena::DroplessArena`. +//! +//! This is unfortunately a minimal re-implementation rather than a dependency +//! as it is difficult to depend on crates from within `proc_macro`, due to it +//! being built at the same time as `std`. + +use std::cell::{Cell, RefCell}; +use std::cmp; +use std::mem::MaybeUninit; +use std::ops::Range; +use std::ptr; +use std::slice; +use std::str; + +// The arenas start with PAGE-sized chunks, and then each new chunk is twice as +// big as its predecessor, up until we reach HUGE_PAGE-sized chunks, whereupon +// we stop growing. This scales well, from arenas that are barely used up to +// arenas that are used for 100s of MiBs. Note also that the chosen sizes match +// the usual sizes of pages and huge pages on Linux. +const PAGE: usize = 4096; +const HUGE_PAGE: usize = 2 * 1024 * 1024; + +/// A minimal arena allocator inspired by `rustc_arena::DroplessArena`. +/// +/// This is unfortunately a complete re-implementation rather than a dependency +/// as it is difficult to depend on crates from within `proc_macro`, due to it +/// being built at the same time as `std`. +/// +/// This arena doesn't have support for allocating anything other than byte +/// slices, as that is all that is necessary. 
+pub(crate) struct Arena { + start: Cell<*mut MaybeUninit>, + end: Cell<*mut MaybeUninit>, + chunks: RefCell]>>>, +} + +impl Arena { + pub(crate) fn new() -> Self { + Arena { + start: Cell::new(ptr::null_mut()), + end: Cell::new(ptr::null_mut()), + chunks: RefCell::new(Vec::new()), + } + } + + /// Add a new chunk with at least `additional` free bytes. + #[inline(never)] + #[cold] + fn grow(&self, additional: usize) { + let mut chunks = self.chunks.borrow_mut(); + let mut new_cap; + if let Some(last_chunk) = chunks.last_mut() { + // If the previous chunk's len is less than HUGE_PAGE + // bytes, then this chunk will be least double the previous + // chunk's size. + new_cap = last_chunk.len().min(HUGE_PAGE / 2); + new_cap *= 2; + } else { + new_cap = PAGE; + } + // Also ensure that this chunk can fit `additional`. + new_cap = cmp::max(additional, new_cap); + + let mut chunk = Box::new_uninit_slice(new_cap); + let Range { start, end } = chunk.as_mut_ptr_range(); + self.start.set(start); + self.end.set(end); + chunks.push(chunk); + } + + /// Allocates a byte slice with specified size from the current memory + /// chunk. Returns `None` if there is no free space left to satisfy the + /// request. + fn alloc_raw_without_grow(&self, bytes: usize) -> Option<&mut [MaybeUninit]> { + let start = self.start.get().addr(); + let old_end = self.end.get(); + let end = old_end.addr(); + + let new_end = end.checked_sub(bytes)?; + if start <= new_end { + let new_end = old_end.with_addr(new_end); + self.end.set(new_end); + // SAFETY: `bytes` bytes starting at `new_end` were just reserved. + Some(unsafe { slice::from_raw_parts_mut(new_end, bytes) }) + } else { + None + } + } + + fn alloc_raw(&self, bytes: usize) -> &mut [MaybeUninit] { + if bytes == 0 { + return &mut []; + } + + loop { + if let Some(a) = self.alloc_raw_without_grow(bytes) { + break a; + } + // No free space left. Allocate a new chunk to satisfy the request. + // On failure the grow will panic or abort. 
+ self.grow(bytes); + } + } + + pub(crate) fn alloc_str<'a>(&'a self, string: &str) -> &'a mut str { + let alloc = self.alloc_raw(string.len()); + let bytes = MaybeUninit::write_slice(alloc, string.as_bytes()); + + // SAFETY: we convert from `&str` to `&[u8]`, clone it into the arena, + // and immediately convert the clone back to `&str`. + unsafe { str::from_utf8_unchecked_mut(bytes) } + } +} diff --git a/library/proc_macro/src/bridge/client.rs b/library/proc_macro/src/bridge/client.rs index 8254bd6e5024a..6d48faf5413e8 100644 --- a/library/proc_macro/src/bridge/client.rs +++ b/library/proc_macro/src/bridge/client.rs @@ -181,7 +181,6 @@ define_handles! { Diagnostic, 'interned: - Ident, Span, } @@ -242,6 +241,8 @@ impl fmt::Debug for Span { } } +pub(crate) use super::symbol::Symbol; + macro_rules! define_client_side { ($($name:ident { $(fn $method:ident($($arg:ident: $arg_ty:ty),* $(,)?) $(-> $ret_ty:ty)*;)* @@ -405,6 +406,9 @@ fn run_client DecodeMut<'a, 's, ()>, R: Encode<()>>( panic::catch_unwind(panic::AssertUnwindSafe(|| { maybe_install_panic_hook(force_show_panics); + // Make sure the symbol store is empty before decoding inputs. + Symbol::invalidate_all(); + let reader = &mut &buf[..]; let (globals, input) = <(ExpnGlobals, A)>::decode(reader, &mut ()); @@ -438,6 +442,10 @@ fn run_client DecodeMut<'a, 's, ()>, R: Encode<()>>( buf.clear(); Err::<(), _>(e).encode(&mut buf, &mut ()); }); + + // Now that a response has been serialized, invalidate all symbols + // registered with the interner. + Symbol::invalidate_all(); buf } diff --git a/library/proc_macro/src/bridge/mod.rs b/library/proc_macro/src/bridge/mod.rs index c97d1099ecc28..1015c0d725980 100644 --- a/library/proc_macro/src/bridge/mod.rs +++ b/library/proc_macro/src/bridge/mod.rs @@ -65,11 +65,11 @@ macro_rules! 
with_api { fn from_str(src: &str) -> $S::TokenStream; fn to_string($self: &$S::TokenStream) -> String; fn from_token_tree( - tree: TokenTree<$S::TokenStream, $S::Span, $S::Ident, $S::Literal>, + tree: TokenTree<$S::TokenStream, $S::Span, $S::Symbol, $S::Literal>, ) -> $S::TokenStream; fn concat_trees( base: Option<$S::TokenStream>, - trees: Vec>, + trees: Vec>, ) -> $S::TokenStream; fn concat_streams( base: Option<$S::TokenStream>, @@ -77,12 +77,7 @@ macro_rules! with_api { ) -> $S::TokenStream; fn into_trees( $self: $S::TokenStream - ) -> Vec>; - }, - Ident { - fn new(string: &str, span: $S::Span, is_raw: bool) -> $S::Ident; - fn span($self: $S::Ident) -> $S::Span; - fn with_span($self: $S::Ident, span: $S::Span) -> $S::Ident; + ) -> Vec>; }, Literal { fn drop($self: $S::Literal); @@ -146,6 +141,9 @@ macro_rules! with_api { fn save_span($self: $S::Span) -> usize; fn recover_proc_macro_span(id: usize) -> $S::Span; }, + Symbol { + fn normalize_and_validate_ident(string: &str) -> Result<$S::Symbol, ()>; + }, } }; } @@ -170,6 +168,8 @@ macro_rules! 
reverse_decode { } } +#[allow(unsafe_code)] +mod arena; #[allow(unsafe_code)] mod buffer; #[forbid(unsafe_code)] @@ -189,6 +189,8 @@ mod scoped_cell; mod selfless_reify; #[forbid(unsafe_code)] pub mod server; +#[allow(unsafe_code)] +mod symbol; use buffer::Buffer; pub use rpc::PanicMessage; @@ -466,16 +468,25 @@ pub struct Punct { compound_traits!(struct Punct { ch, joint, span }); +#[derive(Copy, Clone, Eq, PartialEq)] +pub struct Ident { + pub sym: Symbol, + pub is_raw: bool, + pub span: Span, +} + +compound_traits!(struct Ident { sym, is_raw, span }); + #[derive(Clone)] -pub enum TokenTree { +pub enum TokenTree { Group(Group), Punct(Punct), - Ident(Ident), + Ident(Ident), Literal(Literal), } compound_traits!( - enum TokenTree { + enum TokenTree { Group(tt), Punct(tt), Ident(tt), diff --git a/library/proc_macro/src/bridge/server.rs b/library/proc_macro/src/bridge/server.rs index ea8b833b48fde..53668ecfead78 100644 --- a/library/proc_macro/src/bridge/server.rs +++ b/library/proc_macro/src/bridge/server.rs @@ -8,12 +8,12 @@ use super::client::HandleStore; pub trait Types { type FreeFunctions: 'static; type TokenStream: 'static + Clone; - type Ident: 'static + Copy + Eq + Hash; type Literal: 'static + Clone; type SourceFile: 'static + Clone; type MultiSpan: 'static; type Diagnostic: 'static; type Span: 'static + Copy + Eq + Hash; + type Symbol: 'static; } /// Declare an associated fn of one of the traits below, adding necessary @@ -38,6 +38,12 @@ macro_rules! declare_server_traits { pub trait Server: Types $(+ $name)* { fn globals(&mut self) -> ExpnGlobals; + + /// Intern a symbol received from RPC + fn intern_symbol(ident: &str) -> Self::Symbol; + + /// Recover the string value of a symbol, and invoke a callback with it. 
+ fn with_symbol_string(symbol: &Self::Symbol, f: impl FnOnce(&str)); } } } @@ -49,6 +55,12 @@ impl Server for MarkedTypes { fn globals(&mut self) -> ExpnGlobals { <_>::mark(Server::globals(&mut self.0)) } + fn intern_symbol(ident: &str) -> Self::Symbol { + <_>::mark(S::intern_symbol(ident)) + } + fn with_symbol_string(symbol: &Self::Symbol, f: impl FnOnce(&str)) { + S::with_symbol_string(symbol.unmark(), f) + } } macro_rules! define_mark_types_impls { @@ -81,11 +93,13 @@ macro_rules! define_dispatcher_impl { pub trait DispatcherTrait { // HACK(eddyb) these are here to allow `Self::$name` to work below. $(type $name;)* + fn dispatch(&mut self, buf: Buffer) -> Buffer; } impl DispatcherTrait for Dispatcher> { $(type $name = as Types>::$name;)* + fn dispatch(&mut self, mut buf: Buffer) -> Buffer { let Dispatcher { handle_store, server } = self; diff --git a/library/proc_macro/src/bridge/symbol.rs b/library/proc_macro/src/bridge/symbol.rs new file mode 100644 index 0000000000000..930c111455df0 --- /dev/null +++ b/library/proc_macro/src/bridge/symbol.rs @@ -0,0 +1,205 @@ +//! Client-side interner used for symbols. +//! +//! This is roughly based on the symbol interner from `rustc_span` and the +//! DroplessArena from `rustc_arena`. It is unfortunately a complete +//! copy/re-implementation rather than a dependency as it is difficult to depend +//! on crates from within `proc_macro`, due to it being built at the same time +//! as `std`. +//! +//! If at some point in the future it becomes easier to add dependencies to +//! proc_macro, this module should probably be removed or simplified. + +use std::cell::RefCell; +use std::num::NonZeroU32; +use std::str; + +use super::*; + +/// Handle for a symbol string stored within the Interner. 
+#[derive(Copy, Clone, PartialEq, Eq, Hash)] +pub struct Symbol(NonZeroU32); + +impl !Send for Symbol {} +impl !Sync for Symbol {} + +impl Symbol { + /// Intern a new `Symbol` + pub(crate) fn new(string: &str) -> Self { + INTERNER.with_borrow_mut(|i| i.intern(string)) + } + + /// Create a new `Symbol` for an identifier. + /// + /// Validates and normalizes before converting it to a symbol. + pub(crate) fn new_ident(string: &str, is_raw: bool) -> Self { + // Fast-path: check if this is a valid ASCII identifier + if Self::is_valid_ascii_ident(string.as_bytes()) { + if is_raw && !Self::can_be_raw(string) { + panic!("`{}` cannot be a raw identifier", string); + } + return Self::new(string); + } + + // Slow-path: If the string is already ASCII we're done, otherwise ask + // our server to do this for us over RPC. + // We don't need to check for identifiers which can't be raw here, + // because all of them are ASCII. + if string.is_ascii() { + Err(()) + } else { + client::Symbol::normalize_and_validate_ident(string) + } + .unwrap_or_else(|_| panic!("`{:?}` is not a valid identifier", string)) + } + + /// Run a callback with the symbol's string value. + pub(crate) fn with(self, f: impl FnOnce(&str) -> R) -> R { + INTERNER.with_borrow(|i| f(i.get(self))) + } + + /// Clear out the thread-local symbol interner, making all previously + /// created symbols invalid such that `with` will panic when called on them. + pub(crate) fn invalidate_all() { + INTERNER.with_borrow_mut(|i| i.clear()); + } + + /// Check if the ident is a valid ASCII identifier. + /// + /// This is a short-circuit which is cheap to implement within the + /// proc-macro client to avoid RPC when creating simple idents, but may + /// return `false` for a valid identifier if it contains non-ASCII + /// characters. + fn is_valid_ascii_ident(bytes: &[u8]) -> bool { + matches!(bytes.first(), Some(b'_' | b'a'..=b'z' | b'A'..=b'Z')) + && bytes[1..] 
+ .iter() + .all(|b| matches!(b, b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9')) + } + + // Mimics the behaviour of `Symbol::can_be_raw` from `rustc_span` + fn can_be_raw(string: &str) -> bool { + match string { + "_" | "super" | "self" | "Self" | "crate" => false, + _ => true, + } + } +} + +impl fmt::Debug for Symbol { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.with(|s| fmt::Debug::fmt(s, f)) + } +} + +impl ToString for Symbol { + fn to_string(&self) -> String { + self.with(|s| s.to_owned()) + } +} + +impl fmt::Display for Symbol { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.with(|s| fmt::Display::fmt(s, f)) + } +} + +impl Encode for Symbol { + fn encode(self, w: &mut Writer, s: &mut S) { + self.with(|sym| sym.encode(w, s)) + } +} + +impl DecodeMut<'_, '_, client::HandleStore>> + for Marked +{ + fn decode(r: &mut Reader<'_>, s: &mut client::HandleStore>) -> Self { + Mark::mark(S::intern_symbol(<&str>::decode(r, s))) + } +} + +impl Encode>> + for Marked +{ + fn encode(self, w: &mut Writer, s: &mut client::HandleStore>) { + S::with_symbol_string(&self.unmark(), |sym| sym.encode(w, s)) + } +} + +impl DecodeMut<'_, '_, S> for Symbol { + fn decode(r: &mut Reader<'_>, s: &mut S) -> Self { + Symbol::new(<&str>::decode(r, s)) + } +} + +thread_local! { + static INTERNER: RefCell = RefCell::new(Interner { + arena: arena::Arena::new(), + names: fxhash::FxHashMap::default(), + strings: Vec::new(), + // Start with a base of 1 to make sure that `NonZeroU32` works. + sym_base: NonZeroU32::new(1).unwrap(), + }); +} + +/// Basic interner for a `Symbol`, inspired by the one in `rustc_span`. +struct Interner { + arena: arena::Arena, + // SAFETY: These `'static` lifetimes are actually references to data owned + // by the Arena. This is safe, as we never return them as static references + // from `Interner`. 
+ names: fxhash::FxHashMap<&'static str, Symbol>, + strings: Vec<&'static str>, + // The offset to apply to symbol names stored in the interner. This is used + // to ensure that symbol names are not re-used after the interner is + // cleared. + sym_base: NonZeroU32, +} + +impl Interner { + fn intern(&mut self, string: &str) -> Symbol { + if let Some(&name) = self.names.get(string) { + return name; + } + + let name = Symbol( + self.sym_base + .checked_add(self.strings.len() as u32) + .expect("`proc_macro` symbol name overflow"), + ); + + let string: &str = self.arena.alloc_str(string); + + // SAFETY: we can extend the arena allocation to `'static` because we + // only access these while the arena is still alive. + let string: &'static str = unsafe { &*(string as *const str) }; + self.strings.push(string); + self.names.insert(string, name); + name + } + + /// Read a symbol's value from the store while it is held. + fn get(&self, symbol: Symbol) -> &str { + // NOTE: Subtract out the offset which was added to make the symbol + // nonzero and prevent symbol name re-use. + let name = symbol + .0 + .get() + .checked_sub(self.sym_base.get()) + .expect("use-after-free of `proc_macro` symbol"); + self.strings[name as usize] + } + + /// Clear all symbols from the store, invalidating them such that `get` will + /// panic if they are accessed in the future. + fn clear(&mut self) { + // NOTE: Be careful not to panic here, as we may be called on the client + // when a `catch_unwind` isn't installed. + self.sym_base = self.sym_base.saturating_add(self.strings.len() as u32); + self.names.clear(); + self.strings.clear(); + + // SAFETY: This is cleared after the names and strings tables are + // cleared out, so no references into the arena should remain. 
+ self.arena = arena::Arena::new(); + } +} diff --git a/library/proc_macro/src/lib.rs b/library/proc_macro/src/lib.rs index 80f28a5029658..911deaef8c978 100644 --- a/library/proc_macro/src/lib.rs +++ b/library/proc_macro/src/lib.rs @@ -24,10 +24,14 @@ #![feature(staged_api)] #![feature(allow_internal_unstable)] #![feature(decl_macro)] +#![feature(local_key_cell_methods)] +#![feature(maybe_uninit_write_slice)] #![feature(negative_impls)] +#![feature(new_uninit)] #![feature(restricted_std)] #![feature(rustc_attrs)] #![feature(min_specialization)] +#![feature(strict_provenance)] #![recursion_limit = "256"] #[unstable(feature = "proc_macro_internals", issue = "27812")] @@ -214,7 +218,7 @@ fn tree_to_bridge_tree( ) -> bridge::TokenTree< bridge::client::TokenStream, bridge::client::Span, - bridge::client::Ident, + bridge::client::Symbol, bridge::client::Literal, > { match tree { @@ -240,7 +244,7 @@ struct ConcatTreesHelper { bridge::TokenTree< bridge::client::TokenStream, bridge::client::Span, - bridge::client::Ident, + bridge::client::Symbol, bridge::client::Literal, >, >, @@ -367,7 +371,7 @@ pub mod token_stream { bridge::TokenTree< bridge::client::TokenStream, bridge::client::Span, - bridge::client::Ident, + bridge::client::Symbol, bridge::client::Literal, >, >, @@ -1048,7 +1052,7 @@ impl PartialEq for char { /// An identifier (`ident`). #[derive(Clone)] #[stable(feature = "proc_macro_lib2", since = "1.29.0")] -pub struct Ident(bridge::client::Ident); +pub struct Ident(bridge::Ident); impl Ident { /// Creates a new `Ident` with the given `string` as well as the specified @@ -1072,7 +1076,11 @@ impl Ident { /// tokens, requires a `Span` to be specified at construction. 
#[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn new(string: &str, span: Span) -> Ident { - Ident(bridge::client::Ident::new(string, span.0, false)) + Ident(bridge::Ident { + sym: bridge::client::Symbol::new_ident(string, false), + is_raw: false, + span: span.0, + }) } /// Same as `Ident::new`, but creates a raw identifier (`r#ident`). @@ -1081,38 +1089,45 @@ impl Ident { /// (e.g. `self`, `super`) are not supported, and will cause a panic. #[stable(feature = "proc_macro_raw_ident", since = "1.47.0")] pub fn new_raw(string: &str, span: Span) -> Ident { - Ident(bridge::client::Ident::new(string, span.0, true)) + Ident(bridge::Ident { + sym: bridge::client::Symbol::new_ident(string, true), + is_raw: true, + span: span.0, + }) } /// Returns the span of this `Ident`, encompassing the entire string returned - /// by [`to_string`](Self::to_string). + /// by [`to_string`](ToString::to_string). #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn span(&self) -> Span { - Span(self.0.span()) + Span(self.0.span) } /// Configures the span of this `Ident`, possibly changing its hygiene context. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn set_span(&mut self, span: Span) { - self.0 = self.0.with_span(span.0); + self.0.span = span.0; } } -// N.B., the bridge only provides `to_string`, implement `fmt::Display` -// based on it (the reverse of the usual relationship between the two). -#[stable(feature = "proc_macro_lib", since = "1.15.0")] +/// Converts the identifier to a string that should be losslessly convertible +/// back into the same identifier. +#[stable(feature = "proc_macro_lib2", since = "1.29.0")] impl ToString for Ident { fn to_string(&self) -> String { - TokenStream::from(TokenTree::from(self.clone())).to_string() + self.0.sym.with(|sym| if self.0.is_raw { ["r#", sym].concat() } else { sym.to_owned() }) } } -/// Prints the identifier as a string that should be losslessly convertible -/// back into the same identifier. 
+/// Prints the identifier as a string that should be losslessly convertible back +/// into the same identifier. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] impl fmt::Display for Ident { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(&self.to_string()) + if self.0.is_raw { + f.write_str("r#")?; + } + fmt::Display::fmt(&self.0.sym, f) } } diff --git a/src/test/ui/proc-macro/invalid-punct-ident-2.rs b/src/test/ui/proc-macro/invalid-punct-ident-2.rs index 04a0a8733115a..151f6203439f7 100644 --- a/src/test/ui/proc-macro/invalid-punct-ident-2.rs +++ b/src/test/ui/proc-macro/invalid-punct-ident-2.rs @@ -1,17 +1,9 @@ // aux-build:invalid-punct-ident.rs -// rustc-env:RUST_BACKTRACE=0 - -// FIXME https://github.com/rust-lang/rust/issues/59998 -// normalize-stderr-test "thread.*panicked.*proc_macro_server.rs.*\n" -> "" -// normalize-stderr-test "note:.*RUST_BACKTRACE=1.*\n" -> "" -// normalize-stderr-test "\nerror: internal compiler error.*\n\n" -> "" -// normalize-stderr-test "note:.*unexpectedly panicked.*\n\n" -> "" -// normalize-stderr-test "note: we would appreciate a bug report.*\n\n" -> "" -// normalize-stderr-test "note: compiler flags.*\n\n" -> "" -// normalize-stderr-test "note: rustc.*running on.*\n\n" -> "" -// normalize-stderr-test "query stack during panic:\n" -> "" -// normalize-stderr-test "we're just showing a limited slice of the query stack\n" -> "" -// normalize-stderr-test "end of query stack\n" -> "" +// ignore-stage1 +// only-linux +// +// FIXME: This should be a normal (stage1, all platforms) test in +// src/test/ui/proc-macro once issue #59998 is fixed. 
#[macro_use] extern crate invalid_punct_ident; diff --git a/src/test/ui/proc-macro/invalid-punct-ident-2.stderr b/src/test/ui/proc-macro/invalid-punct-ident-2.stderr index f7e1f4bc7d361..0bd07bd649e47 100644 --- a/src/test/ui/proc-macro/invalid-punct-ident-2.stderr +++ b/src/test/ui/proc-macro/invalid-punct-ident-2.stderr @@ -1,5 +1,5 @@ error: proc macro panicked - --> $DIR/invalid-punct-ident-2.rs:19:1 + --> $DIR/invalid-punct-ident-2.rs:11:1 | LL | invalid_ident!(); | ^^^^^^^^^^^^^^^^ diff --git a/src/test/ui/proc-macro/invalid-punct-ident-3.rs b/src/test/ui/proc-macro/invalid-punct-ident-3.rs index aebba341625ae..7c22a56b6fbe9 100644 --- a/src/test/ui/proc-macro/invalid-punct-ident-3.rs +++ b/src/test/ui/proc-macro/invalid-punct-ident-3.rs @@ -1,17 +1,9 @@ // aux-build:invalid-punct-ident.rs -// rustc-env:RUST_BACKTRACE=0 - -// FIXME https://github.com/rust-lang/rust/issues/59998 -// normalize-stderr-test "thread.*panicked.*proc_macro_server.rs.*\n" -> "" -// normalize-stderr-test "note:.*RUST_BACKTRACE=1.*\n" -> "" -// normalize-stderr-test "\nerror: internal compiler error.*\n\n" -> "" -// normalize-stderr-test "note:.*unexpectedly panicked.*\n\n" -> "" -// normalize-stderr-test "note: we would appreciate a bug report.*\n\n" -> "" -// normalize-stderr-test "note: compiler flags.*\n\n" -> "" -// normalize-stderr-test "note: rustc.*running on.*\n\n" -> "" -// normalize-stderr-test "query stack during panic:\n" -> "" -// normalize-stderr-test "we're just showing a limited slice of the query stack\n" -> "" -// normalize-stderr-test "end of query stack\n" -> "" +// ignore-stage1 +// only-linux +// +// FIXME: This should be a normal (stage1, all platforms) test in +// src/test/ui/proc-macro once issue #59998 is fixed. 
#[macro_use] extern crate invalid_punct_ident; diff --git a/src/test/ui/proc-macro/invalid-punct-ident-3.stderr b/src/test/ui/proc-macro/invalid-punct-ident-3.stderr index 541c71d74db53..a0cc5ef6e2d62 100644 --- a/src/test/ui/proc-macro/invalid-punct-ident-3.stderr +++ b/src/test/ui/proc-macro/invalid-punct-ident-3.stderr @@ -1,5 +1,5 @@ error: proc macro panicked - --> $DIR/invalid-punct-ident-3.rs:19:1 + --> $DIR/invalid-punct-ident-3.rs:11:1 | LL | invalid_raw_ident!(); | ^^^^^^^^^^^^^^^^^^^^ From b34c79f8f1ef4d0149ad4bf77e1759c07a9a01a8 Mon Sep 17 00:00:00 2001 From: Nika Layzell Date: Sun, 3 Jul 2022 01:04:31 -0400 Subject: [PATCH 4/5] proc_macro: stop using a remote object handle for Literal This builds on the symbol infrastructure built for `Ident` to replicate the `LitKind` and `Lit` structures in rustc within the `proc_macro` client, allowing literals to be fully created and interacted with from the client thread. Only parsing and subspan operations still require sync RPC. --- .../rustc_expand/src/proc_macro_server.rs | 330 ++++++++---------- library/proc_macro/src/bridge/client.rs | 20 -- library/proc_macro/src/bridge/mod.rs | 76 ++-- library/proc_macro/src/bridge/server.rs | 1 - library/proc_macro/src/lib.rs | 138 ++++++-- 5 files changed, 305 insertions(+), 260 deletions(-) diff --git a/compiler/rustc_expand/src/proc_macro_server.rs b/compiler/rustc_expand/src/proc_macro_server.rs index ffd5e8763e36b..0618c9aa084d6 100644 --- a/compiler/rustc_expand/src/proc_macro_server.rs +++ b/compiler/rustc_expand/src/proc_macro_server.rs @@ -14,9 +14,10 @@ use rustc_span::def_id::CrateNum; use rustc_span::symbol::{self, sym, Symbol}; use rustc_span::{BytePos, FileName, Pos, SourceFile, Span}; -use pm::bridge::{server, DelimSpan, ExpnGlobals, Group, Ident, Punct, TokenTree}; +use pm::bridge::{ + server, DelimSpan, ExpnGlobals, Group, Ident, LitKind, Literal, Punct, TokenTree, +}; use pm::{Delimiter, Level, LineColumn}; -use std::ascii; use std::ops::Bound; trait 
FromInternal { @@ -49,9 +50,40 @@ impl ToInternal for Delimiter { } } -impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)> - for Vec> -{ +impl FromInternal for LitKind { + fn from_internal(kind: token::LitKind) -> Self { + match kind { + token::Byte => LitKind::Byte, + token::Char => LitKind::Char, + token::Integer => LitKind::Integer, + token::Float => LitKind::Float, + token::Str => LitKind::Str, + token::StrRaw(n) => LitKind::StrRaw(n), + token::ByteStr => LitKind::ByteStr, + token::ByteStrRaw(n) => LitKind::ByteStrRaw(n), + token::Err => LitKind::Err, + token::Bool => unreachable!(), + } + } +} + +impl ToInternal for LitKind { + fn to_internal(self) -> token::LitKind { + match self { + LitKind::Byte => token::Byte, + LitKind::Char => token::Char, + LitKind::Integer => token::Integer, + LitKind::Float => token::Float, + LitKind::Str => token::Str, + LitKind::StrRaw(n) => token::StrRaw(n), + LitKind::ByteStr => token::ByteStr, + LitKind::ByteStrRaw(n) => token::ByteStrRaw(n), + LitKind::Err => token::Err, + } + } +} + +impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)> for Vec> { fn from_internal((stream, rustc): (TokenStream, &mut Rustc<'_, '_>)) -> Self { use rustc_ast::token::*; @@ -143,7 +175,14 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)> TokenTree::Ident(Ident { sym: ident.name, is_raw: false, span }), ]); } - Literal(lit) => trees.push(TokenTree::Literal(self::Literal { lit, span })), + Literal(token::Lit { kind, symbol, suffix }) => { + trees.push(TokenTree::Literal(self::Literal { + kind: FromInternal::from_internal(kind), + symbol, + suffix, + span, + })); + } DocComment(_, attr_style, data) => { let mut escaped = String::new(); for ch in data.as_str().chars() { @@ -199,9 +238,7 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)> } } -impl ToInternal - for (TokenTree, &mut Rustc<'_, '_>) -{ +impl ToInternal for (TokenTree, &mut Rustc<'_, '_>) { fn to_internal(self) -> TokenStream { use rustc_ast::token::*; @@ -221,7 +258,9 @@ impl 
ToInternal return tokenstream::TokenTree::token(Ident(sym, is_raw), span).into(); } TokenTree::Literal(self::Literal { - lit: token::Lit { kind: token::Integer, symbol, suffix }, + kind: self::LitKind::Integer, + symbol, + suffix, span, }) if symbol.as_str().starts_with('-') => { let minus = BinOp(BinOpToken::Minus); @@ -232,7 +271,9 @@ impl ToInternal return [a, b].into_iter().collect(); } TokenTree::Literal(self::Literal { - lit: token::Lit { kind: token::Float, symbol, suffix }, + kind: self::LitKind::Float, + symbol, + suffix, span, }) if symbol.as_str().starts_with('-') => { let minus = BinOp(BinOpToken::Minus); @@ -242,8 +283,12 @@ impl ToInternal let b = tokenstream::TokenTree::token(float, span); return [a, b].into_iter().collect(); } - TokenTree::Literal(self::Literal { lit, span }) => { - return tokenstream::TokenTree::token(Literal(lit), span).into(); + TokenTree::Literal(self::Literal { kind, symbol, suffix, span }) => { + return tokenstream::TokenTree::token( + TokenKind::lit(kind.to_internal(), symbol, suffix), + span, + ) + .into(); } }; @@ -292,13 +337,6 @@ impl ToInternal for Level { pub struct FreeFunctions; -// FIXME(eddyb) `Literal` should not expose internal `Debug` impls. 
-#[derive(Clone, Debug)] -pub struct Literal { - lit: token::Lit, - span: Span, -} - pub(crate) struct Rustc<'a, 'b> { ecx: &'a mut ExtCtxt<'b>, def_site: Span, @@ -324,16 +362,11 @@ impl<'a, 'b> Rustc<'a, 'b> { fn sess(&self) -> &ParseSess { self.ecx.parse_sess() } - - fn lit(&mut self, kind: token::LitKind, symbol: Symbol, suffix: Option) -> Literal { - Literal { lit: token::Lit::new(kind, symbol, suffix), span: self.call_site } - } } impl server::Types for Rustc<'_, '_> { type FreeFunctions = FreeFunctions; type TokenStream = TokenStream; - type Literal = Literal; type SourceFile = Lrc; type MultiSpan = Vec; type Diagnostic = Diagnostic; @@ -352,6 +385,94 @@ impl server::FreeFunctions for Rustc<'_, '_> { fn track_path(&mut self, path: &str) { self.sess().file_depinfo.borrow_mut().insert(Symbol::intern(path)); } + + fn literal_from_str(&mut self, s: &str) -> Result, ()> { + let name = FileName::proc_macro_source_code(s); + let mut parser = rustc_parse::new_parser_from_source_str(self.sess(), name, s.to_owned()); + + let first_span = parser.token.span.data(); + let minus_present = parser.eat(&token::BinOp(token::Minus)); + + let lit_span = parser.token.span.data(); + let token::Literal(mut lit) = parser.token.kind else { + return Err(()); + }; + + // Check no comment or whitespace surrounding the (possibly negative) + // literal, or more tokens after it. + if (lit_span.hi.0 - first_span.lo.0) as usize != s.len() { + return Err(()); + } + + if minus_present { + // If minus is present, check no comment or whitespace in between it + // and the literal token. + if first_span.hi.0 != lit_span.lo.0 { + return Err(()); + } + + // Check literal is a kind we allow to be negated in a proc macro token. 
+ match lit.kind { + token::LitKind::Bool + | token::LitKind::Byte + | token::LitKind::Char + | token::LitKind::Str + | token::LitKind::StrRaw(_) + | token::LitKind::ByteStr + | token::LitKind::ByteStrRaw(_) + | token::LitKind::Err => return Err(()), + token::LitKind::Integer | token::LitKind::Float => {} + } + + // Synthesize a new symbol that includes the minus sign. + let symbol = Symbol::intern(&s[..1 + lit.symbol.as_str().len()]); + lit = token::Lit::new(lit.kind, symbol, lit.suffix); + } + let token::Lit { kind, symbol, suffix } = lit; + Ok(Literal { + kind: FromInternal::from_internal(kind), + symbol, + suffix, + span: self.call_site, + }) + } + + fn literal_subspan( + &mut self, + literal: Literal, + start: Bound, + end: Bound, + ) -> Option { + let span = literal.span; + let length = span.hi().to_usize() - span.lo().to_usize(); + + let start = match start { + Bound::Included(lo) => lo, + Bound::Excluded(lo) => lo.checked_add(1)?, + Bound::Unbounded => 0, + }; + + let end = match end { + Bound::Included(hi) => hi.checked_add(1)?, + Bound::Excluded(hi) => hi, + Bound::Unbounded => length, + }; + + // Bounds check the values, preventing addition overflow and OOB spans. 
+ if start > u32::MAX as usize + || end > u32::MAX as usize + || (u32::MAX - start as u32) < span.lo().to_u32() + || (u32::MAX - end as u32) < span.lo().to_u32() + || start >= end + || end > length + { + return None; + } + + let new_lo = span.lo() + BytePos::from_usize(start); + let new_hi = span.lo() + BytePos::from_usize(end); + Some(span.with_lo(new_lo).with_hi(new_hi)) + } } impl server::TokenStream for Rustc<'_, '_> { @@ -429,7 +550,7 @@ impl server::TokenStream for Rustc<'_, '_> { fn from_token_tree( &mut self, - tree: TokenTree, + tree: TokenTree, ) -> Self::TokenStream { (tree, &mut *self).to_internal() } @@ -437,7 +558,7 @@ impl server::TokenStream for Rustc<'_, '_> { fn concat_trees( &mut self, base: Option, - trees: Vec>, + trees: Vec>, ) -> Self::TokenStream { let mut builder = tokenstream::TokenStreamBuilder::new(); if let Some(base) = base { @@ -467,164 +588,11 @@ impl server::TokenStream for Rustc<'_, '_> { fn into_trees( &mut self, stream: Self::TokenStream, - ) -> Vec> { + ) -> Vec> { FromInternal::from_internal((stream, self)) } } -impl server::Literal for Rustc<'_, '_> { - fn from_str(&mut self, s: &str) -> Result { - let name = FileName::proc_macro_source_code(s); - let mut parser = rustc_parse::new_parser_from_source_str(self.sess(), name, s.to_owned()); - - let first_span = parser.token.span.data(); - let minus_present = parser.eat(&token::BinOp(token::Minus)); - - let lit_span = parser.token.span.data(); - let token::Literal(mut lit) = parser.token.kind else { - return Err(()); - }; - - // Check no comment or whitespace surrounding the (possibly negative) - // literal, or more tokens after it. - if (lit_span.hi.0 - first_span.lo.0) as usize != s.len() { - return Err(()); - } - - if minus_present { - // If minus is present, check no comment or whitespace in between it - // and the literal token. - if first_span.hi.0 != lit_span.lo.0 { - return Err(()); - } - - // Check literal is a kind we allow to be negated in a proc macro token. 
- match lit.kind { - token::LitKind::Bool - | token::LitKind::Byte - | token::LitKind::Char - | token::LitKind::Str - | token::LitKind::StrRaw(_) - | token::LitKind::ByteStr - | token::LitKind::ByteStrRaw(_) - | token::LitKind::Err => return Err(()), - token::LitKind::Integer | token::LitKind::Float => {} - } - - // Synthesize a new symbol that includes the minus sign. - let symbol = Symbol::intern(&s[..1 + lit.symbol.as_str().len()]); - lit = token::Lit::new(lit.kind, symbol, lit.suffix); - } - - Ok(Literal { lit, span: self.call_site }) - } - - fn to_string(&mut self, literal: &Self::Literal) -> String { - literal.lit.to_string() - } - - fn debug_kind(&mut self, literal: &Self::Literal) -> String { - format!("{:?}", literal.lit.kind) - } - - fn symbol(&mut self, literal: &Self::Literal) -> String { - literal.lit.symbol.to_string() - } - - fn suffix(&mut self, literal: &Self::Literal) -> Option { - literal.lit.suffix.as_ref().map(Symbol::to_string) - } - - fn integer(&mut self, n: &str) -> Self::Literal { - self.lit(token::Integer, Symbol::intern(n), None) - } - - fn typed_integer(&mut self, n: &str, kind: &str) -> Self::Literal { - self.lit(token::Integer, Symbol::intern(n), Some(Symbol::intern(kind))) - } - - fn float(&mut self, n: &str) -> Self::Literal { - self.lit(token::Float, Symbol::intern(n), None) - } - - fn f32(&mut self, n: &str) -> Self::Literal { - self.lit(token::Float, Symbol::intern(n), Some(sym::f32)) - } - - fn f64(&mut self, n: &str) -> Self::Literal { - self.lit(token::Float, Symbol::intern(n), Some(sym::f64)) - } - - fn string(&mut self, string: &str) -> Self::Literal { - let quoted = format!("{:?}", string); - assert!(quoted.starts_with('"') && quoted.ends_with('"')); - let symbol = "ed[1..quoted.len() - 1]; - self.lit(token::Str, Symbol::intern(symbol), None) - } - - fn character(&mut self, ch: char) -> Self::Literal { - let quoted = format!("{:?}", ch); - assert!(quoted.starts_with('\'') && quoted.ends_with('\'')); - let symbol = 
"ed[1..quoted.len() - 1]; - self.lit(token::Char, Symbol::intern(symbol), None) - } - - fn byte_string(&mut self, bytes: &[u8]) -> Self::Literal { - let string = bytes - .iter() - .cloned() - .flat_map(ascii::escape_default) - .map(Into::::into) - .collect::(); - self.lit(token::ByteStr, Symbol::intern(&string), None) - } - - fn span(&mut self, literal: &Self::Literal) -> Self::Span { - literal.span - } - - fn set_span(&mut self, literal: &mut Self::Literal, span: Self::Span) { - literal.span = span; - } - - fn subspan( - &mut self, - literal: &Self::Literal, - start: Bound, - end: Bound, - ) -> Option { - let span = literal.span; - let length = span.hi().to_usize() - span.lo().to_usize(); - - let start = match start { - Bound::Included(lo) => lo, - Bound::Excluded(lo) => lo.checked_add(1)?, - Bound::Unbounded => 0, - }; - - let end = match end { - Bound::Included(hi) => hi.checked_add(1)?, - Bound::Excluded(hi) => hi, - Bound::Unbounded => length, - }; - - // Bounds check the values, preventing addition overflow and OOB spans. - if start > u32::MAX as usize - || end > u32::MAX as usize - || (u32::MAX - start as u32) < span.lo().to_u32() - || (u32::MAX - end as u32) < span.lo().to_u32() - || start >= end - || end > length - { - return None; - } - - let new_lo = span.lo() + BytePos::from_usize(start); - let new_hi = span.lo() + BytePos::from_usize(end); - Some(span.with_lo(new_lo).with_hi(new_hi)) - } -} - impl server::SourceFile for Rustc<'_, '_> { fn eq(&mut self, file1: &Self::SourceFile, file2: &Self::SourceFile) -> bool { Lrc::ptr_eq(file1, file2) diff --git a/library/proc_macro/src/bridge/client.rs b/library/proc_macro/src/bridge/client.rs index 6d48faf5413e8..1516f084ab8b6 100644 --- a/library/proc_macro/src/bridge/client.rs +++ b/library/proc_macro/src/bridge/client.rs @@ -175,7 +175,6 @@ define_handles! 
{ 'owned: FreeFunctions, TokenStream, - Literal, SourceFile, MultiSpan, Diagnostic, @@ -196,25 +195,6 @@ impl Clone for TokenStream { } } -impl Clone for Literal { - fn clone(&self) -> Self { - self.clone() - } -} - -impl fmt::Debug for Literal { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("Literal") - // format the kind without quotes, as in `kind: Float` - .field("kind", &format_args!("{}", &self.debug_kind())) - .field("symbol", &self.symbol()) - // format `Some("...")` on one line even in {:#?} mode - .field("suffix", &format_args!("{:?}", &self.suffix())) - .field("span", &self.span()) - .finish() - } -} - impl Clone for SourceFile { fn clone(&self) -> Self { self.clone() diff --git a/library/proc_macro/src/bridge/mod.rs b/library/proc_macro/src/bridge/mod.rs index 1015c0d725980..712b8c637973e 100644 --- a/library/proc_macro/src/bridge/mod.rs +++ b/library/proc_macro/src/bridge/mod.rs @@ -56,6 +56,8 @@ macro_rules! with_api { fn drop($self: $S::FreeFunctions); fn track_env_var(var: &str, value: Option<&str>); fn track_path(path: &str); + fn literal_from_str(s: &str) -> Result, ()>; + fn literal_subspan(lit: Literal<$S::Span, $S::Symbol>, start: Bound, end: Bound) -> Option<$S::Span>; }, TokenStream { fn drop($self: $S::TokenStream); @@ -65,11 +67,11 @@ macro_rules! with_api { fn from_str(src: &str) -> $S::TokenStream; fn to_string($self: &$S::TokenStream) -> String; fn from_token_tree( - tree: TokenTree<$S::TokenStream, $S::Span, $S::Symbol, $S::Literal>, + tree: TokenTree<$S::TokenStream, $S::Span, $S::Symbol>, ) -> $S::TokenStream; fn concat_trees( base: Option<$S::TokenStream>, - trees: Vec>, + trees: Vec>, ) -> $S::TokenStream; fn concat_streams( base: Option<$S::TokenStream>, @@ -77,31 +79,7 @@ macro_rules! 
with_api { ) -> $S::TokenStream; fn into_trees( $self: $S::TokenStream - ) -> Vec>; - }, - Literal { - fn drop($self: $S::Literal); - fn clone($self: &$S::Literal) -> $S::Literal; - fn from_str(s: &str) -> Result<$S::Literal, ()>; - fn to_string($self: &$S::Literal) -> String; - fn debug_kind($self: &$S::Literal) -> String; - fn symbol($self: &$S::Literal) -> String; - fn suffix($self: &$S::Literal) -> Option; - fn integer(n: &str) -> $S::Literal; - fn typed_integer(n: &str, kind: &str) -> $S::Literal; - fn float(n: &str) -> $S::Literal; - fn f32(n: &str) -> $S::Literal; - fn f64(n: &str) -> $S::Literal; - fn string(string: &str) -> $S::Literal; - fn character(ch: char) -> $S::Literal; - fn byte_string(bytes: &[u8]) -> $S::Literal; - fn span($self: &$S::Literal) -> $S::Span; - fn set_span($self: &mut $S::Literal, span: $S::Span); - fn subspan( - $self: &$S::Literal, - start: Bound, - end: Bound, - ) -> Option<$S::Span>; + ) -> Vec>; }, SourceFile { fn drop($self: $S::SourceFile); @@ -332,6 +310,7 @@ mark_noop! { u8, usize, Delimiter, + LitKind, Level, LineColumn, Spacing, @@ -361,6 +340,33 @@ rpc_encode_decode!( } ); +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +pub enum LitKind { + Byte, + Char, + Integer, + Float, + Str, + StrRaw(u8), + ByteStr, + ByteStrRaw(u8), + Err, +} + +rpc_encode_decode!( + enum LitKind { + Byte, + Char, + Integer, + Float, + Str, + StrRaw(n), + ByteStr, + ByteStrRaw(n), + Err, + } +); + macro_rules! mark_compound { (struct $name:ident <$($T:ident),+> { $($field:ident),* $(,)? 
}) => { impl<$($T: Mark),+> Mark for $name <$($T),+> { @@ -477,16 +483,26 @@ pub struct Ident { compound_traits!(struct Ident { sym, is_raw, span }); +#[derive(Clone, Eq, PartialEq)] +pub struct Literal { + pub kind: LitKind, + pub symbol: Symbol, + pub suffix: Option, + pub span: Span, +} + +compound_traits!(struct Literal { kind, symbol, suffix, span }); + #[derive(Clone)] -pub enum TokenTree { +pub enum TokenTree { Group(Group), Punct(Punct), Ident(Ident), - Literal(Literal), + Literal(Literal), } compound_traits!( - enum TokenTree { + enum TokenTree { Group(tt), Punct(tt), Ident(tt), diff --git a/library/proc_macro/src/bridge/server.rs b/library/proc_macro/src/bridge/server.rs index 53668ecfead78..d46e325951d72 100644 --- a/library/proc_macro/src/bridge/server.rs +++ b/library/proc_macro/src/bridge/server.rs @@ -8,7 +8,6 @@ use super::client::HandleStore; pub trait Types { type FreeFunctions: 'static; type TokenStream: 'static + Clone; - type Literal: 'static + Clone; type SourceFile: 'static + Clone; type MultiSpan: 'static; type Diagnostic: 'static; diff --git a/library/proc_macro/src/lib.rs b/library/proc_macro/src/lib.rs index 911deaef8c978..372bd3ac2141e 100644 --- a/library/proc_macro/src/lib.rs +++ b/library/proc_macro/src/lib.rs @@ -215,12 +215,7 @@ pub use quote::{quote, quote_span}; fn tree_to_bridge_tree( tree: TokenTree, -) -> bridge::TokenTree< - bridge::client::TokenStream, - bridge::client::Span, - bridge::client::Symbol, - bridge::client::Literal, -> { +) -> bridge::TokenTree { match tree { TokenTree::Group(tt) => bridge::TokenTree::Group(tt.0), TokenTree::Punct(tt) => bridge::TokenTree::Punct(tt.0), @@ -245,7 +240,6 @@ struct ConcatTreesHelper { bridge::client::TokenStream, bridge::client::Span, bridge::client::Symbol, - bridge::client::Literal, >, >, } @@ -372,7 +366,6 @@ pub mod token_stream { bridge::client::TokenStream, bridge::client::Span, bridge::client::Symbol, - bridge::client::Literal, >, >, ); @@ -1147,7 +1140,7 @@ impl fmt::Debug 
for Ident { /// Boolean literals like `true` and `false` do not belong here, they are `Ident`s. #[derive(Clone)] #[stable(feature = "proc_macro_lib2", since = "1.29.0")] -pub struct Literal(bridge::client::Literal); +pub struct Literal(bridge::Literal); macro_rules! suffixed_int_literals { ($($name:ident => $kind:ident,)*) => ($( @@ -1164,7 +1157,12 @@ macro_rules! suffixed_int_literals { /// below. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn $name(n: $kind) -> Literal { - Literal(bridge::client::Literal::typed_integer(&n.to_string(), stringify!($kind))) + Literal(bridge::Literal { + kind: bridge::LitKind::Integer, + symbol: bridge::client::Symbol::new(&n.to_string()), + suffix: Some(bridge::client::Symbol::new(stringify!($kind))), + span: Span::call_site().0, + }) } )*) } @@ -1186,12 +1184,26 @@ macro_rules! unsuffixed_int_literals { /// below. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn $name(n: $kind) -> Literal { - Literal(bridge::client::Literal::integer(&n.to_string())) + Literal(bridge::Literal { + kind: bridge::LitKind::Integer, + symbol: bridge::client::Symbol::new(&n.to_string()), + suffix: None, + span: Span::call_site().0, + }) } )*) } impl Literal { + fn new(kind: bridge::LitKind, value: &str, suffix: Option<&str>) -> Self { + Literal(bridge::Literal { + kind, + symbol: bridge::client::Symbol::new(value), + suffix: suffix.map(bridge::client::Symbol::new), + span: Span::call_site().0, + }) + } + suffixed_int_literals! { u8_suffixed => u8, u16_suffixed => u16, @@ -1243,7 +1255,7 @@ impl Literal { if !repr.contains('.') { repr.push_str(".0"); } - Literal(bridge::client::Literal::float(&repr)) + Literal::new(bridge::LitKind::Float, &repr, None) } /// Creates a new suffixed floating-point literal. 
@@ -1264,7 +1276,7 @@ impl Literal { if !n.is_finite() { panic!("Invalid float literal {n}"); } - Literal(bridge::client::Literal::f32(&n.to_string())) + Literal::new(bridge::LitKind::Float, &n.to_string(), Some("f32")) } /// Creates a new unsuffixed floating-point literal. @@ -1288,7 +1300,7 @@ impl Literal { if !repr.contains('.') { repr.push_str(".0"); } - Literal(bridge::client::Literal::float(&repr)) + Literal::new(bridge::LitKind::Float, &repr, None) } /// Creates a new suffixed floating-point literal. @@ -1309,37 +1321,49 @@ impl Literal { if !n.is_finite() { panic!("Invalid float literal {n}"); } - Literal(bridge::client::Literal::f64(&n.to_string())) + Literal::new(bridge::LitKind::Float, &n.to_string(), Some("f64")) } /// String literal. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn string(string: &str) -> Literal { - Literal(bridge::client::Literal::string(string)) + let quoted = format!("{:?}", string); + assert!(quoted.starts_with('"') && quoted.ends_with('"')); + let symbol = &quoted[1..quoted.len() - 1]; + Literal::new(bridge::LitKind::Str, symbol, None) } /// Character literal. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn character(ch: char) -> Literal { - Literal(bridge::client::Literal::character(ch)) + let quoted = format!("{:?}", ch); + assert!(quoted.starts_with('\'') && quoted.ends_with('\'')); + let symbol = &quoted[1..quoted.len() - 1]; + Literal::new(bridge::LitKind::Char, symbol, None) } /// Byte string literal. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn byte_string(bytes: &[u8]) -> Literal { - Literal(bridge::client::Literal::byte_string(bytes)) + let string = bytes + .iter() + .cloned() + .flat_map(std::ascii::escape_default) + .map(Into::<char>::into) + .collect::<String>(); + Literal::new(bridge::LitKind::ByteStr, &string, None) } /// Returns the span encompassing this literal. 
#[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn span(&self) -> Span { - Span(self.0.span()) + Span(self.0.span) } /// Configures the span associated for this literal. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn set_span(&mut self, span: Span) { - self.0.set_span(span.0); + self.0.span = span.0; } /// Returns a `Span` that is a subset of `self.span()` containing only the @@ -1355,7 +1379,55 @@ impl Literal { // was 'c' or whether it was '\u{63}'. #[unstable(feature = "proc_macro_span", issue = "54725")] pub fn subspan>(&self, range: R) -> Option { - self.0.subspan(range.start_bound().cloned(), range.end_bound().cloned()).map(Span) + bridge::client::FreeFunctions::literal_subspan( + self.0.clone(), + range.start_bound().cloned(), + range.end_bound().cloned(), + ) + .map(Span) + } + + fn with_symbol_and_suffix(&self, f: impl FnOnce(&str, &str) -> R) -> R { + self.0.symbol.with(|symbol| match self.0.suffix { + Some(suffix) => suffix.with(|suffix| f(symbol, suffix)), + None => f(symbol, ""), + }) + } + + /// Invokes the callback with a `&[&str]` consisting of each part of the + /// literal's representation. This is done to allow the `ToString` and + /// `Display` implementations to borrow references to symbol values, and + /// both be optimized to reduce overhead. + fn with_stringify_parts(&self, f: impl FnOnce(&[&str]) -> R) -> R { + /// Returns a string containing exactly `num` '#' characters. + /// Uses a 256-character source string literal which is always safe to + /// index with a `u8` index. 
+ fn get_hashes_str(num: u8) -> &'static str { + const HASHES: &str = "\ + ################################################################\ + ################################################################\ + ################################################################\ + ################################################################\ + "; + const _: () = assert!(HASHES.len() == 256); + &HASHES[..num as usize] + } + + self.with_symbol_and_suffix(|symbol, suffix| match self.0.kind { + bridge::LitKind::Byte => f(&["b'", symbol, "'", suffix]), + bridge::LitKind::Char => f(&["'", symbol, "'", suffix]), + bridge::LitKind::Str => f(&["\"", symbol, "\"", suffix]), + bridge::LitKind::StrRaw(n) => { + let hashes = get_hashes_str(n); + f(&["r", hashes, "\"", symbol, "\"", hashes, suffix]) + } + bridge::LitKind::ByteStr => f(&["b\"", symbol, "\"", suffix]), + bridge::LitKind::ByteStrRaw(n) => { + let hashes = get_hashes_str(n); + f(&["br", hashes, "\"", symbol, "\"", hashes, suffix]) + } + _ => f(&[symbol, suffix]), + }) } } @@ -1374,19 +1446,17 @@ impl FromStr for Literal { type Err = LexError; fn from_str(src: &str) -> Result { - match bridge::client::Literal::from_str(src) { + match bridge::client::FreeFunctions::literal_from_str(src) { Ok(literal) => Ok(Literal(literal)), Err(()) => Err(LexError), } } } -// N.B., the bridge only provides `to_string`, implement `fmt::Display` -// based on it (the reverse of the usual relationship between the two). 
-#[stable(feature = "proc_macro_lib", since = "1.15.0")] +#[stable(feature = "proc_macro_lib2", since = "1.29.0")] impl ToString for Literal { fn to_string(&self) -> String { - self.0.to_string() + self.with_stringify_parts(|parts| parts.concat()) } } @@ -1395,14 +1465,26 @@ impl ToString for Literal { #[stable(feature = "proc_macro_lib2", since = "1.29.0")] impl fmt::Display for Literal { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(&self.to_string()) + self.with_stringify_parts(|parts| { + for part in parts { + fmt::Display::fmt(part, f)?; + } + Ok(()) + }) } } #[stable(feature = "proc_macro_lib2", since = "1.29.0")] impl fmt::Debug for Literal { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.0.fmt(f) + f.debug_struct("Literal") + // format the kind on one line even in {:#?} mode + .field("kind", &format_args!("{:?}", &self.0.kind)) + .field("symbol", &self.0.symbol) + // format `Some("...")` on one line even in {:#?} mode + .field("suffix", &format_args!("{:?}", &self.0.suffix)) + .field("span", &self.0.span) + .finish() } } From c4acac64432aa6026ae24422abfee7c3014576e3 Mon Sep 17 00:00:00 2001 From: Nika Layzell Date: Sat, 9 Jul 2022 11:34:06 -0400 Subject: [PATCH 5/5] proc_macro: Move subspan to be a method on Span in the bridge This method is still only used for Literal::subspan, however the implementation only depends on the Span component, so it is simpler and more efficient for now to pass down only the information that is needed. In the future, if more information about the Literal is required in the implementation (e.g. to validate that spans line up as expected with source text), that extra information can be added back with extra arguments. 
--- .../rustc_expand/src/proc_macro_server.rs | 73 +++++++++---------- library/proc_macro/src/bridge/mod.rs | 2 +- library/proc_macro/src/lib.rs | 7 +- 3 files changed, 38 insertions(+), 44 deletions(-) diff --git a/compiler/rustc_expand/src/proc_macro_server.rs b/compiler/rustc_expand/src/proc_macro_server.rs index 0618c9aa084d6..176c77ca6edc6 100644 --- a/compiler/rustc_expand/src/proc_macro_server.rs +++ b/compiler/rustc_expand/src/proc_macro_server.rs @@ -436,43 +436,6 @@ impl server::FreeFunctions for Rustc<'_, '_> { span: self.call_site, }) } - - fn literal_subspan( - &mut self, - literal: Literal<Self::Span, Self::Symbol>, - start: Bound<usize>, - end: Bound<usize>, - ) -> Option<Self::Span> { - let span = literal.span; - let length = span.hi().to_usize() - span.lo().to_usize(); - - let start = match start { - Bound::Included(lo) => lo, - Bound::Excluded(lo) => lo.checked_add(1)?, - Bound::Unbounded => 0, - }; - - let end = match end { - Bound::Included(hi) => hi.checked_add(1)?, - Bound::Excluded(hi) => hi, - Bound::Unbounded => length, - }; - - // Bounds check the values, preventing addition overflow and OOB spans.
- if start > u32::MAX as usize - || end > u32::MAX as usize - || (u32::MAX - start as u32) < span.lo().to_u32() - || (u32::MAX - end as u32) < span.lo().to_u32() - || start >= end - || end > length - { - return None; - } - - let new_lo = span.lo() + BytePos::from_usize(start); - let new_hi = span.lo() + BytePos::from_usize(end); - Some(span.with_lo(new_lo).with_hi(new_hi)) - } } impl server::TokenStream for Rustc<'_, '_> { @@ -697,6 +660,42 @@ impl server::Span for Rustc<'_, '_> { Some(first.to(second)) } + fn subspan( + &mut self, + span: Self::Span, + start: Bound<usize>, + end: Bound<usize>, + ) -> Option<Self::Span> { + let length = span.hi().to_usize() - span.lo().to_usize(); + + let start = match start { + Bound::Included(lo) => lo, + Bound::Excluded(lo) => lo.checked_add(1)?, + Bound::Unbounded => 0, + }; + + let end = match end { + Bound::Included(hi) => hi.checked_add(1)?, + Bound::Excluded(hi) => hi, + Bound::Unbounded => length, + }; + + // Bounds check the values, preventing addition overflow and OOB spans. + if start > u32::MAX as usize + || end > u32::MAX as usize + || (u32::MAX - start as u32) < span.lo().to_u32() + || (u32::MAX - end as u32) < span.lo().to_u32() + || start >= end + || end > length + { + return None; + } + + let new_lo = span.lo() + BytePos::from_usize(start); + let new_hi = span.lo() + BytePos::from_usize(end); + Some(span.with_lo(new_lo).with_hi(new_hi)) + } + fn resolved_at(&mut self, span: Self::Span, at: Self::Span) -> Self::Span { span.with_ctxt(at.ctxt()) } diff --git a/library/proc_macro/src/bridge/mod.rs b/library/proc_macro/src/bridge/mod.rs index 712b8c637973e..5cde966bf173d 100644 --- a/library/proc_macro/src/bridge/mod.rs +++ b/library/proc_macro/src/bridge/mod.rs @@ -57,7 +57,6 @@ macro_rules!
with_api { fn track_env_var(var: &str, value: Option<&str>); fn track_path(path: &str); fn literal_from_str(s: &str) -> Result<Literal<$S::Span, $S::Symbol>, ()>; - fn literal_subspan(lit: Literal<$S::Span, $S::Symbol>, start: Bound<usize>, end: Bound<usize>) -> Option<$S::Span>; }, TokenStream { fn drop($self: $S::TokenStream); @@ -114,6 +113,7 @@ macro_rules! with_api { fn before($self: $S::Span) -> $S::Span; fn after($self: $S::Span) -> $S::Span; fn join($self: $S::Span, other: $S::Span) -> Option<$S::Span>; + fn subspan($self: $S::Span, start: Bound<usize>, end: Bound<usize>) -> Option<$S::Span>; fn resolved_at($self: $S::Span, at: $S::Span) -> $S::Span; fn source_text($self: $S::Span) -> Option<String>; fn save_span($self: $S::Span) -> usize; diff --git a/library/proc_macro/src/lib.rs b/library/proc_macro/src/lib.rs index 372bd3ac2141e..5cf16bdd08cdd 100644 --- a/library/proc_macro/src/lib.rs +++ b/library/proc_macro/src/lib.rs @@ -1379,12 +1379,7 @@ impl Literal { // was 'c' or whether it was '\u{63}'. #[unstable(feature = "proc_macro_span", issue = "54725")] pub fn subspan<R: RangeBounds<usize>>(&self, range: R) -> Option<Span> { - bridge::client::FreeFunctions::literal_subspan( - self.0.clone(), - range.start_bound().cloned(), - range.end_bound().cloned(), - ) - .map(Span) + self.0.span.subspan(range.start_bound().cloned(), range.end_bound().cloned()).map(Span) } fn with_symbol_and_suffix<R>(&self, f: impl FnOnce(&str, &str) -> R) -> R {