From 687d56fdafb3988296d7ff0a1810e31d27b72929 Mon Sep 17 00:00:00 2001 From: Michael Goulet Date: Mon, 26 Aug 2024 11:11:13 -0400 Subject: [PATCH 01/18] Reserve prefix lifetimes too --- compiler/rustc_lexer/src/lib.rs | 26 ++++++++++++++++--------- compiler/rustc_parse/src/lexer/mod.rs | 10 ++++++++++ tests/ui/lexer/prefixed-lifetime.rs | 10 ++++++++++ tests/ui/lexer/prefixed-lifetime.stderr | 14 +++++++++++++ 4 files changed, 51 insertions(+), 9 deletions(-) create mode 100644 tests/ui/lexer/prefixed-lifetime.rs create mode 100644 tests/ui/lexer/prefixed-lifetime.stderr diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index 31fdd2d7cec70..be3e151ea61f9 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -91,6 +91,12 @@ pub enum TokenKind { /// tokens. UnknownPrefix, + /// An unknown prefix in a lifetime, like `'foo#`. + /// + /// Note that like above, only the `'` and prefix are included in the token + /// and not the separator. + UnknownPrefixLifetime, + /// Similar to the above, but *always* an error on every edition. This is used /// for emoji identifier recovery, as those are not meant to be ever accepted. InvalidPrefix, @@ -688,15 +694,17 @@ impl Cursor<'_> { self.bump(); self.eat_while(is_id_continue); - // Check if after skipping literal contents we've met a closing - // single quote (which means that user attempted to create a - // string with single quotes). - if self.first() == '\'' { - self.bump(); - let kind = Char { terminated: true }; - Literal { kind, suffix_start: self.pos_within_token() } - } else { - Lifetime { starts_with_number } + match self.first() { + // Check if after skipping literal contents we've met a closing + // single quote (which means that user attempted to create a + // string with single quotes). + '\'' => { + self.bump(); + let kind = Char { terminated: true }; + Literal { kind, suffix_start: self.pos_within_token() } + } + '#' if !starts_with_number => UnknownPrefixLifetime, + _ => Lifetime { starts_with_number }, } } diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index f30939093c2ec..5d2d5a6010f6f 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -205,6 +205,16 @@ impl<'psess, 'src> StringReader<'psess, 'src> { self.report_unknown_prefix(start); self.ident(start) } + rustc_lexer::TokenKind::UnknownPrefixLifetime => { + self.report_unknown_prefix(start); + // Include the leading `'` in the real identifier, for macro + // expansion purposes. See #12512 for the gory details of why + // this is necessary. + let lifetime_name = self.str_from(start); + self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1))); + let ident = Symbol::intern(lifetime_name); + token::Lifetime(ident, IdentIsRaw::No) + } rustc_lexer::TokenKind::InvalidIdent | rustc_lexer::TokenKind::InvalidPrefix // Do not recover an identifier with emoji if the codepoint is a confusable diff --git a/tests/ui/lexer/prefixed-lifetime.rs b/tests/ui/lexer/prefixed-lifetime.rs new file mode 100644 index 0000000000000..6191b97d57697 --- /dev/null +++ b/tests/ui/lexer/prefixed-lifetime.rs @@ -0,0 +1,10 @@ +//@ edition: 2021 + +macro_rules! w { + ($($tt:tt)*) => {}; +} + +w!('foo#lifetime); +//~^ ERROR prefix `'foo` is unknown + +fn main() {} diff --git a/tests/ui/lexer/prefixed-lifetime.stderr b/tests/ui/lexer/prefixed-lifetime.stderr new file mode 100644 index 0000000000000..39e8b5a2a570a --- /dev/null +++ b/tests/ui/lexer/prefixed-lifetime.stderr @@ -0,0 +1,14 @@ +error: prefix `'foo` is unknown + --> $DIR/prefixed-lifetime.rs:7:4 + | +LL | w!('foo#lifetime); + | ^^^^ unknown prefix + | + = note: prefixed identifiers and literals are reserved since Rust 2021 +help: consider inserting whitespace here + | +LL | w!('foo #lifetime); + | + + +error: aborting due to 1 previous error + From d3a1e5260e08f55b78219bdd81b869e33f1a924a Mon Sep 17 00:00:00 2001 From: Michael Goulet Date: Mon, 26 Aug 2024 11:26:36 -0400 Subject: [PATCH 02/18] Format lexer --- compiler/rustc_lexer/src/lib.rs | 2 +- compiler/rustc_parse/src/lexer/mod.rs | 41 ++++++++++++++------------- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index be3e151ea61f9..6561e416cfa4b 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -92,7 +92,7 @@ pub enum TokenKind { UnknownPrefix, /// An unknown prefix in a lifetime, like `'foo#`. - /// + /// /// Note that like above, only the `'` and prefix are included in the token /// and not the separator. UnknownPrefixLifetime, diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 5d2d5a6010f6f..c0731cf31c333 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -188,9 +188,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> { preceded_by_whitespace = true; continue; } - rustc_lexer::TokenKind::Ident => { - self.ident(start) - } + rustc_lexer::TokenKind::Ident => self.ident(start), rustc_lexer::TokenKind::RawIdent => { let sym = nfc_normalize(self.str_from(start + BytePos(2))); let span = self.mk_sp(start, self.pos); @@ -215,20 +213,21 @@ impl<'psess, 'src> StringReader<'psess, 'src> { let ident = Symbol::intern(lifetime_name); token::Lifetime(ident, IdentIsRaw::No) } - rustc_lexer::TokenKind::InvalidIdent - | rustc_lexer::TokenKind::InvalidPrefix + rustc_lexer::TokenKind::InvalidIdent | rustc_lexer::TokenKind::InvalidPrefix // Do not recover an identifier with emoji if the codepoint is a confusable // with a recoverable substitution token, like `➖`. - if !UNICODE_ARRAY - .iter() - .any(|&(c, _, _)| { - let sym = self.str_from(start); - sym.chars().count() == 1 && c == sym.chars().next().unwrap() - }) => + if !UNICODE_ARRAY.iter().any(|&(c, _, _)| { + let sym = self.str_from(start); + sym.chars().count() == 1 && c == sym.chars().next().unwrap() + }) => { let sym = nfc_normalize(self.str_from(start)); let span = self.mk_sp(start, self.pos); - self.psess.bad_unicode_identifiers.borrow_mut().entry(sym).or_default() + self.psess + .bad_unicode_identifiers + .borrow_mut() + .entry(sym) + .or_default() .push(span); token::Ident(sym, IdentIsRaw::No) } @@ -259,9 +258,9 @@ impl<'psess, 'src> StringReader<'psess, 'src> { let suffix = if suffix_start < self.pos { let string = self.str_from(suffix_start); if string == "_" { - self - .dcx() - .emit_err(errors::UnderscoreLiteralSuffix { span: self.mk_sp(suffix_start, self.pos) }); + self.dcx().emit_err(errors::UnderscoreLiteralSuffix { + span: self.mk_sp(suffix_start, self.pos), + }); None } else { Some(Symbol::intern(string)) @@ -279,7 +278,8 @@ impl<'psess, 'src> StringReader<'psess, 'src> { self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1))); if starts_with_number { let span = self.mk_sp(start, self.pos); - self.dcx().struct_err("lifetimes cannot start with a number") + self.dcx() + .struct_err("lifetimes cannot start with a number") .with_span(span) .stash(span, StashKey::LifetimeIsChar); } @@ -341,16 +341,19 @@ impl<'psess, 'src> StringReader<'psess, 'src> { // first remove compound tokens like `<<` from `rustc_lexer`, and then add // fancier error recovery to it, as there will be less overall work to do this // way. - let (token, sugg) = unicode_chars::check_for_substitution(self, start, c, repeats+1); + let (token, sugg) = + unicode_chars::check_for_substitution(self, start, c, repeats + 1); self.dcx().emit_err(errors::UnknownTokenStart { span: self.mk_sp(start, self.pos + Pos::from_usize(repeats * c.len_utf8())), escaped: escaped_char(c), sugg, - null: if c == '\x00' {Some(errors::UnknownTokenNull)} else {None}, + null: if c == '\x00' { Some(errors::UnknownTokenNull) } else { None }, repeat: if repeats > 0 { swallow_next_invalid = repeats; Some(errors::UnknownTokenRepeat { repeats }) - } else {None} + } else { + None + }, }); if let Some(token) = token { From e6bf16c474679cbb2d50731c860ce7de7c18b4ed Mon Sep 17 00:00:00 2001 From: Michael Goulet Date: Mon, 26 Aug 2024 11:56:25 -0400 Subject: [PATCH 03/18] Initial support for raw lifetimes --- compiler/rustc_ast/src/mut_visit.rs | 4 +- compiler/rustc_ast/src/token.rs | 18 +++---- compiler/rustc_ast/src/tokenstream.rs | 4 +- compiler/rustc_ast_pretty/src/pprust/state.rs | 13 +++-- compiler/rustc_expand/src/mbe/macro_parser.rs | 6 ++- compiler/rustc_expand/src/mbe/transcribe.rs | 4 +- .../rustc_expand/src/proc_macro_server.rs | 9 ++-- compiler/rustc_lexer/src/lib.rs | 11 ++++- compiler/rustc_lint/messages.ftl | 4 ++ .../rustc_lint/src/context/diagnostics.rs | 4 ++ compiler/rustc_lint/src/lints.rs | 9 ++++ compiler/rustc_lint_defs/src/lib.rs | 2 + compiler/rustc_parse/src/lexer/mod.rs | 48 +++++++++++++++++-- compiler/rustc_parse/src/parser/expr.rs | 6 +-- compiler/rustc_parse/src/parser/mod.rs | 2 +- .../rustc_parse/src/parser/nonterminal.rs | 6 +-- compiler/rustc_parse/src/parser/pat.rs | 6 +-- compiler/rustc_parse/src/parser/ty.rs | 7 +-- 18 files changed, 123 insertions(+), 40 deletions(-) diff --git a/compiler/rustc_ast/src/mut_visit.rs b/compiler/rustc_ast/src/mut_visit.rs index 8a66894a35603..1b4d3dcc12ff0 100644 --- a/compiler/rustc_ast/src/mut_visit.rs +++ b/compiler/rustc_ast/src/mut_visit.rs @@ -752,7 +752,7 @@ fn visit_lazy_tts(vis: &mut T, lazy_tts: &mut Option(vis: &mut T, t: &mut Token) { let Token { kind, span } = t; match kind { - token::Ident(name, _ /*raw*/) | token::Lifetime(name) => { + token::Ident(name, _ /* raw */) | token::Lifetime(name, _ /* raw */) => { let mut ident = Ident::new(*name, *span); vis.visit_ident(&mut ident); *name = ident.name; @@ -762,7 +762,7 @@ pub fn visit_token(vis: &mut T, t: &mut Token) { token::NtIdent(ident, _is_raw) => { vis.visit_ident(ident); } - token::NtLifetime(ident) => { + token::NtLifetime(ident, _is_raw) => { vis.visit_ident(ident); } token::Interpolated(nt) => { diff --git a/compiler/rustc_ast/src/token.rs b/compiler/rustc_ast/src/token.rs index 43d87b96ead90..d09522679af9c 100644 --- a/compiler/rustc_ast/src/token.rs +++ b/compiler/rustc_ast/src/token.rs @@ -331,11 +331,11 @@ pub enum TokenKind { /// Do not forget about `NtLifetime` when you want to match on lifetime identifiers. /// It's recommended to use `Token::(lifetime,uninterpolate,uninterpolated_span)` to /// treat regular and interpolated lifetime identifiers in the same way. - Lifetime(Symbol), + Lifetime(Symbol, IdentIsRaw), /// This identifier (and its span) is the lifetime passed to the /// declarative macro. The span in the surrounding `Token` is the span of /// the `lifetime` metavariable in the macro's RHS. - NtLifetime(Ident), + NtLifetime(Ident, IdentIsRaw), /// An embedded AST node, as produced by a macro. This only exists for /// historical reasons. We'd like to get rid of it, for multiple reasons. @@ -458,7 +458,7 @@ impl Token { /// if they keep spans or perform edition checks. pub fn uninterpolated_span(&self) -> Span { match self.kind { - NtIdent(ident, _) | NtLifetime(ident) => ident.span, + NtIdent(ident, _) | NtLifetime(ident, _) => ident.span, Interpolated(ref nt) => nt.use_span(), _ => self.span, } @@ -646,7 +646,9 @@ impl Token { pub fn uninterpolate(&self) -> Cow<'_, Token> { match self.kind { NtIdent(ident, is_raw) => Cow::Owned(Token::new(Ident(ident.name, is_raw), ident.span)), - NtLifetime(ident) => Cow::Owned(Token::new(Lifetime(ident.name), ident.span)), + NtLifetime(ident, is_raw) => { + Cow::Owned(Token::new(Lifetime(ident.name, is_raw), ident.span)) + } _ => Cow::Borrowed(self), } } @@ -664,11 +666,11 @@ impl Token { /// Returns a lifetime identifier if this token is a lifetime. #[inline] - pub fn lifetime(&self) -> Option { + pub fn lifetime(&self) -> Option<(Ident, IdentIsRaw)> { // We avoid using `Token::uninterpolate` here because it's slow. match self.kind { - Lifetime(name) => Some(Ident::new(name, self.span)), - NtLifetime(ident) => Some(ident), + Lifetime(name, is_raw) => Some((Ident::new(name, self.span), is_raw)), + NtLifetime(ident, is_raw) => Some((ident, is_raw)), _ => None, } } @@ -850,7 +852,7 @@ impl Token { _ => return None, }, SingleQuote => match joint.kind { - Ident(name, IdentIsRaw::No) => Lifetime(Symbol::intern(&format!("'{name}"))), + Ident(name, is_raw) => Lifetime(Symbol::intern(&format!("'{name}")), is_raw), _ => return None, }, diff --git a/compiler/rustc_ast/src/tokenstream.rs b/compiler/rustc_ast/src/tokenstream.rs index 057b4455dca89..fc1dd2caf681a 100644 --- a/compiler/rustc_ast/src/tokenstream.rs +++ b/compiler/rustc_ast/src/tokenstream.rs @@ -482,11 +482,11 @@ impl TokenStream { token::NtIdent(ident, is_raw) => { TokenTree::Token(Token::new(token::Ident(ident.name, is_raw), ident.span), spacing) } - token::NtLifetime(ident) => TokenTree::Delimited( + token::NtLifetime(ident, is_raw) => TokenTree::Delimited( DelimSpan::from_single(token.span), DelimSpacing::new(Spacing::JointHidden, spacing), Delimiter::Invisible, - TokenStream::token_alone(token::Lifetime(ident.name), ident.span), + TokenStream::token_alone(token::Lifetime(ident.name, is_raw), ident.span), ), token::Interpolated(ref nt) => TokenTree::Delimited( DelimSpan::from_single(token.span), diff --git a/compiler/rustc_ast_pretty/src/pprust/state.rs b/compiler/rustc_ast_pretty/src/pprust/state.rs index c7ff39d23ed26..3b1449d9a9113 100644 --- a/compiler/rustc_ast_pretty/src/pprust/state.rs +++ b/compiler/rustc_ast_pretty/src/pprust/state.rs @@ -11,7 +11,9 @@ use std::borrow::Cow; use ast::TraitBoundModifiers; use rustc_ast::attr::AttrIdGenerator; use rustc_ast::ptr::P; -use rustc_ast::token::{self, BinOpToken, CommentKind, Delimiter, Nonterminal, Token, TokenKind}; +use rustc_ast::token::{ + self, BinOpToken, CommentKind, Delimiter, IdentIsRaw, Nonterminal, Token, TokenKind, +}; use rustc_ast::tokenstream::{Spacing, TokenStream, TokenTree}; use rustc_ast::util::classify; use rustc_ast::util::comments::{Comment, CommentStyle}; @@ -947,8 +949,13 @@ pub trait PrintState<'a>: std::ops::Deref + std::ops::Dere token::NtIdent(ident, is_raw) => { IdentPrinter::for_ast_ident(ident, is_raw.into()).to_string().into() } - token::Lifetime(name) => name.to_string().into(), - token::NtLifetime(ident) => ident.name.to_string().into(), + + token::Lifetime(name, IdentIsRaw::No) + | token::NtLifetime(Ident { name, .. }, IdentIsRaw::No) => name.to_string().into(), + token::Lifetime(name, IdentIsRaw::Yes) + | token::NtLifetime(Ident { name, .. }, IdentIsRaw::Yes) => { + format!("'r#{}", &name.as_str()[1..]).into() + } /* Other */ token::DocComment(comment_kind, attr_style, data) => { diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs index 9011d02da3358..501a2417fcf31 100644 --- a/compiler/rustc_expand/src/mbe/macro_parser.rs +++ b/compiler/rustc_expand/src/mbe/macro_parser.rs @@ -398,8 +398,10 @@ pub(crate) enum NamedMatch { fn token_name_eq(t1: &Token, t2: &Token) -> bool { if let (Some((ident1, is_raw1)), Some((ident2, is_raw2))) = (t1.ident(), t2.ident()) { ident1.name == ident2.name && is_raw1 == is_raw2 - } else if let (Some(ident1), Some(ident2)) = (t1.lifetime(), t2.lifetime()) { - ident1.name == ident2.name + } else if let (Some((ident1, is_raw1)), Some((ident2, is_raw2))) = + (t1.lifetime(), t2.lifetime()) + { + ident1.name == ident2.name && is_raw1 == is_raw2 } else { t1.kind == t2.kind } diff --git a/compiler/rustc_expand/src/mbe/transcribe.rs b/compiler/rustc_expand/src/mbe/transcribe.rs index b06910595bb25..39489a8df1bed 100644 --- a/compiler/rustc_expand/src/mbe/transcribe.rs +++ b/compiler/rustc_expand/src/mbe/transcribe.rs @@ -283,9 +283,9 @@ pub(super) fn transcribe<'a>( let kind = token::NtIdent(*ident, *is_raw); TokenTree::token_alone(kind, sp) } - MatchedSingle(ParseNtResult::Lifetime(ident)) => { + MatchedSingle(ParseNtResult::Lifetime(ident, is_raw)) => { marker.visit_span(&mut sp); - let kind = token::NtLifetime(*ident); + let kind = token::NtLifetime(*ident, *is_raw); TokenTree::token_alone(kind, sp) } MatchedSingle(ParseNtResult::Nt(nt)) => { diff --git a/compiler/rustc_expand/src/proc_macro_server.rs b/compiler/rustc_expand/src/proc_macro_server.rs index 4ff5da1a4bdfa..5798bcedc2263 100644 --- a/compiler/rustc_expand/src/proc_macro_server.rs +++ b/compiler/rustc_expand/src/proc_macro_server.rs @@ -229,15 +229,16 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)> for Vec { + Lifetime(name, is_raw) => { let ident = symbol::Ident::new(name, span).without_first_quote(); trees.extend([ TokenTree::Punct(Punct { ch: b'\'', joint: true, span }), - TokenTree::Ident(Ident { sym: ident.name, is_raw: false, span }), + TokenTree::Ident(Ident { sym: ident.name, is_raw: is_raw.into(), span }), ]); } - NtLifetime(ident) => { - let stream = TokenStream::token_alone(token::Lifetime(ident.name), ident.span); + NtLifetime(ident, is_raw) => { + let stream = + TokenStream::token_alone(token::Lifetime(ident.name, is_raw), ident.span); trees.push(TokenTree::Group(Group { delimiter: pm::Delimiter::None, stream: Some(stream), diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index 6561e416cfa4b..a5ec2c10a4440 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -97,6 +97,10 @@ pub enum TokenKind { /// and not the separator. UnknownPrefixLifetime, + /// `'r`, which is parsed into `'r#lt` in edition 2021 and above. Before + /// edition 2021, we warn that it's a reserved syntax. + RawLifetimePrefix, + /// Similar to the above, but *always* an error on every edition. This is used /// for emoji identifier recovery, as those are not meant to be ever accepted. InvalidPrefix, @@ -683,9 +687,14 @@ impl Cursor<'_> { return Literal { kind, suffix_start }; } + if self.first() == 'r' && self.second() == '#' && is_id_start(self.third()) { + // Eat "r" character. + self.bump(); + return RawLifetimePrefix; + } + // Either a lifetime or a character literal with // length greater than 1. - let starts_with_number = self.first().is_ascii_digit(); // Skip the literal contents. diff --git a/compiler/rustc_lint/messages.ftl b/compiler/rustc_lint/messages.ftl index 08a50050a36d5..4acc528c082f1 100644 --- a/compiler/rustc_lint/messages.ftl +++ b/compiler/rustc_lint/messages.ftl @@ -700,6 +700,10 @@ lint_range_endpoint_out_of_range = range endpoint is out of range for `{$ty}` lint_range_use_inclusive_range = use an inclusive range instead +lint_raw_prefix = prefix `'r` is reserved + .label = reserved prefix + .suggestion = insert whitespace here to avoid this being parsed as a prefix in Rust 2021 + lint_reason_must_be_string_literal = reason must be a string literal lint_reason_must_come_last = reason in lint attribute must come last diff --git a/compiler/rustc_lint/src/context/diagnostics.rs b/compiler/rustc_lint/src/context/diagnostics.rs index f289d4c81b3c1..1f1cc106877c6 100644 --- a/compiler/rustc_lint/src/context/diagnostics.rs +++ b/compiler/rustc_lint/src/context/diagnostics.rs @@ -171,6 +171,10 @@ pub(super) fn decorate_lint(sess: &Session, diagnostic: BuiltinLintDiag, diag: & } .decorate_lint(diag); } + BuiltinLintDiag::RawPrefix(label_span) => { + lints::RawPrefix { label: label_span, suggestion: label_span.shrink_to_hi() } + .decorate_lint(diag); + } BuiltinLintDiag::UnusedBuiltinAttribute { attr_name, macro_name, invoc_span } => { lints::UnusedBuiltinAttribute { invoc_span, attr_name, macro_name }.decorate_lint(diag); } diff --git a/compiler/rustc_lint/src/lints.rs b/compiler/rustc_lint/src/lints.rs index c6bcb1f3e8334..834f3b6260bc8 100644 --- a/compiler/rustc_lint/src/lints.rs +++ b/compiler/rustc_lint/src/lints.rs @@ -2750,6 +2750,15 @@ pub(crate) struct ReservedPrefix { pub prefix: String, } +#[derive(LintDiagnostic)] +#[diag(lint_raw_prefix)] +pub(crate) struct RawPrefix { + #[label] + pub label: Span, + #[suggestion(code = " ", applicability = "machine-applicable")] + pub suggestion: Span, +} + #[derive(LintDiagnostic)] #[diag(lint_unused_builtin_attribute)] pub(crate) struct UnusedBuiltinAttribute { diff --git a/compiler/rustc_lint_defs/src/lib.rs b/compiler/rustc_lint_defs/src/lib.rs index c17b85db3b044..e1ca778ebb390 100644 --- a/compiler/rustc_lint_defs/src/lib.rs +++ b/compiler/rustc_lint_defs/src/lib.rs @@ -623,6 +623,8 @@ pub enum BuiltinLintDiag { LegacyDeriveHelpers(Span), OrPatternsBackCompat(Span, String), ReservedPrefix(Span, String), + /// `'r#` in edition < 2021. + RawPrefix(Span), TrailingMacro(bool, Ident), BreakWithLabelAndLoop(Span), UnicodeTextFlow(Span, String), diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index c0731cf31c333..01601d96730c0 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -1,3 +1,4 @@ +use std::assert_matches::assert_matches; use std::ops::Range; use rustc_ast::ast::{self, AttrStyle}; @@ -13,7 +14,6 @@ use rustc_session::lint::builtin::{ }; use rustc_session::lint::BuiltinLintDiag; use rustc_session::parse::ParseSess; -use rustc_span::edition::Edition; use rustc_span::symbol::Symbol; use rustc_span::{BytePos, Pos, Span}; use tracing::debug; @@ -284,7 +284,49 @@ impl<'psess, 'src> StringReader<'psess, 'src> { .stash(span, StashKey::LifetimeIsChar); } let ident = Symbol::intern(lifetime_name); - token::Lifetime(ident) + token::Lifetime(ident, IdentIsRaw::No) + } + rustc_lexer::TokenKind::RawLifetimePrefix => { + let prefix_span = self.mk_sp(start, start + BytePos(2)); + if prefix_span.at_least_rust_2021() { + // # + let hash_token = self.cursor.advance_token(); + self.pos = self.pos + BytePos(hash_token.len); + assert_matches!(hash_token.kind, rustc_lexer::TokenKind::Pound); + + // ident + let ident_start = self.pos; + let ident_token = self.cursor.advance_token(); + self.pos = self.pos + BytePos(ident_token.len); + match ident_token.kind { + rustc_lexer::TokenKind::Ident => {} + rustc_lexer::TokenKind::UnknownPrefix | rustc_lexer::TokenKind::RawIdent => { + self.report_unknown_prefix(start); + } + _ => unreachable!(), + } + + let lifetime_name_without_tick = self.str_from(ident_start); + // Put the `'` back onto the lifetime name. + let mut lifetime_name = String::with_capacity(lifetime_name_without_tick.len() + 1); + lifetime_name.push('\''); + lifetime_name += lifetime_name_without_tick; + let sym = Symbol::intern(&lifetime_name); + token::Lifetime(sym, IdentIsRaw::Yes) + } else { + // Otherwise, this is just `'r`. Warn about it though. + self.psess.buffer_lint( + RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, + prefix_span, + ast::CRATE_NODE_ID, + BuiltinLintDiag::RawPrefix(prefix_span), + ); + + let lifetime_name = self.str_from(start); + self.last_lifetime = Some(prefix_span); + let ident = Symbol::intern(lifetime_name); + token::Lifetime(ident, IdentIsRaw::No) + } } rustc_lexer::TokenKind::Semi => token::Semi, rustc_lexer::TokenKind::Comma => token::Comma, @@ -712,7 +754,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> { let expn_data = prefix_span.ctxt().outer_expn_data(); - if expn_data.edition >= Edition::Edition2021 { + if expn_data.edition.at_least_rust_2021() { // In Rust 2021, this is a hard error. let sugg = if prefix == "rb" { Some(errors::UnknownPrefixSugg::UseBr(prefix_span)) diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index 84684e808d940..ecc4cd96fafb0 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -2050,7 +2050,7 @@ impl<'a> Parser<'a> { }; // On an error path, eagerly consider a lifetime to be an unclosed character lit, if that // makes sense. - if let Some(ident) = self.token.lifetime() + if let Some((ident, IdentIsRaw::No)) = self.token.lifetime() && could_be_unclosed_char_literal(ident) { let lt = self.expect_lifetime(); @@ -2925,9 +2925,9 @@ impl<'a> Parser<'a> { } pub(crate) fn eat_label(&mut self) -> Option