From b64c7f4e9e5d807dd7e65d5b2b224f4b1c55b331 Mon Sep 17 00:00:00 2001 From: Peter Jaszkowiak Date: Wed, 1 May 2024 22:51:15 -0600 Subject: [PATCH] Reserve guarded string literal syntax (RFC 3593) The syntax change applies to all editions, because the particular syntax `#"foo"#` is unlikely to exist in the wild. --- compiler/rustc_lexer/src/cursor.rs | 1 + compiler/rustc_lexer/src/lib.rs | 92 ++++++++++- compiler/rustc_parse/messages.ftl | 4 + compiler/rustc_parse/src/errors.rs | 18 +++ compiler/rustc_parse/src/lexer/mod.rs | 24 +++ src/librustdoc/html/highlight.rs | 3 +- .../crates/parser/src/lexed_str.rs | 4 + .../src/server/rust_analyzer_span.rs | 1 + .../proc-macro-srv/src/server/token_id.rs | 1 + tests/ui/lexer/reserved-guarded-strings.rs | 60 +++++++ .../ui/lexer/reserved-guarded-strings.stderr | 146 ++++++++++++++++++ 11 files changed, 345 insertions(+), 9 deletions(-) create mode 100644 tests/ui/lexer/reserved-guarded-strings.rs create mode 100644 tests/ui/lexer/reserved-guarded-strings.stderr diff --git a/compiler/rustc_lexer/src/cursor.rs b/compiler/rustc_lexer/src/cursor.rs index d173c3ac0327b..eb739a6f457fe 100644 --- a/compiler/rustc_lexer/src/cursor.rs +++ b/compiler/rustc_lexer/src/cursor.rs @@ -4,6 +4,7 @@ use std::str::Chars; /// /// Next characters can be peeked via `first` method, /// and position can be shifted forward via `bump` method. +#[derive(Clone)] pub struct Cursor<'a> { len_remaining: usize, /// Iterator over chars. Slightly faster than a &str. diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index 6f8a9792b6ce8..848d8b46daea1 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -29,6 +29,8 @@ pub mod unescape; #[cfg(test)] mod tests; +use std::num::NonZeroU8; + pub use crate::cursor::Cursor; use self::LiteralKind::*; @@ -179,24 +181,27 @@ pub enum DocStyle { /// `rustc_ast::ast::LitKind`). 
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum LiteralKind { - /// "12_u8", "0o100", "0b120i99", "1f32". + /// `12_u8`, `0o100`, `0b120i99`, `1f32`. Int { base: Base, empty_int: bool }, - /// "12.34f32", "1e3", but not "1f32". + /// `12.34f32`, `1e3`, but not `1f32`. Float { base: Base, empty_exponent: bool }, - /// "'a'", "'\\'", "'''", "';" + /// `'a'`, `'\\'`, `'''`, `';` Char { terminated: bool }, - /// "b'a'", "b'\\'", "b'''", "b';" + /// `b'a'`, `b'\\'`, `b'''`, `b';` Byte { terminated: bool }, - /// ""abc"", ""abc" + /// `"abc"`, `"abc` Str { terminated: bool }, - /// "b"abc"", "b"abc" + /// `b"abc"`, `b"abc` ByteStr { terminated: bool }, /// `c"abc"`, `c"abc` CStr { terminated: bool }, - /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates + /// `#"abc"#`, `#"a`, `##"a"#`. `None` indicates no closing quote. + /// Allows fewer hashes to close the string to support older editions. + GuardedStr { n_start_hashes: Option<NonZeroU8>, n_end_hashes: u8 }, + /// `r"abc"`, `r#"abc"#`, `r####"ab"###"c"####`, `r#"a`. `None` indicates /// an invalid literal. RawStr { n_hashes: Option<u8> }, - /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None` + /// `br"abc"`, `br#"abc"#`, `br####"ab"###"c"####`, `br#"a`. `None` /// indicates an invalid literal. RawByteStr { n_hashes: Option<u8> }, /// `cr"abc"`, "cr#"abc"#", `cr#"a`. `None` indicates an invalid literal. @@ -365,6 +370,49 @@ impl Cursor<'_> { _ => self.ident_or_unknown_prefix(), }, + // Guarded string literal (reserved syntax). + '#' if matches!(self.first(), '"' | '#') => { + // Create a backup to restore later if this + // turns out to not be a guarded literal. + let backup = self.clone(); + + let mut n_start_hashes: u32 = 1; // Already captured one `#`.
+ while self.first() == '#' { + n_start_hashes += 1; + self.bump(); + } + + if self.first() == '"' { + self.bump(); + + let res = self.guarded_double_quoted_string(n_start_hashes); + let suffix_start = self.pos_within_token(); + + if let (Ok(n_end_hashes), Ok(n)) = (res, u8::try_from(n_start_hashes)) { + self.eat_literal_suffix(); + + Literal { + kind: GuardedStr { + n_start_hashes: NonZeroU8::new(n), + // Always succeeds because `n_end_hashes <= n` + n_end_hashes: n_end_hashes.try_into().unwrap(), + }, + suffix_start, + } + } else { + Literal { + kind: GuardedStr { n_start_hashes: None, n_end_hashes: 0 }, + suffix_start, + } + } + } else { + // Not a guarded string, so restore old state. + *self = backup; + // Return a pound token. + Pound + } + } + // Byte literal, byte string literal, raw byte string literal or identifier. 'b' => self.c_or_byte_string( |terminated| ByteStr { terminated }, @@ -758,6 +806,34 @@ impl Cursor<'_> { false } + /// Eats the double-quoted string and returns `n_end_hashes`, or an error if one is encountered. + fn guarded_double_quoted_string(&mut self, n_start_hashes: u32) -> Result<u32, RawStrError> { + debug_assert!(self.prev() == '"'); + + // Lex the string itself as a normal string literal + // so we can recover that for older editions later. + if !self.double_quoted_string() { + return Err(RawStrError::NoTerminator { + expected: n_start_hashes, + found: 0, + possible_terminator_offset: None, + }); + } + + // Consume closing '#' symbols. + // Note that this will not consume extra trailing `#` characters: + // `###"abcde"####` is lexed as a `GuardedStr { n_end_hashes: 3, .. }` + // followed by a `#` token. + let mut n_end_hashes = 0; + while self.first() == '#' && n_end_hashes < n_start_hashes { + n_end_hashes += 1; + self.bump(); + } + + // Handle `n_end_hashes < n_start_hashes` later. + Ok(n_end_hashes) + } + /// Eats the double-quoted string and returns `n_hashes` and an error if encountered.
fn raw_double_quoted_string(&mut self, prefix_len: u32) -> Result<u8, RawStrError> { // Wrap the actual function to handle the error with too many hashes. diff --git a/compiler/rustc_parse/messages.ftl b/compiler/rustc_parse/messages.ftl index 873095dca8722..09a70f0856c80 100644 --- a/compiler/rustc_parse/messages.ftl +++ b/compiler/rustc_parse/messages.ftl @@ -672,6 +672,10 @@ parse_require_colon_after_labeled_expression = labeled expression must be follow .label = the label .suggestion = add `:` after the label +parse_reserved_guarded_string = invalid string literal + .note = unprefixed guarded string literals are reserved for future use + .suggestion_whitespace = consider inserting whitespace here + parse_return_types_use_thin_arrow = return types are denoted using `->` .suggestion = use `->` instead diff --git a/compiler/rustc_parse/src/errors.rs b/compiler/rustc_parse/src/errors.rs index d06f03a7c1767..6bcd795fe7a60 100644 --- a/compiler/rustc_parse/src/errors.rs +++ b/compiler/rustc_parse/src/errors.rs @@ -2009,6 +2009,24 @@ pub enum UnknownPrefixSugg { }, } +#[derive(Diagnostic)] +#[diag(parse_reserved_guarded_string)] +#[note] +pub struct ReservedGuardedString { + #[primary_span] + pub span: Span, + #[subdiagnostic] + pub sugg: Option<GuardedStringSugg>, +} +#[derive(Subdiagnostic)] +#[suggestion( + parse_suggestion_whitespace, + code = " ", + applicability = "maybe-incorrect", + style = "verbose" +)] +pub struct GuardedStringSugg(#[primary_span] pub Span); + #[derive(Diagnostic)] #[diag(parse_too_many_hashes)] pub struct TooManyHashes { diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 1abb1d29562d9..ed2596bec106f 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -490,6 +490,30 @@ impl<'psess, 'src> StringReader<'psess, 'src> { self.report_raw_str_error(start, 1); } } + // RFC 3593 reserved this syntax for future use.
+ rustc_lexer::LiteralKind::GuardedStr { n_start_hashes, n_end_hashes } => { + let span = self.mk_sp(start, self.pos); + + if let Some(n_start_hashes) = n_start_hashes { + let n = u32::from(n_start_hashes.get()); + let e = u32::from(n_end_hashes); + let expn_data = span.ctxt().outer_expn_data(); + + let space_pos = start + BytePos(n); + let space_span = self.mk_sp(space_pos, space_pos); + + let sugg = if expn_data.is_root() { + Some(errors::GuardedStringSugg(space_span)) + } else { + None + }; + + self.dcx().emit_err(errors::ReservedGuardedString { span, sugg }); + self.cook_unicode(token::Str, Mode::Str, start, end, 1 + n, 1 + e) // ##" "## + } else { + self.dcx().emit_fatal(errors::ReservedGuardedString { span, sugg: None }); + } + } rustc_lexer::LiteralKind::RawByteStr { n_hashes } => { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs index 336d18a1df1c6..5bb602538efc8 100644 --- a/src/librustdoc/html/highlight.rs +++ b/src/librustdoc/html/highlight.rs @@ -850,7 +850,8 @@ impl<'src> Classifier<'src> { | LiteralKind::RawStr { .. } | LiteralKind::RawByteStr { .. } | LiteralKind::CStr { .. } - | LiteralKind::RawCStr { .. } => Class::String, + | LiteralKind::RawCStr { .. } + | LiteralKind::GuardedStr { .. } => Class::String, // Number literals. LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number, }, diff --git a/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs b/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs index e5fec67de7060..922bb6ffd1430 100644 --- a/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs +++ b/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs @@ -331,6 +331,10 @@ impl<'a> Converter<'a> { } C_STRING } + rustc_lexer::LiteralKind::GuardedStr { .. 
} => { + err = "Invalid string literal"; + STRING + } }; let err = if err.is_empty() { None } else { Some(err) }; diff --git a/src/tools/rust-analyzer/crates/proc-macro-srv/src/server/rust_analyzer_span.rs b/src/tools/rust-analyzer/crates/proc-macro-srv/src/server/rust_analyzer_span.rs index 0350bde412243..1572585a57fcd 100644 --- a/src/tools/rust-analyzer/crates/proc-macro-srv/src/server/rust_analyzer_span.rs +++ b/src/tools/rust-analyzer/crates/proc-macro-srv/src/server/rust_analyzer_span.rs @@ -120,6 +120,7 @@ impl server::FreeFunctions for RaSpanServer { 3 + n_hashes.unwrap_or_default() as usize, 1 + n_hashes.unwrap_or_default() as usize, ), + LiteralKind::GuardedStr { .. } => return Err(()), }; let (lit, suffix) = s.split_at(suffix_start as usize); diff --git a/src/tools/rust-analyzer/crates/proc-macro-srv/src/server/token_id.rs b/src/tools/rust-analyzer/crates/proc-macro-srv/src/server/token_id.rs index ad7bd954cf16e..1159e2b1bc908 100644 --- a/src/tools/rust-analyzer/crates/proc-macro-srv/src/server/token_id.rs +++ b/src/tools/rust-analyzer/crates/proc-macro-srv/src/server/token_id.rs @@ -113,6 +113,7 @@ impl server::FreeFunctions for TokenIdServer { 3 + n_hashes.unwrap_or_default() as usize, 1 + n_hashes.unwrap_or_default() as usize, ), + LiteralKind::GuardedStr { .. } => return Err(()), }; let (lit, suffix) = s.split_at(suffix_start as usize); diff --git a/tests/ui/lexer/reserved-guarded-strings.rs b/tests/ui/lexer/reserved-guarded-strings.rs new file mode 100644 index 0000000000000..5b8442693b6eb --- /dev/null +++ b/tests/ui/lexer/reserved-guarded-strings.rs @@ -0,0 +1,60 @@ +//@ compile-flags: -Zunstable-options +//@ edition:2024 + +macro_rules! demo1 { + ( $a:tt ) => { println!("one tokens") }; +} + +macro_rules! demo2 { + ( $a:tt $b:tt ) => { println!("two tokens") }; +} + +macro_rules! demo3 { + ( $a:tt $b:tt $c:tt ) => { println!("three tokens") }; +} + +macro_rules! 
demo4 { + ( $a:tt $b:tt $c:tt $d:tt ) => { println!("four tokens") }; +} + +macro_rules! demo5 { + ( $a:tt $b:tt $c:tt $d:tt $e:tt ) => { println!("five tokens") }; +} + +macro_rules! demo6 { + ( $a:tt $b:tt $c:tt $d:tt $e:tt $f:tt ) => { println!("six tokens") }; +} + +macro_rules! demo7 { + ( $a:tt $b:tt $c:tt $d:tt $e:tt $f:tt $g:tt ) => { println!("seven tokens") }; +} + +fn main() { + demo1!(""); + demo2!(# ""); + demo3!(# ""#); + demo2!(# "foo"); + demo3!(## "foo"); + demo3!(# "foo"#); + demo4!(### "foo"); + demo4!(## "foo"#); + demo7!(### "foo"###); + + demo2!("foo"#); + demo4!("foo"###); + + demo2!(blah"xx"); //~ ERROR prefix `blah` is unknown + demo2!(blah#"xx"#); + //~^ ERROR prefix `blah` is unknown + //~| ERROR invalid string literal + + demo1!(#""); //~ ERROR invalid string literal + demo1!(#""#); //~ ERROR invalid string literal + demo1!(####""); //~ ERROR invalid string literal + demo1!(#"foo"); //~ ERROR invalid string literal + demo1!(###"foo"); //~ ERROR invalid string literal + demo1!(#"foo"#); //~ ERROR invalid string literal + demo1!(###"foo"#); //~ ERROR invalid string literal + demo1!(###"foo"##); //~ ERROR invalid string literal + demo1!(###"foo"###); //~ ERROR invalid string literal +} diff --git a/tests/ui/lexer/reserved-guarded-strings.stderr b/tests/ui/lexer/reserved-guarded-strings.stderr new file mode 100644 index 0000000000000..b19f7c9ec9d7c --- /dev/null +++ b/tests/ui/lexer/reserved-guarded-strings.stderr @@ -0,0 +1,146 @@ +error: prefix `blah` is unknown + --> $DIR/reserved-guarded-strings.rs:46:12 + | +LL | demo2!(blah"xx"); + | ^^^^ unknown prefix + | + = note: prefixed identifiers and literals are reserved since Rust 2021 +help: consider inserting whitespace here + | +LL | demo2!(blah "xx"); + | + + +error: prefix `blah` is unknown + --> $DIR/reserved-guarded-strings.rs:47:12 + | +LL | demo2!(blah#"xx"#); + | ^^^^ unknown prefix + | + = note: prefixed identifiers and literals are reserved since Rust 2021 +help: consider 
inserting whitespace here + | +LL | demo2!(blah #"xx"#); + | + + +error: invalid string literal + --> $DIR/reserved-guarded-strings.rs:47:16 + | +LL | demo2!(blah#"xx"#); + | ^^^^^^ + | + = note: unprefixed guarded string literals are reserved for future use +help: consider inserting whitespace here + | +LL | demo2!(blah# "xx"#); + | + + +error: invalid string literal + --> $DIR/reserved-guarded-strings.rs:51:12 + | +LL | demo1!(#""); + | ^^^ + | + = note: unprefixed guarded string literals are reserved for future use +help: consider inserting whitespace here + | +LL | demo1!(# ""); + | + + +error: invalid string literal + --> $DIR/reserved-guarded-strings.rs:52:12 + | +LL | demo1!(#""#); + | ^^^^ + | + = note: unprefixed guarded string literals are reserved for future use +help: consider inserting whitespace here + | +LL | demo1!(# ""#); + | + + +error: invalid string literal + --> $DIR/reserved-guarded-strings.rs:53:12 + | +LL | demo1!(####""); + | ^^^^^^ + | + = note: unprefixed guarded string literals are reserved for future use +help: consider inserting whitespace here + | +LL | demo1!(#### ""); + | + + +error: invalid string literal + --> $DIR/reserved-guarded-strings.rs:54:12 + | +LL | demo1!(#"foo"); + | ^^^^^^ + | + = note: unprefixed guarded string literals are reserved for future use +help: consider inserting whitespace here + | +LL | demo1!(# "foo"); + | + + +error: invalid string literal + --> $DIR/reserved-guarded-strings.rs:55:12 + | +LL | demo1!(###"foo"); + | ^^^^^^^^ + | + = note: unprefixed guarded string literals are reserved for future use +help: consider inserting whitespace here + | +LL | demo1!(### "foo"); + | + + +error: invalid string literal + --> $DIR/reserved-guarded-strings.rs:56:12 + | +LL | demo1!(#"foo"#); + | ^^^^^^^ + | + = note: unprefixed guarded string literals are reserved for future use +help: consider inserting whitespace here + | +LL | demo1!(# "foo"#); + | + + +error: invalid string literal + --> 
$DIR/reserved-guarded-strings.rs:57:12 + | +LL | demo1!(###"foo"#); + | ^^^^^^^^^ + | + = note: unprefixed guarded string literals are reserved for future use +help: consider inserting whitespace here + | +LL | demo1!(### "foo"#); + | + + +error: invalid string literal + --> $DIR/reserved-guarded-strings.rs:58:12 + | +LL | demo1!(###"foo"##); + | ^^^^^^^^^^ + | + = note: unprefixed guarded string literals are reserved for future use +help: consider inserting whitespace here + | +LL | demo1!(### "foo"##); + | + + +error: invalid string literal + --> $DIR/reserved-guarded-strings.rs:59:12 + | +LL | demo1!(###"foo"###); + | ^^^^^^^^^^^ + | + = note: unprefixed guarded string literals are reserved for future use +help: consider inserting whitespace here + | +LL | demo1!(### "foo"###); + | + + +error: aborting due to 12 previous errors +