From b64c7f4e9e5d807dd7e65d5b2b224f4b1c55b331 Mon Sep 17 00:00:00 2001
From: Peter Jaszkowiak
Date: Wed, 1 May 2024 22:51:15 -0600
Subject: [PATCH] Reserve guarded string literal syntax (RFC 3593)
The syntax change applies to all editions, because the
particular syntax `#"foo"#` is unlikely to exist in the wild.
---
compiler/rustc_lexer/src/cursor.rs | 1 +
compiler/rustc_lexer/src/lib.rs | 92 ++++++++++-
compiler/rustc_parse/messages.ftl | 4 +
compiler/rustc_parse/src/errors.rs | 18 +++
compiler/rustc_parse/src/lexer/mod.rs | 24 +++
src/librustdoc/html/highlight.rs | 3 +-
.../crates/parser/src/lexed_str.rs | 4 +
.../src/server/rust_analyzer_span.rs | 1 +
.../proc-macro-srv/src/server/token_id.rs | 1 +
tests/ui/lexer/reserved-guarded-strings.rs | 60 +++++++
.../ui/lexer/reserved-guarded-strings.stderr | 146 ++++++++++++++++++
11 files changed, 345 insertions(+), 9 deletions(-)
create mode 100644 tests/ui/lexer/reserved-guarded-strings.rs
create mode 100644 tests/ui/lexer/reserved-guarded-strings.stderr
diff --git a/compiler/rustc_lexer/src/cursor.rs b/compiler/rustc_lexer/src/cursor.rs
index d173c3ac0327b..eb739a6f457fe 100644
--- a/compiler/rustc_lexer/src/cursor.rs
+++ b/compiler/rustc_lexer/src/cursor.rs
@@ -4,6 +4,7 @@ use std::str::Chars;
///
/// Next characters can be peeked via `first` method,
/// and position can be shifted forward via `bump` method.
+#[derive(Clone)]
pub struct Cursor<'a> {
len_remaining: usize,
/// Iterator over chars. Slightly faster than a &str.
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index 6f8a9792b6ce8..848d8b46daea1 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -29,6 +29,8 @@ pub mod unescape;
#[cfg(test)]
mod tests;
+use std::num::NonZeroU8;
+
pub use crate::cursor::Cursor;
use self::LiteralKind::*;
@@ -179,24 +181,27 @@ pub enum DocStyle {
/// `rustc_ast::ast::LitKind`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum LiteralKind {
- /// "12_u8", "0o100", "0b120i99", "1f32".
+ /// `12_u8`, `0o100`, `0b120i99`, `1f32`.
Int { base: Base, empty_int: bool },
- /// "12.34f32", "1e3", but not "1f32".
+ /// `12.34f32`, `1e3`, but not `1f32`.
Float { base: Base, empty_exponent: bool },
- /// "'a'", "'\\'", "'''", "';"
+ /// `'a'`, `'\\'`, `'''`, `';`
Char { terminated: bool },
- /// "b'a'", "b'\\'", "b'''", "b';"
+ /// `b'a'`, `b'\\'`, `b'''`, `b';`
Byte { terminated: bool },
- /// ""abc"", ""abc"
+ /// `"abc"`, `"abc`
Str { terminated: bool },
- /// "b"abc"", "b"abc"
+ /// `b"abc"`, `b"abc`
ByteStr { terminated: bool },
/// `c"abc"`, `c"abc`
CStr { terminated: bool },
- /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates
+ /// `#"abc"#`, `#"a`, `##"a"#`. `None` indicates no closing quote or too many hashes.
+ /// Allows fewer hashes to close the string to support older editions.
+ GuardedStr { n_start_hashes: Option<NonZeroU8>, n_end_hashes: u8 },
+ /// `r"abc"`, `r#"abc"#`, `r####"ab"###"c"####`, `r#"a`. `None` indicates
/// an invalid literal.
RawStr { n_hashes: Option },
- /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None`
+ /// `br"abc"`, `br#"abc"#`, `br####"ab"###"c"####`, `br#"a`. `None`
/// indicates an invalid literal.
RawByteStr { n_hashes: Option },
/// `cr"abc"`, "cr#"abc"#", `cr#"a`. `None` indicates an invalid literal.
@@ -365,6 +370,49 @@ impl Cursor<'_> {
_ => self.ident_or_unknown_prefix(),
},
+ // Guarded string literal (reserved syntax).
+ '#' if matches!(self.first(), '"' | '#') => {
+ // Create a backup to restore later if this
+ // turns out to not be a guarded literal.
+ let backup = self.clone();
+
+ let mut n_start_hashes: u32 = 1; // Already captured one `#`.
+ while self.first() == '#' {
+ n_start_hashes += 1;
+ self.bump();
+ }
+
+ if self.first() == '"' {
+ self.bump();
+
+ let res = self.guarded_double_quoted_string(n_start_hashes);
+ let suffix_start = self.pos_within_token();
+
+ if let (Ok(n_end_hashes), Ok(n)) = (res, u8::try_from(n_start_hashes)) {
+ self.eat_literal_suffix();
+
+ Literal {
+ kind: GuardedStr {
+ n_start_hashes: NonZeroU8::new(n),
+ // Always succeeds because `n_end_hashes <= n`
+ n_end_hashes: n_end_hashes.try_into().unwrap(),
+ },
+ suffix_start,
+ }
+ } else {
+ Literal {
+ kind: GuardedStr { n_start_hashes: None, n_end_hashes: 0 },
+ suffix_start,
+ }
+ }
+ } else {
+ // Not a guarded string, so restore old state.
+ *self = backup;
+ // Return a pound token.
+ Pound
+ }
+ }
+
// Byte literal, byte string literal, raw byte string literal or identifier.
'b' => self.c_or_byte_string(
|terminated| ByteStr { terminated },
@@ -758,6 +806,34 @@ impl Cursor<'_> {
false
}
+ /// Eats the double-quoted string and returns `n_end_hashes`, or an error if unterminated.
+ fn guarded_double_quoted_string(&mut self, n_start_hashes: u32) -> Result<u32, RawStrError> {
+ debug_assert!(self.prev() == '"');
+
+ // Lex the string itself as a normal string literal
+ // so we can recover that for older editions later.
+ if !self.double_quoted_string() {
+ return Err(RawStrError::NoTerminator {
+ expected: n_start_hashes,
+ found: 0,
+ possible_terminator_offset: None,
+ });
+ }
+
+ // Consume closing '#' symbols.
+ // Note that this will not consume extra trailing `#` characters:
+ // `###"abcde"####` is lexed as a `GuardedStr { n_end_hashes: 3, .. }`
+ // followed by a `#` token.
+ let mut n_end_hashes = 0;
+ while self.first() == '#' && n_end_hashes < n_start_hashes {
+ n_end_hashes += 1;
+ self.bump();
+ }
+
+ // Handle `n_end_hashes < n_start_hashes` later.
+ Ok(n_end_hashes)
+ }
+
/// Eats the double-quoted string and returns `n_hashes` and an error if encountered.
fn raw_double_quoted_string(&mut self, prefix_len: u32) -> Result {
// Wrap the actual function to handle the error with too many hashes.
diff --git a/compiler/rustc_parse/messages.ftl b/compiler/rustc_parse/messages.ftl
index 873095dca8722..09a70f0856c80 100644
--- a/compiler/rustc_parse/messages.ftl
+++ b/compiler/rustc_parse/messages.ftl
@@ -672,6 +672,10 @@ parse_require_colon_after_labeled_expression = labeled expression must be follow
.label = the label
.suggestion = add `:` after the label
+parse_reserved_guarded_string = invalid string literal
+ .note = unprefixed guarded string literals are reserved for future use
+ .suggestion_whitespace = consider inserting whitespace here
+
parse_return_types_use_thin_arrow = return types are denoted using `->`
.suggestion = use `->` instead
diff --git a/compiler/rustc_parse/src/errors.rs b/compiler/rustc_parse/src/errors.rs
index d06f03a7c1767..6bcd795fe7a60 100644
--- a/compiler/rustc_parse/src/errors.rs
+++ b/compiler/rustc_parse/src/errors.rs
@@ -2009,6 +2009,24 @@ pub enum UnknownPrefixSugg {
},
}
+#[derive(Diagnostic)]
+#[diag(parse_reserved_guarded_string)]
+#[note]
+pub struct ReservedGuardedString {
+ #[primary_span]
+ pub span: Span,
+ #[subdiagnostic]
+ pub sugg: Option<GuardedStringSugg>,
+}
+#[derive(Subdiagnostic)]
+#[suggestion(
+ parse_suggestion_whitespace,
+ code = " ",
+ applicability = "maybe-incorrect",
+ style = "verbose"
+)]
+pub struct GuardedStringSugg(#[primary_span] pub Span);
+
#[derive(Diagnostic)]
#[diag(parse_too_many_hashes)]
pub struct TooManyHashes {
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index 1abb1d29562d9..ed2596bec106f 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -490,6 +490,30 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
self.report_raw_str_error(start, 1);
}
}
+ // RFC 3593 reserved this syntax for future use.
+ rustc_lexer::LiteralKind::GuardedStr { n_start_hashes, n_end_hashes } => {
+ let span = self.mk_sp(start, self.pos);
+
+ if let Some(n_start_hashes) = n_start_hashes {
+ let n = u32::from(n_start_hashes.get());
+ let e = u32::from(n_end_hashes);
+ let expn_data = span.ctxt().outer_expn_data();
+
+ let space_pos = start + BytePos(n);
+ let space_span = self.mk_sp(space_pos, space_pos);
+
+ let sugg = if expn_data.is_root() {
+ Some(errors::GuardedStringSugg(space_span))
+ } else {
+ None
+ };
+
+ self.dcx().emit_err(errors::ReservedGuardedString { span, sugg });
+ self.cook_unicode(token::Str, Mode::Str, start, end, 1 + n, 1 + e) // ##" "##
+ } else {
+ self.dcx().emit_fatal(errors::ReservedGuardedString { span, sugg: None });
+ }
+ }
rustc_lexer::LiteralKind::RawByteStr { n_hashes } => {
if let Some(n_hashes) = n_hashes {
let n = u32::from(n_hashes);
diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs
index 336d18a1df1c6..5bb602538efc8 100644
--- a/src/librustdoc/html/highlight.rs
+++ b/src/librustdoc/html/highlight.rs
@@ -850,7 +850,8 @@ impl<'src> Classifier<'src> {
| LiteralKind::RawStr { .. }
| LiteralKind::RawByteStr { .. }
| LiteralKind::CStr { .. }
- | LiteralKind::RawCStr { .. } => Class::String,
+ | LiteralKind::RawCStr { .. }
+ | LiteralKind::GuardedStr { .. } => Class::String,
// Number literals.
LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number,
},
diff --git a/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs b/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs
index e5fec67de7060..922bb6ffd1430 100644
--- a/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs
+++ b/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs
@@ -331,6 +331,10 @@ impl<'a> Converter<'a> {
}
C_STRING
}
+ rustc_lexer::LiteralKind::GuardedStr { .. } => {
+ err = "Invalid string literal";
+ STRING
+ }
};
let err = if err.is_empty() { None } else { Some(err) };
diff --git a/src/tools/rust-analyzer/crates/proc-macro-srv/src/server/rust_analyzer_span.rs b/src/tools/rust-analyzer/crates/proc-macro-srv/src/server/rust_analyzer_span.rs
index 0350bde412243..1572585a57fcd 100644
--- a/src/tools/rust-analyzer/crates/proc-macro-srv/src/server/rust_analyzer_span.rs
+++ b/src/tools/rust-analyzer/crates/proc-macro-srv/src/server/rust_analyzer_span.rs
@@ -120,6 +120,7 @@ impl server::FreeFunctions for RaSpanServer {
3 + n_hashes.unwrap_or_default() as usize,
1 + n_hashes.unwrap_or_default() as usize,
),
+ LiteralKind::GuardedStr { .. } => return Err(()),
};
let (lit, suffix) = s.split_at(suffix_start as usize);
diff --git a/src/tools/rust-analyzer/crates/proc-macro-srv/src/server/token_id.rs b/src/tools/rust-analyzer/crates/proc-macro-srv/src/server/token_id.rs
index ad7bd954cf16e..1159e2b1bc908 100644
--- a/src/tools/rust-analyzer/crates/proc-macro-srv/src/server/token_id.rs
+++ b/src/tools/rust-analyzer/crates/proc-macro-srv/src/server/token_id.rs
@@ -113,6 +113,7 @@ impl server::FreeFunctions for TokenIdServer {
3 + n_hashes.unwrap_or_default() as usize,
1 + n_hashes.unwrap_or_default() as usize,
),
+ LiteralKind::GuardedStr { .. } => return Err(()),
};
let (lit, suffix) = s.split_at(suffix_start as usize);
diff --git a/tests/ui/lexer/reserved-guarded-strings.rs b/tests/ui/lexer/reserved-guarded-strings.rs
new file mode 100644
index 0000000000000..5b8442693b6eb
--- /dev/null
+++ b/tests/ui/lexer/reserved-guarded-strings.rs
@@ -0,0 +1,60 @@
+//@ compile-flags: -Zunstable-options
+//@ edition:2024
+
+macro_rules! demo1 {
+ ( $a:tt ) => { println!("one tokens") };
+}
+
+macro_rules! demo2 {
+ ( $a:tt $b:tt ) => { println!("two tokens") };
+}
+
+macro_rules! demo3 {
+ ( $a:tt $b:tt $c:tt ) => { println!("three tokens") };
+}
+
+macro_rules! demo4 {
+ ( $a:tt $b:tt $c:tt $d:tt ) => { println!("four tokens") };
+}
+
+macro_rules! demo5 {
+ ( $a:tt $b:tt $c:tt $d:tt $e:tt ) => { println!("five tokens") };
+}
+
+macro_rules! demo6 {
+ ( $a:tt $b:tt $c:tt $d:tt $e:tt $f:tt ) => { println!("six tokens") };
+}
+
+macro_rules! demo7 {
+ ( $a:tt $b:tt $c:tt $d:tt $e:tt $f:tt $g:tt ) => { println!("seven tokens") };
+}
+
+fn main() {
+ demo1!("");
+ demo2!(# "");
+ demo3!(# ""#);
+ demo2!(# "foo");
+ demo3!(## "foo");
+ demo3!(# "foo"#);
+ demo4!(### "foo");
+ demo4!(## "foo"#);
+ demo7!(### "foo"###);
+
+ demo2!("foo"#);
+ demo4!("foo"###);
+
+ demo2!(blah"xx"); //~ ERROR prefix `blah` is unknown
+ demo2!(blah#"xx"#);
+ //~^ ERROR prefix `blah` is unknown
+ //~| ERROR invalid string literal
+
+ demo1!(#""); //~ ERROR invalid string literal
+ demo1!(#""#); //~ ERROR invalid string literal
+ demo1!(####""); //~ ERROR invalid string literal
+ demo1!(#"foo"); //~ ERROR invalid string literal
+ demo1!(###"foo"); //~ ERROR invalid string literal
+ demo1!(#"foo"#); //~ ERROR invalid string literal
+ demo1!(###"foo"#); //~ ERROR invalid string literal
+ demo1!(###"foo"##); //~ ERROR invalid string literal
+ demo1!(###"foo"###); //~ ERROR invalid string literal
+}
diff --git a/tests/ui/lexer/reserved-guarded-strings.stderr b/tests/ui/lexer/reserved-guarded-strings.stderr
new file mode 100644
index 0000000000000..b19f7c9ec9d7c
--- /dev/null
+++ b/tests/ui/lexer/reserved-guarded-strings.stderr
@@ -0,0 +1,146 @@
+error: prefix `blah` is unknown
+ --> $DIR/reserved-guarded-strings.rs:46:12
+ |
+LL | demo2!(blah"xx");
+ | ^^^^ unknown prefix
+ |
+ = note: prefixed identifiers and literals are reserved since Rust 2021
+help: consider inserting whitespace here
+ |
+LL | demo2!(blah "xx");
+ | +
+
+error: prefix `blah` is unknown
+ --> $DIR/reserved-guarded-strings.rs:47:12
+ |
+LL | demo2!(blah#"xx"#);
+ | ^^^^ unknown prefix
+ |
+ = note: prefixed identifiers and literals are reserved since Rust 2021
+help: consider inserting whitespace here
+ |
+LL | demo2!(blah #"xx"#);
+ | +
+
+error: invalid string literal
+ --> $DIR/reserved-guarded-strings.rs:47:16
+ |
+LL | demo2!(blah#"xx"#);
+ | ^^^^^^
+ |
+ = note: unprefixed guarded string literals are reserved for future use
+help: consider inserting whitespace here
+ |
+LL | demo2!(blah# "xx"#);
+ | +
+
+error: invalid string literal
+ --> $DIR/reserved-guarded-strings.rs:51:12
+ |
+LL | demo1!(#"");
+ | ^^^
+ |
+ = note: unprefixed guarded string literals are reserved for future use
+help: consider inserting whitespace here
+ |
+LL | demo1!(# "");
+ | +
+
+error: invalid string literal
+ --> $DIR/reserved-guarded-strings.rs:52:12
+ |
+LL | demo1!(#""#);
+ | ^^^^
+ |
+ = note: unprefixed guarded string literals are reserved for future use
+help: consider inserting whitespace here
+ |
+LL | demo1!(# ""#);
+ | +
+
+error: invalid string literal
+ --> $DIR/reserved-guarded-strings.rs:53:12
+ |
+LL | demo1!(####"");
+ | ^^^^^^
+ |
+ = note: unprefixed guarded string literals are reserved for future use
+help: consider inserting whitespace here
+ |
+LL | demo1!(#### "");
+ | +
+
+error: invalid string literal
+ --> $DIR/reserved-guarded-strings.rs:54:12
+ |
+LL | demo1!(#"foo");
+ | ^^^^^^
+ |
+ = note: unprefixed guarded string literals are reserved for future use
+help: consider inserting whitespace here
+ |
+LL | demo1!(# "foo");
+ | +
+
+error: invalid string literal
+ --> $DIR/reserved-guarded-strings.rs:55:12
+ |
+LL | demo1!(###"foo");
+ | ^^^^^^^^
+ |
+ = note: unprefixed guarded string literals are reserved for future use
+help: consider inserting whitespace here
+ |
+LL | demo1!(### "foo");
+ | +
+
+error: invalid string literal
+ --> $DIR/reserved-guarded-strings.rs:56:12
+ |
+LL | demo1!(#"foo"#);
+ | ^^^^^^^
+ |
+ = note: unprefixed guarded string literals are reserved for future use
+help: consider inserting whitespace here
+ |
+LL | demo1!(# "foo"#);
+ | +
+
+error: invalid string literal
+ --> $DIR/reserved-guarded-strings.rs:57:12
+ |
+LL | demo1!(###"foo"#);
+ | ^^^^^^^^^
+ |
+ = note: unprefixed guarded string literals are reserved for future use
+help: consider inserting whitespace here
+ |
+LL | demo1!(### "foo"#);
+ | +
+
+error: invalid string literal
+ --> $DIR/reserved-guarded-strings.rs:58:12
+ |
+LL | demo1!(###"foo"##);
+ | ^^^^^^^^^^
+ |
+ = note: unprefixed guarded string literals are reserved for future use
+help: consider inserting whitespace here
+ |
+LL | demo1!(### "foo"##);
+ | +
+
+error: invalid string literal
+ --> $DIR/reserved-guarded-strings.rs:59:12
+ |
+LL | demo1!(###"foo"###);
+ | ^^^^^^^^^^^
+ |
+ = note: unprefixed guarded string literals are reserved for future use
+help: consider inserting whitespace here
+ |
+LL | demo1!(### "foo"###);
+ | +
+
+error: aborting due to 12 previous errors
+