diff --git a/src/libsyntax/parse/lexer/literal_validation.rs b/src/libsyntax/parse/lexer/literal_validation.rs new file mode 100644 index 0000000000000..cc4b81e0acc36 --- /dev/null +++ b/src/libsyntax/parse/lexer/literal_validation.rs @@ -0,0 +1,130 @@ +//! This module contains utilities for literal tokens validation. + +use super::unescape_error_reporting::emit_unescape_error; +use super::StringReader; + +use rustc_lexer::unescape; +use rustc_lexer::Base; +use syntax_pos::BytePos; + +// Extensions for the `StringReader` providing verification helper +// methods. +impl<'a> StringReader<'a> { + pub(super) fn validate_raw_str_escape(&self, content_start: BytePos, content_end: BytePos) { + let lit = self.str_from_to(content_start, content_end); + unescape::unescape_raw_str(lit, &mut |range, c| { + if let Err(err) = c { + emit_unescape_error( + &self.sess.span_diagnostic, + lit, + self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), + unescape::Mode::Str, + range, + err, + ) + } + }) + } + + pub(super) fn validate_char_escape(&self, content_start: BytePos, content_end: BytePos) { + let lit = self.str_from_to(content_start, content_end); + if let Err((off, err)) = unescape::unescape_char(lit) { + emit_unescape_error( + &self.sess.span_diagnostic, + lit, + self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), + unescape::Mode::Char, + 0..off, + err, + ) + } + } + + pub(super) fn validate_byte_escape(&self, content_start: BytePos, content_end: BytePos) { + let lit = self.str_from_to(content_start, content_end); + if let Err((off, err)) = unescape::unescape_byte(lit) { + emit_unescape_error( + &self.sess.span_diagnostic, + lit, + self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), + unescape::Mode::Byte, + 0..off, + err, + ) + } + } + + pub(super) fn validate_str_escape(&self, content_start: BytePos, content_end: BytePos) { + let lit = self.str_from_to(content_start, content_end); + unescape::unescape_str(lit, &mut |range, c| { + if let Err(err) = c { + emit_unescape_error( + &self.sess.span_diagnostic, + lit, + self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), + unescape::Mode::Str, + range, + err, + ) + } + }) + } + + pub(super) fn validate_raw_byte_str_escape( + &self, + content_start: BytePos, + content_end: BytePos, + ) { + let lit = self.str_from_to(content_start, content_end); + unescape::unescape_raw_byte_str(lit, &mut |range, c| { + if let Err(err) = c { + emit_unescape_error( + &self.sess.span_diagnostic, + lit, + self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), + unescape::Mode::ByteStr, + range, + err, + ) + } + }) + } + + pub(super) fn validate_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) { + let lit = self.str_from_to(content_start, content_end); + unescape::unescape_byte_str(lit, &mut |range, c| { + if let Err(err) = c { + emit_unescape_error( + &self.sess.span_diagnostic, + lit, + self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), + unescape::Mode::ByteStr, + range, + err, + ) + } + }) + } + + pub(super) fn validate_int_literal( + &self, + base: Base, + content_start: BytePos, + content_end: BytePos, + ) { + let base = match base { + Base::Binary => 2, + Base::Octal => 8, + _ => return, + }; + let s = self.str_from_to(content_start + BytePos(2), content_end); + for (idx, c) in s.char_indices() { + let idx = idx as u32; + if c != '_' && c.to_digit(base).is_none() { + let lo = content_start + BytePos(2 + idx); + let hi = content_start + BytePos(2 + idx + c.len_utf8() as u32); + self.err_span_(lo, hi, &format!("invalid digit for a base {} literal", base)); + } + } + } +} diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index b1b7b08c78a3b..f88084673f91a 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -1,25 +1,26 @@ -use crate::token::{self, Token, TokenKind}; use crate::sess::ParseSess; use crate::symbol::{sym, Symbol}; +use crate::token::{self, Token, TokenKind}; use crate::util::comments; -use errors::{FatalError, DiagnosticBuilder}; +use errors::{DiagnosticBuilder, FatalError}; use syntax_pos::{BytePos, Pos, Span}; -use rustc_lexer::Base; -use rustc_lexer::unescape; +use log::debug; +use rustc_data_structures::sync::Lrc; use std::char; use std::convert::TryInto; -use rustc_data_structures::sync::Lrc; -use log::debug; + +use unescape_error_reporting::push_escaped_char; #[cfg(test)] mod tests; +mod literal_validation; mod tokentrees; -mod unicode_chars; mod unescape_error_reporting; -use unescape_error_reporting::{emit_unescape_error, push_escaped_char}; +mod unicode_chars; +mod verification; #[derive(Clone, Debug)] pub struct UnmatchedBrace { @@ -44,12 +45,14 @@ pub struct StringReader<'a> { } impl<'a> StringReader<'a> { - pub fn new(sess: &'a ParseSess, - source_file: Lrc, - override_span: Option) -> Self { + pub fn new( + sess: &'a ParseSess, + source_file: Lrc, + override_span: Option, + ) -> Self { if source_file.src.is_none() { - sess.span_diagnostic.bug(&format!("cannot lex `source_file` without source: {}", - source_file.name)); + sess.span_diagnostic + .bug(&format!("cannot lex `source_file` without source: {}", source_file.name)); } let src = (*source_file.src.as_ref().unwrap()).clone(); @@ -81,7 +84,6 @@ impl<'a> StringReader<'a> { sr } - fn mk_sp(&self, lo: BytePos, hi: BytePos) -> Span { self.override_span.unwrap_or_else(|| Span::with_root_ctxt(lo, hi)) } @@ -140,7 +142,6 @@ impl<'a> StringReader<'a> { self.sess.span_diagnostic.struct_span_err(sp, m).emit(); } - /// Report a fatal error spanning [`from_pos`, `to_pos`). fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> FatalError { self.fatal_span(self.mk_sp(from_pos, to_pos), m) @@ -151,15 +152,22 @@ impl<'a> StringReader<'a> { self.err_span(self.mk_sp(from_pos, to_pos), m) } - fn struct_span_fatal(&self, from_pos: BytePos, to_pos: BytePos, m: &str) - -> DiagnosticBuilder<'a> - { + fn struct_span_fatal( + &self, + from_pos: BytePos, + to_pos: BytePos, + m: &str, + ) -> DiagnosticBuilder<'a> { self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), m) } - fn struct_fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) - -> DiagnosticBuilder<'a> - { + fn struct_fatal_span_char( + &self, + from_pos: BytePos, + to_pos: BytePos, + m: &str, + c: char, + ) -> DiagnosticBuilder<'a> { let mut m = m.to_string(); m.push_str(": "); push_escaped_char(&mut m, c); @@ -170,17 +178,14 @@ impl<'a> StringReader<'a> { /// Turns simple `rustc_lexer::TokenKind` enum into a rich /// `libsyntax::TokenKind`. This turns strings into interned /// symbols and runs additional validation. - fn cook_lexer_token( - &self, - token: rustc_lexer::TokenKind, - start: BytePos, - ) -> TokenKind { + fn cook_lexer_token(&self, token: rustc_lexer::TokenKind, start: BytePos) -> TokenKind { match token { rustc_lexer::TokenKind::LineComment => { let string = self.str_from(start); - // comments with only more "/"s are not doc comments + // Comments with more than three "/"s are not doc comments. let tok = if comments::is_line_doc_comment(string) { - self.forbid_bare_cr(start, string, "bare CR not allowed in doc-comment"); + let is_block_comment = false; + self.verify_doc_comment_contents(start, string, is_block_comment); token::DocComment(Symbol::intern(string)) } else { token::Comment @@ -190,24 +195,14 @@ impl<'a> StringReader<'a> { } rustc_lexer::TokenKind::BlockComment { terminated } => { let string = self.str_from(start); - // block comments starting with "/**" or "/*!" are doc-comments - // but comments with only "*"s between two "/"s are not + // Block comments starting with "/**" or "/*!" are doc-comments, + // but comments with only "*"s between two "/"s are not. let is_doc_comment = comments::is_block_doc_comment(string); - - if !terminated { - let msg = if is_doc_comment { - "unterminated block doc-comment" - } else { - "unterminated block comment" - }; - let last_bpos = self.pos; - self.fatal_span_(start, last_bpos, msg).raise(); - } + self.verify_doc_comment_terminated(start, terminated, is_doc_comment); let tok = if is_doc_comment { - self.forbid_bare_cr(start, - string, - "bare CR not allowed in block doc-comment"); + let is_block_comment = true; + self.verify_doc_comment_contents(start, string, is_block_comment); token::DocComment(Symbol::intern(string)) } else { token::Comment @@ -216,21 +211,23 @@ impl<'a> StringReader<'a> { tok } rustc_lexer::TokenKind::Whitespace => token::Whitespace, - rustc_lexer::TokenKind::Ident | rustc_lexer::TokenKind::RawIdent => { - let is_raw_ident = token == rustc_lexer::TokenKind::RawIdent; - let mut ident_start = start; - if is_raw_ident { - ident_start = ident_start + BytePos(2); - } + rustc_lexer::TokenKind::Ident => { + let is_raw_ident = false; + + // FIXME: perform NFKC normalization here. (Issue #2253) + let sym = self.symbol_from(start); + token::Ident(sym, is_raw_ident) + } + rustc_lexer::TokenKind::RawIdent => { + let is_raw_ident = true; + let ident_start = start + BytePos(2); + // FIXME: perform NFKC normalization here. (Issue #2253) let sym = self.symbol_from(ident_start); - if is_raw_ident { - let span = self.mk_sp(start, self.pos); - if !sym.can_be_raw() { - self.err_span(span, &format!("`{}` cannot be a raw identifier", sym)); - } - self.sess.raw_identifier_spans.borrow_mut().push(span); - } + let span = self.mk_sp(start, self.pos); + self.verify_raw_symbol(&sym, span); + self.sess.raw_identifier_spans.borrow_mut().push(span); + token::Ident(sym, is_raw_ident) } rustc_lexer::TokenKind::Literal { kind, suffix_start } => { @@ -238,19 +235,10 @@ impl<'a> StringReader<'a> { let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind); let suffix = if suffix_start < self.pos { let string = self.str_from(suffix_start); - if string == "_" { - self.sess.span_diagnostic - .struct_span_warn(self.mk_sp(suffix_start, self.pos), - "underscore literal suffix is not allowed") - .warn("this was previously accepted by the compiler but is \ - being phased out; it will become a hard error in \ - a future release!") - .note("for more information, see issue #42326 \ - ") - .emit(); - None - } else { + if self.verify_no_underscore_literal_suffix(suffix_start, string).is_ok() { Some(Symbol::intern(string)) + } else { + None } } else { None @@ -262,13 +250,8 @@ impl<'a> StringReader<'a> { // expansion purposes. See #12512 for the gory details of why // this is necessary. let lifetime_name = self.str_from(start); - if starts_with_number { - self.err_span_( - start, - self.pos, - "lifetimes cannot start with a number", - ); - } + self.verify_lifetime(start, starts_with_number); + let ident = Symbol::intern(lifetime_name); token::Lifetime(ident) } @@ -301,11 +284,10 @@ impl<'a> StringReader<'a> { rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent), rustc_lexer::TokenKind::Unknown => { + // Report an error about unknown token. let c = self.str_from(start).chars().next().unwrap(); - let mut err = self.struct_fatal_span_char(start, - self.pos, - "unknown start of token", - c); + let mut err = + self.struct_fatal_span_char(start, self.pos, "unknown start of token", c); // FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs, // instead of keeping a table in `check_for_substitution`into the token. Ideally, // this should be inside `rustc_lexer`. However, we should first remove compound @@ -323,124 +305,74 @@ impl<'a> StringReader<'a> { &self, start: BytePos, suffix_start: BytePos, - kind: rustc_lexer::LiteralKind + kind: rustc_lexer::LiteralKind, ) -> (token::LitKind, Symbol) { match kind { - rustc_lexer::LiteralKind::Char { terminated } => { - if !terminated { - self.fatal_span_(start, suffix_start, - "unterminated character literal".into()) - .raise() - } + rustc_lexer::LiteralKind::Char { .. } => { + self.verify_literal_enclosed(start, suffix_start, kind); let content_start = start + BytePos(1); let content_end = suffix_start - BytePos(1); self.validate_char_escape(content_start, content_end); let id = self.symbol_from_to(content_start, content_end); (token::Char, id) - }, - rustc_lexer::LiteralKind::Byte { terminated } => { - if !terminated { - self.fatal_span_(start + BytePos(1), suffix_start, - "unterminated byte constant".into()) - .raise() - } + } + rustc_lexer::LiteralKind::Byte { .. } => { + self.verify_literal_enclosed(start, suffix_start, kind); let content_start = start + BytePos(2); let content_end = suffix_start - BytePos(1); self.validate_byte_escape(content_start, content_end); let id = self.symbol_from_to(content_start, content_end); (token::Byte, id) - }, - rustc_lexer::LiteralKind::Str { terminated } => { - if !terminated { - self.fatal_span_(start, suffix_start, - "unterminated double quote string".into()) - .raise() - } + } + rustc_lexer::LiteralKind::Str { .. } => { + self.verify_literal_enclosed(start, suffix_start, kind); let content_start = start + BytePos(1); let content_end = suffix_start - BytePos(1); self.validate_str_escape(content_start, content_end); let id = self.symbol_from_to(content_start, content_end); (token::Str, id) } - rustc_lexer::LiteralKind::ByteStr { terminated } => { - if !terminated { - self.fatal_span_(start + BytePos(1), suffix_start, - "unterminated double quote byte string".into()) - .raise() - } + rustc_lexer::LiteralKind::ByteStr { .. } => { + self.verify_literal_enclosed(start, suffix_start, kind); let content_start = start + BytePos(2); let content_end = suffix_start - BytePos(1); self.validate_byte_str_escape(content_start, content_end); let id = self.symbol_from_to(content_start, content_end); (token::ByteStr, id) } - rustc_lexer::LiteralKind::RawStr { n_hashes, started, terminated } => { - if !started { - self.report_non_started_raw_string(start); - } - if !terminated { - self.report_unterminated_raw_string(start, n_hashes) - } + rustc_lexer::LiteralKind::RawStr { n_hashes, .. } => { + self.verify_literal_enclosed(start, suffix_start, kind); let n_hashes: u16 = self.restrict_n_hashes(start, n_hashes); - let n = u32::from(n_hashes); - let content_start = start + BytePos(2 + n); - let content_end = suffix_start - BytePos(1 + n); + let content_start = start + BytePos(2 + n_hashes as u32); + let content_end = suffix_start - BytePos(1 + n_hashes as u32); self.validate_raw_str_escape(content_start, content_end); let id = self.symbol_from_to(content_start, content_end); (token::StrRaw(n_hashes), id) } - rustc_lexer::LiteralKind::RawByteStr { n_hashes, started, terminated } => { - if !started { - self.report_non_started_raw_string(start); - } - if !terminated { - self.report_unterminated_raw_string(start, n_hashes) - } + rustc_lexer::LiteralKind::RawByteStr { n_hashes, .. } => { + self.verify_literal_enclosed(start, suffix_start, kind); let n_hashes: u16 = self.restrict_n_hashes(start, n_hashes); - let n = u32::from(n_hashes); - let content_start = start + BytePos(3 + n); - let content_end = suffix_start - BytePos(1 + n); + let content_start = start + BytePos(3 + n_hashes as u32); + let content_end = suffix_start - BytePos(1 + n_hashes as u32); self.validate_raw_byte_str_escape(content_start, content_end); let id = self.symbol_from_to(content_start, content_end); (token::ByteStrRaw(n_hashes), id) } rustc_lexer::LiteralKind::Int { base, empty_int } => { - if empty_int { - self.err_span_(start, suffix_start, "no valid digits found for number"); - (token::Integer, sym::integer(0)) - } else { + if self.verify_int_not_empty(start, suffix_start, empty_int).is_ok() { self.validate_int_literal(base, start, suffix_start); (token::Integer, self.symbol_from_to(start, suffix_start)) + } else { + (token::Integer, sym::integer(0)) } - }, + } rustc_lexer::LiteralKind::Float { base, empty_exponent } => { - if empty_exponent { - let mut err = self.struct_span_fatal( - start, self.pos, - "expected at least one digit in exponent" - ); - err.emit(); - } - - match base { - Base::Hexadecimal => { - self.err_span_(start, suffix_start, - "hexadecimal float literal is not supported") - } - Base::Octal => { - self.err_span_(start, suffix_start, - "octal float literal is not supported") - } - Base::Binary => { - self.err_span_(start, suffix_start, - "binary float literal is not supported") - } - _ => () - } + self.verify_float_exponent_not_empty(start, empty_exponent); + self.verify_float_base(start, suffix_start, base); let id = self.symbol_from_to(start, suffix_start); (token::Float, id) - }, + } } } @@ -451,8 +383,7 @@ impl<'a> StringReader<'a> { /// Slice of the source text from `start` up to but excluding `self.pos`, /// meaning the slice does not include the character `self.ch`. - fn str_from(&self, start: BytePos) -> &str - { + fn str_from(&self, start: BytePos) -> &str { self.str_from_to(start, self.pos) } @@ -469,176 +400,21 @@ impl<'a> StringReader<'a> { } /// Slice of the source text spanning from `start` up to but excluding `end`. - fn str_from_to(&self, start: BytePos, end: BytePos) -> &str - { + fn str_from_to(&self, start: BytePos, end: BytePos) -> &str { &self.src[self.src_index(start)..self.src_index(end)] } - fn forbid_bare_cr(&self, start: BytePos, s: &str, errmsg: &str) { - let mut idx = 0; - loop { - idx = match s[idx..].find('\r') { - None => break, - Some(it) => idx + it + 1 - }; - self.err_span_(start + BytePos(idx as u32 - 1), - start + BytePos(idx as u32), - errmsg); - } - } - - fn report_non_started_raw_string(&self, start: BytePos) -> ! { - let bad_char = self.str_from(start).chars().last().unwrap(); - self - .struct_fatal_span_char( - start, - self.pos, - "found invalid character; only `#` is allowed \ - in raw string delimitation", - bad_char, - ) - .emit(); - FatalError.raise() - } - - fn report_unterminated_raw_string(&self, start: BytePos, n_hashes: usize) -> ! { - let mut err = self.struct_span_fatal( - start, start, - "unterminated raw string", - ); - err.span_label( - self.mk_sp(start, start), - "unterminated raw string", - ); - - if n_hashes > 0 { - err.note(&format!("this raw string should be terminated with `\"{}`", - "#".repeat(n_hashes as usize))); - } - - err.emit(); - FatalError.raise() - } - fn restrict_n_hashes(&self, start: BytePos, n_hashes: usize) -> u16 { match n_hashes.try_into() { Ok(n_hashes) => n_hashes, Err(_) => { - self.fatal_span_(start, - self.pos, - "too many `#` symbols: raw strings may be \ - delimited by up to 65535 `#` symbols").raise(); - } - } - } - - fn validate_char_escape(&self, content_start: BytePos, content_end: BytePos) { - let lit = self.str_from_to(content_start, content_end); - if let Err((off, err)) = unescape::unescape_char(lit) { - emit_unescape_error( - &self.sess.span_diagnostic, - lit, - self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), - unescape::Mode::Char, - 0..off, - err, - ) - } - } - - fn validate_byte_escape(&self, content_start: BytePos, content_end: BytePos) { - let lit = self.str_from_to(content_start, content_end); - if let Err((off, err)) = unescape::unescape_byte(lit) { - emit_unescape_error( - &self.sess.span_diagnostic, - lit, - self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), - unescape::Mode::Byte, - 0..off, - err, - ) - } - } - - fn validate_str_escape(&self, content_start: BytePos, content_end: BytePos) { - let lit = self.str_from_to(content_start, content_end); - unescape::unescape_str(lit, &mut |range, c| { - if let Err(err) = c { - emit_unescape_error( - &self.sess.span_diagnostic, - lit, - self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), - unescape::Mode::Str, - range, - err, - ) - } - }) - } - - fn validate_raw_str_escape(&self, content_start: BytePos, content_end: BytePos) { - let lit = self.str_from_to(content_start, content_end); - unescape::unescape_raw_str(lit, &mut |range, c| { - if let Err(err) = c { - emit_unescape_error( - &self.sess.span_diagnostic, - lit, - self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), - unescape::Mode::Str, - range, - err, - ) - } - }) - } - - fn validate_raw_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) { - let lit = self.str_from_to(content_start, content_end); - unescape::unescape_raw_byte_str(lit, &mut |range, c| { - if let Err(err) = c { - emit_unescape_error( - &self.sess.span_diagnostic, - lit, - self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), - unescape::Mode::ByteStr, - range, - err, + self.fatal_span_( + start, + self.pos, + "too many `#` symbols: raw strings may be \ + delimited by up to 65535 `#` symbols", ) - } - }) - } - - fn validate_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) { - let lit = self.str_from_to(content_start, content_end); - unescape::unescape_byte_str(lit, &mut |range, c| { - if let Err(err) = c { - emit_unescape_error( - &self.sess.span_diagnostic, - lit, - self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), - unescape::Mode::ByteStr, - range, - err, - ) - } - }) - } - - fn validate_int_literal(&self, base: Base, content_start: BytePos, content_end: BytePos) { - let base = match base { - Base::Binary => 2, - Base::Octal => 8, - _ => return, - }; - let s = self.str_from_to(content_start + BytePos(2), content_end); - for (idx, c) in s.char_indices() { - let idx = idx as u32; - if c != '_' && c.to_digit(base).is_none() { - let lo = content_start + BytePos(2 + idx); - let hi = content_start + BytePos(2 + idx + c.len_utf8() as u32); - self.err_span_(lo, hi, - &format!("invalid digit for a base {} literal", base)); - + .raise(); } } } diff --git a/src/libsyntax/parse/lexer/verification.rs b/src/libsyntax/parse/lexer/verification.rs new file mode 100644 index 0000000000000..a90e727b03ab7 --- /dev/null +++ b/src/libsyntax/parse/lexer/verification.rs @@ -0,0 +1,230 @@ +//! This module contains utilities for verifying the token state +//! and reporting errors on verification failures. +//! +//! The purpose of this module is to encapsulate the diagnostics +//! from the actual token parsing. + +use super::StringReader; + +use crate::symbol::Symbol; + +use errors::FatalError; +use rustc_lexer::Base; +use syntax_pos::{BytePos, Span}; + + +// Extensions for the `StringReader` providing verification helper +// methods. +impl<'a> StringReader<'a> { + pub(super) fn verify_doc_comment_terminated( + &self, + start: BytePos, + terminated: bool, + is_doc_comment: bool + ) { + if !terminated { + let msg = if is_doc_comment { + "unterminated block doc-comment" + } else { + "unterminated block comment" + }; + let last_bpos = self.pos; + self.fatal_span_(start, last_bpos, msg).raise(); + } + } + + pub(super) fn verify_doc_comment_contents( + &self, + start: BytePos, + string: &str, + is_block_comment: bool + ) { + let message = if is_block_comment { + "bare CR not allowed in block doc-comment" + } else { + "bare CR not allowed in doc-comment" + }; + self.verify_no_bare_cr(start, string, message); + } + + pub(super) fn verify_raw_symbol(&self, sym: &Symbol, span: Span) { + if !sym.can_be_raw() { + self.err_span(span, &format!("`{}` cannot be a raw identifier", sym)); + } + } + + pub(super) fn verify_no_underscore_literal_suffix( + &self, + suffix_start: BytePos, + string: &str + ) -> Result<(), ()>{ + if string == "_" { + self.sess + .span_diagnostic + .struct_span_warn( + self.mk_sp(suffix_start, self.pos), + "underscore literal suffix is not allowed", + ) + .warn( + "this was previously accepted by the compiler but is \ + being phased out; it will become a hard error in \ + a future release!", + ) + .note( + "for more information, see issue #42326 \ + ", + ) + .emit(); + Err(()) + } else { + Ok(()) + } + } + + pub(super) fn verify_lifetime(&self, start: BytePos, starts_with_number: bool) { + if starts_with_number { + self.err_span_(start, self.pos, "lifetimes cannot start with a number"); + } + } + + pub(super) fn verify_literal_enclosed( + &self, + start: BytePos, + suffix_start: BytePos, + kind: rustc_lexer::LiteralKind + ) { + match kind { + rustc_lexer::LiteralKind::Char { terminated } => { + if !terminated { + self.fatal_span_(start, suffix_start, "unterminated character literal".into()) + .raise() + } + } + rustc_lexer::LiteralKind::Byte { terminated } => { + if !terminated { + self.fatal_span_( + start + BytePos(1), + suffix_start, + "unterminated byte constant".into(), + ) + .raise() + } + } + rustc_lexer::LiteralKind::Str { terminated } => { + if !terminated { + self.fatal_span_(start, suffix_start, "unterminated double quote string".into()) + .raise() + } + } + rustc_lexer::LiteralKind::ByteStr { terminated } => { + if !terminated { + self.fatal_span_( + start + BytePos(1), + suffix_start, + "unterminated double quote byte string".into(), + ) + .raise() + } + } + rustc_lexer::LiteralKind::RawStr { n_hashes, started, terminated } => { + if !started { + self.report_non_started_raw_string(start); + } + if !terminated { + self.report_unterminated_raw_string(start, n_hashes) + } + } + rustc_lexer::LiteralKind::RawByteStr { n_hashes, started, terminated } => { + if !started { + self.report_non_started_raw_string(start); + } + if !terminated { + self.report_unterminated_raw_string(start, n_hashes) + } + } + token => panic!("Literal type {:?} cannot be 'enclosed'", token), + } + } + + pub(super) fn verify_int_not_empty( + &self, + start: BytePos, + suffix_start: BytePos, + empty_int: bool + ) -> Result<(), ()> { + if empty_int { + self.err_span_(start, suffix_start, "no valid digits found for number"); + Err(()) + } else { + Ok(()) + } + } + + pub(super) fn verify_float_exponent_not_empty(&self, start: BytePos, empty_exponent: bool) { + if empty_exponent { + let mut err = self.struct_span_fatal( + start, + self.pos, + "expected at least one digit in exponent", + ); + err.emit(); + } + } + + pub(super) fn verify_float_base(&self, start: BytePos, suffix_start: BytePos, base: Base) { + match base { + Base::Hexadecimal => self.err_span_( + start, + suffix_start, + "hexadecimal float literal is not supported", + ), + Base::Octal => { + self.err_span_(start, suffix_start, "octal float literal is not supported") + } + Base::Binary => { + self.err_span_(start, suffix_start, "binary float literal is not supported") + } + _ => (), + } + } + + /// Checks that there is no bare CR in the provided string. + fn verify_no_bare_cr(&self, start: BytePos, s: &str, errmsg: &str) { + let mut idx = 0; + loop { + idx = match s[idx..].find('\r') { + None => break, + Some(it) => idx + it + 1, + }; + self.err_span_(start + BytePos(idx as u32 - 1), start + BytePos(idx as u32), errmsg); + } + } + + fn report_non_started_raw_string(&self, start: BytePos) -> ! { + let bad_char = self.str_from(start).chars().last().unwrap(); + self.struct_fatal_span_char( + start, + self.pos, + "found invalid character; only `#` is allowed \ + in raw string delimitation", + bad_char, + ) + .emit(); + FatalError.raise() + } + + fn report_unterminated_raw_string(&self, start: BytePos, n_hashes: usize) -> ! { + let mut err = self.struct_span_fatal(start, start, "unterminated raw string"); + err.span_label(self.mk_sp(start, start), "unterminated raw string"); + + if n_hashes > 0 { + err.note(&format!( + "this raw string should be terminated with `\"{}`", + "#".repeat(n_hashes as usize) + )); + } + + err.emit(); + FatalError.raise() + } +}