From fcc2f92f454d9ce1e66713e28edb136d6948dd5a Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Sat, 18 May 2019 17:36:30 +0300 Subject: [PATCH 1/6] syntax: Return named errors from literal parsing functions --- src/libsyntax/attr/mod.rs | 4 +- src/libsyntax/parse/literal.rs | 263 ++++++++++-------- src/libsyntax/parse/parser.rs | 2 +- src/libsyntax/parse/token.rs | 25 +- src/test/ui/parser/no-hex-float-literal.rs | 2 +- .../ui/parser/no-hex-float-literal.stderr | 4 +- 6 files changed, 169 insertions(+), 131 deletions(-) diff --git a/src/libsyntax/attr/mod.rs b/src/libsyntax/attr/mod.rs index a97c094418dfb..2f75a8c9db57e 100644 --- a/src/libsyntax/attr/mod.rs +++ b/src/libsyntax/attr/mod.rs @@ -554,7 +554,7 @@ impl MetaItemKind { Some(TokenTree::Token(_, token::Eq)) => { tokens.next(); return if let Some(TokenTree::Token(span, token)) = tokens.next() { - Lit::from_token(&token, span, None).map(MetaItemKind::NameValue) + Lit::from_token(&token, span).ok().map(MetaItemKind::NameValue) } else { None }; @@ -599,7 +599,7 @@ impl NestedMetaItem { where I: Iterator, { if let Some(TokenTree::Token(span, token)) = tokens.peek().cloned() { - if let Some(lit) = Lit::from_token(&token, span, None) { + if let Ok(lit) = Lit::from_token(&token, span) { tokens.next(); return Some(NestedMetaItem::Literal(lit)); } diff --git a/src/libsyntax/parse/literal.rs b/src/libsyntax/parse/literal.rs index f277f0522b8e5..343aa6bc653ba 100644 --- a/src/libsyntax/parse/literal.rs +++ b/src/libsyntax/parse/literal.rs @@ -16,11 +16,64 @@ use syntax_pos::Span; use std::ascii; -macro_rules! err { - ($opt_diag:expr, |$span:ident, $diag:ident| $($body:tt)*) => { - match $opt_diag { - Some(($span, $diag)) => { $($body)* } - None => return None, +crate enum LitError { + NotLiteral, + LexerError, + InvalidSuffix, + InvalidIntSuffix, + InvalidFloatSuffix, + NonDecimalFloat(&'static str), + IntTooLarge, +} + +impl LitError { + crate fn report(&self, diag: &Handler, lit: token::Lit, suf: Option, span: Span) { + match *self { + LitError::NotLiteral | LitError::LexerError => {} + LitError::InvalidSuffix => { + expect_no_suffix(diag, span, &format!("{} {}", lit.article(), lit.descr()), suf); + } + LitError::InvalidIntSuffix => { + let suf = suf.expect("suffix error with no suffix").as_str(); + if looks_like_width_suffix(&['i', 'u'], &suf) { + // If it looks like a width, try to be helpful. + let msg = format!("invalid width `{}` for integer literal", &suf[1..]); + diag.struct_span_err(span, &msg) + .help("valid widths are 8, 16, 32, 64 and 128") + .emit(); + } else { + let msg = format!("invalid suffix `{}` for numeric literal", suf); + diag.struct_span_err(span, &msg) + .span_label(span, format!("invalid suffix `{}`", suf)) + .help("the suffix must be one of the integral types (`u32`, `isize`, etc)") + .emit(); + } + } + LitError::InvalidFloatSuffix => { + let suf = suf.expect("suffix error with no suffix").as_str(); + if looks_like_width_suffix(&['f'], &suf) { + // If it looks like a width, try to be helpful. + let msg = format!("invalid width `{}` for float literal", &suf[1..]); + diag.struct_span_err(span, &msg) + .help("valid widths are 32 and 64") + .emit(); + } else { + let msg = format!("invalid suffix `{}` for float literal", suf); + diag.struct_span_err(span, &msg) + .span_label(span, format!("invalid suffix `{}`", suf)) + .help("valid suffixes are `f32` and `f64`") + .emit(); + } + } + LitError::NonDecimalFloat(descr) => { + diag.struct_span_err(span, &format!("{} float literal is not supported", descr)) + .span_label(span, "not supported") + .emit(); + } + LitError::IntTooLarge => { + diag.struct_span_err(span, "int literal is too large") + .emit(); + } } } } @@ -33,15 +86,12 @@ impl LitKind { fn from_lit_token( lit: token::Lit, suf: Option, - diag: Option<(Span, &Handler)> - ) -> Option { + ) -> Result { if suf.is_some() && !lit.may_have_suffix() { - err!(diag, |span, diag| { - expect_no_suffix(span, diag, &format!("a {}", lit.literal_name()), suf) - }); + return Err(LitError::InvalidSuffix); } - Some(match lit { + Ok(match lit { token::Bool(i) => { assert!(i == kw::True || i == kw::False); LitKind::Bool(i == kw::True) @@ -55,33 +105,33 @@ impl LitKind { token::Char(i) => { match unescape_char(&i.as_str()) { Ok(c) => LitKind::Char(c), - Err(_) => LitKind::Err(i), + Err(_) => return Err(LitError::LexerError), } }, token::Err(i) => LitKind::Err(i), // There are some valid suffixes for integer and float literals, // so all the handling is done internally. - token::Integer(s) => return integer_lit(&s.as_str(), suf, diag), - token::Float(s) => return float_lit(&s.as_str(), suf, diag), + token::Integer(s) => return integer_lit(s, suf), + token::Float(s) => return float_lit(s, suf), token::Str_(mut sym) => { // If there are no characters requiring special treatment we can // reuse the symbol from the Token. Otherwise, we must generate a // new symbol because the string in the LitKind is different to the // string in the Token. - let mut has_error = false; + let mut error = None; let s = &sym.as_str(); if s.as_bytes().iter().any(|&c| c == b'\\' || c == b'\r') { let mut buf = String::with_capacity(s.len()); unescape_str(s, &mut |_, unescaped_char| { match unescaped_char { Ok(c) => buf.push(c), - Err(_) => has_error = true, + Err(_) => error = Some(LitError::LexerError), } }); - if has_error { - return Some(LitKind::Err(sym)); + if let Some(error) = error { + return Err(error); } sym = Symbol::intern(&buf) } @@ -99,15 +149,15 @@ impl LitKind { token::ByteStr(i) => { let s = &i.as_str(); let mut buf = Vec::with_capacity(s.len()); - let mut has_error = false; + let mut error = None; unescape_byte_str(s, &mut |_, unescaped_byte| { match unescaped_byte { Ok(c) => buf.push(c), - Err(_) => has_error = true, + Err(_) => error = Some(LitError::LexerError), } }); - if has_error { - return Some(LitKind::Err(i)); + if let Some(error) = error { + return Err(error); } buf.shrink_to_fit(); LitKind::ByteStr(Lrc::new(buf)) @@ -165,6 +215,15 @@ impl LitKind { } impl Lit { + fn from_lit_token( + token: token::Lit, + suffix: Option, + span: Span, + ) -> Result { + let node = LitKind::from_lit_token(token, suffix)?; + Ok(Lit { node, token, suffix, span }) + } + /// Converts literal token with a suffix into an AST literal. /// Works speculatively and may return `None` if diagnostic handler is not passed. /// If diagnostic handler is passed, may return `Some`, @@ -172,9 +231,8 @@ impl Lit { crate fn from_token( token: &token::Token, span: Span, - diag: Option<(Span, &Handler)>, - ) -> Option { - let (token, suffix) = match *token { + ) -> Result { + let (lit, suf) = match *token { token::Ident(ident, false) if ident.name == kw::True || ident.name == kw::False => (token::Bool(ident.name), None), token::Literal(token, suffix) => @@ -182,16 +240,15 @@ impl Lit { token::Interpolated(ref nt) => { if let token::NtExpr(expr) | token::NtLiteral(expr) = &**nt { if let ast::ExprKind::Lit(lit) = &expr.node { - return Some(lit.clone()); + return Ok(lit.clone()); } } - return None; + return Err(LitError::NotLiteral); } - _ => return None, + _ => return Err(LitError::NotLiteral) }; - let node = LitKind::from_lit_token(token, suffix, diag)?; - Some(Lit { node, token, suffix, span }) + Lit::from_lit_token(lit, suf, span) } /// Attempts to recover an AST literal from semantic literal. @@ -215,13 +272,10 @@ impl Lit { impl<'a> Parser<'a> { /// Matches `lit = true | false | token_lit`. crate fn parse_lit(&mut self) -> PResult<'a, Lit> { - let diag = Some((self.span, &self.sess.span_diagnostic)); - if let Some(lit) = Lit::from_token(&self.token, self.span, diag) { - self.bump(); - return Ok(lit); - } else if self.token == token::Dot { - // Recover `.4` as `0.4`. - let recovered = self.look_ahead(1, |t| { + let mut recovered = None; + if self.token == token::Dot { + // Attempt to recover `.4` as `0.4`. + recovered = self.look_ahead(1, |t| { if let token::Literal(token::Integer(val), suf) = *t { let next_span = self.look_ahead_span(1); if self.span.hi() == next_span.lo() { @@ -232,7 +286,7 @@ impl<'a> Parser<'a> { } None }); - if let Some((token, span)) = recovered { + if let Some((ref token, span)) = recovered { self.diagnostic() .struct_span_err(span, "float literals must have an integer part") .span_suggestion( @@ -242,27 +296,37 @@ impl<'a> Parser<'a> { Applicability::MachineApplicable, ) .emit(); - let diag = Some((span, &self.sess.span_diagnostic)); - if let Some(lit) = Lit::from_token(&token, span, diag) { - self.bump(); - self.bump(); - return Ok(lit); - } + self.bump(); } } - Err(self.span_fatal(self.span, &format!("unexpected token: {}", self.this_token_descr()))) + let (token, span) = recovered.as_ref().map_or((&self.token, self.span), + |(token, span)| (token, *span)); + + match Lit::from_token(token, span) { + Ok(lit) => { + self.bump(); + return Ok(lit); + } + Err(LitError::NotLiteral) => { + let msg = format!("unexpected token: {}", self.this_token_descr()); + return Err(self.span_fatal(span, &msg)); + } + Err(err) => { + let (lit, suf) = token.expect_lit(); + self.bump(); + err.report(&self.sess.span_diagnostic, lit, suf, span); + return Ok(Lit::from_lit_token(token::Err(lit.symbol()), suf, span).ok().unwrap()); + } + } } } -crate fn expect_no_suffix(sp: Span, diag: &Handler, kind: &str, suffix: Option) { +crate fn expect_no_suffix(diag: &Handler, sp: Span, kind: &str, suffix: Option) { match suffix { None => {/* everything ok */} Some(suf) => { let text = suf.as_str(); - if text.is_empty() { - diag.span_bug(sp, "found empty literal suffix in Some") - } let mut err = if kind == "a tuple index" && ["i32", "u32", "isize", "usize"].contains(&text.to_string().as_str()) { @@ -318,48 +382,33 @@ fn raw_str_lit(lit: &str) -> String { res } -// check if `s` looks like i32 or u1234 etc. +// Checks if `s` looks like i32 or u1234 etc. fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool { - s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit()) + s.len() > 1 && s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit()) } -fn filtered_float_lit(data: Symbol, suffix: Option, diag: Option<(Span, &Handler)>) - -> Option { +fn filtered_float_lit(data: Symbol, suffix: Option) -> Result { debug!("filtered_float_lit: {}, {:?}", data, suffix); let suffix = match suffix { Some(suffix) => suffix, - None => return Some(LitKind::FloatUnsuffixed(data)), + None => return Ok(LitKind::FloatUnsuffixed(data)), }; - Some(match &*suffix.as_str() { + Ok(match &*suffix.as_str() { "f32" => LitKind::Float(data, ast::FloatTy::F32), "f64" => LitKind::Float(data, ast::FloatTy::F64), - suf => { - err!(diag, |span, diag| { - if suf.len() >= 2 && looks_like_width_suffix(&['f'], suf) { - // if it looks like a width, lets try to be helpful. - let msg = format!("invalid width `{}` for float literal", &suf[1..]); - diag.struct_span_err(span, &msg).help("valid widths are 32 and 64").emit() - } else { - let msg = format!("invalid suffix `{}` for float literal", suf); - diag.struct_span_err(span, &msg) - .span_label(span, format!("invalid suffix `{}`", suf)) - .help("valid suffixes are `f32` and `f64`") - .emit(); - } - }); - - LitKind::FloatUnsuffixed(data) - } + _ => return Err(LitError::InvalidFloatSuffix), }) } -fn float_lit(s: &str, suffix: Option, diag: Option<(Span, &Handler)>) - -> Option { + +fn float_lit(s: Symbol, suffix: Option) -> Result { debug!("float_lit: {:?}, {:?}", s, suffix); // FIXME #2252: bounds checking float literals is deferred until trans // Strip underscores without allocating a new String unless necessary. let s2; + let s = s.as_str(); + let s = s.get(); let s = if s.chars().any(|c| c == '_') { s2 = s.chars().filter(|&c| c != '_').collect::(); &s2 @@ -367,15 +416,17 @@ fn float_lit(s: &str, suffix: Option, diag: Option<(Span, &Handler)>) s }; - filtered_float_lit(Symbol::intern(s), suffix, diag) + filtered_float_lit(Symbol::intern(s), suffix) } -fn integer_lit(s: &str, suffix: Option, diag: Option<(Span, &Handler)>) - -> Option { +fn integer_lit(s: Symbol, suffix: Option) -> Result { // s can only be ascii, byte indexing is fine // Strip underscores without allocating a new String unless necessary. let s2; + let orig = s; + let s = s.as_str(); + let s = s.get(); let mut s = if s.chars().any(|c| c == '_') { s2 = s.chars().filter(|&c| c != '_').collect::(); &s2 @@ -386,7 +437,6 @@ fn integer_lit(s: &str, suffix: Option, diag: Option<(Span, &Handler)>) debug!("integer_lit: {}, {:?}", s, suffix); let mut base = 10; - let orig = s; let mut ty = ast::LitIntType::Unsuffixed; if s.starts_with('0') && s.len() > 1 { @@ -402,19 +452,15 @@ fn integer_lit(s: &str, suffix: Option, diag: Option<(Span, &Handler)>) if let Some(suf) = suffix { if looks_like_width_suffix(&['f'], &suf.as_str()) { let err = match base { - 16 => Some("hexadecimal float literal is not supported"), - 8 => Some("octal float literal is not supported"), - 2 => Some("binary float literal is not supported"), + 16 => Some(LitError::NonDecimalFloat("hexadecimal")), + 8 => Some(LitError::NonDecimalFloat("octal")), + 2 => Some(LitError::NonDecimalFloat("binary")), _ => None, }; if let Some(err) = err { - err!(diag, |span, diag| { - diag.struct_span_err(span, err) - .span_label(span, "not supported") - .emit(); - }); + return Err(err); } - return filtered_float_lit(Symbol::intern(s), Some(suf), diag) + return filtered_float_lit(Symbol::intern(s), Some(suf)) } } @@ -423,9 +469,6 @@ fn integer_lit(s: &str, suffix: Option, diag: Option<(Span, &Handler)>) } if let Some(suf) = suffix { - if suf.as_str().is_empty() { - err!(diag, |span, diag| diag.span_bug(span, "found empty literal suffix in Some")); - } ty = match &*suf.as_str() { "isize" => ast::LitIntType::Signed(ast::IntTy::Isize), "i8" => ast::LitIntType::Signed(ast::IntTy::I8), @@ -439,48 +482,22 @@ fn integer_lit(s: &str, suffix: Option, diag: Option<(Span, &Handler)>) "u32" => ast::LitIntType::Unsigned(ast::UintTy::U32), "u64" => ast::LitIntType::Unsigned(ast::UintTy::U64), "u128" => ast::LitIntType::Unsigned(ast::UintTy::U128), - suf => { - // i and u look like widths, so lets - // give an error message along those lines - err!(diag, |span, diag| { - if looks_like_width_suffix(&['i', 'u'], suf) { - let msg = format!("invalid width `{}` for integer literal", &suf[1..]); - diag.struct_span_err(span, &msg) - .help("valid widths are 8, 16, 32, 64 and 128") - .emit(); - } else { - let msg = format!("invalid suffix `{}` for numeric literal", suf); - diag.struct_span_err(span, &msg) - .span_label(span, format!("invalid suffix `{}`", suf)) - .help("the suffix must be one of the integral types \ - (`u32`, `isize`, etc)") - .emit(); - } - }); - - ty - } + _ => return Err(LitError::InvalidIntSuffix), } } debug!("integer_lit: the type is {:?}, base {:?}, the new string is {:?}, the original \ string was {:?}, the original suffix was {:?}", ty, base, s, orig, suffix); - Some(match u128::from_str_radix(s, base) { + Ok(match u128::from_str_radix(s, base) { Ok(r) => LitKind::Int(r, ty), Err(_) => { - // small bases are lexed as if they were base 10, e.g, the string + // Small bases are lexed as if they were base 10, e.g, the string // might be `0b10201`. This will cause the conversion above to fail, - // but these cases have errors in the lexer: we don't want to emit - // two errors, and we especially don't want to emit this error since - // it isn't necessarily true. - let already_errored = base < 10 && - s.chars().any(|c| c.to_digit(10).map_or(false, |d| d >= base)); - - if !already_errored { - err!(diag, |span, diag| diag.span_err(span, "int literal is too large")); - } - LitKind::Int(0, ty) + // but these kinds of errors are already reported by the lexer. + let from_lexer = + base < 10 && s.chars().any(|c| c.to_digit(10).map_or(false, |d| d >= base)); + return Err(if from_lexer { LitError::LexerError } else { LitError::IntTooLarge }); } }) } diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index 7600d6078a106..3d82344ac1e54 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -1054,7 +1054,7 @@ impl<'a> Parser<'a> { } fn expect_no_suffix(&self, sp: Span, kind: &str, suffix: Option) { - literal::expect_no_suffix(sp, &self.sess.span_diagnostic, kind, suffix) + literal::expect_no_suffix(&self.sess.span_diagnostic, sp, kind, suffix) } /// Attempts to consume a `<`. If `<<` is seen, replaces it with a single diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index 663eace6b6289..f089fc024fecf 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -77,7 +77,21 @@ pub enum Lit { static_assert_size!(Lit, 8); impl Lit { - crate fn literal_name(&self) -> &'static str { + crate fn symbol(&self) -> Symbol { + match *self { + Bool(s) | Byte(s) | Char(s) | Integer(s) | Float(s) | Err(s) | + Str_(s) | StrRaw(s, _) | ByteStr(s) | ByteStrRaw(s, _) => s + } + } + + crate fn article(&self) -> &'static str { + match *self { + Integer(_) | Err(_) => "an", + _ => "a", + } + } + + crate fn descr(&self) -> &'static str { match *self { Bool(_) => panic!("literal token contains `Lit::Bool`"), Byte(_) => "byte literal", @@ -92,7 +106,7 @@ impl Lit { crate fn may_have_suffix(&self) -> bool { match *self { - Integer(..) | Float(..) => true, + Integer(..) | Float(..) | Err(..) => true, _ => false, } } @@ -318,6 +332,13 @@ impl Token { } } + crate fn expect_lit(&self) -> (Lit, Option) { + match *self { + Literal(lit, suf) => (lit, suf), + _=> panic!("`expect_lit` called on non-literal"), + } + } + /// Returns `true` if the token is any literal, a minus (which can prefix a literal, /// for example a '-42', or one of the boolean idents). crate fn can_begin_literal_or_bool(&self) -> bool { diff --git a/src/test/ui/parser/no-hex-float-literal.rs b/src/test/ui/parser/no-hex-float-literal.rs index bf11dee08338e..88574ba6f922c 100644 --- a/src/test/ui/parser/no-hex-float-literal.rs +++ b/src/test/ui/parser/no-hex-float-literal.rs @@ -4,6 +4,6 @@ fn main() { 0x567.89; //~^ ERROR hexadecimal float literal is not supported 0xDEAD.BEEFp-2f; - //~^ ERROR invalid suffix `f` for float literal + //~^ ERROR invalid suffix `f` for integer literal //~| ERROR `{integer}` is a primitive type and therefore doesn't have fields } diff --git a/src/test/ui/parser/no-hex-float-literal.stderr b/src/test/ui/parser/no-hex-float-literal.stderr index 258ab06d5ee2f..48041f34838cc 100644 --- a/src/test/ui/parser/no-hex-float-literal.stderr +++ b/src/test/ui/parser/no-hex-float-literal.stderr @@ -4,13 +4,13 @@ error: hexadecimal float literal is not supported LL | 0x567.89; | ^^^^^^^^ -error: invalid suffix `f` for float literal +error: invalid suffix `f` for integer literal --> $DIR/no-hex-float-literal.rs:6:18 | LL | 0xDEAD.BEEFp-2f; | ^^ invalid suffix `f` | - = help: valid suffixes are `f32` and `f64` + = help: the suffix must be one of the integral types (`u32`, `isize`, etc) error[E0610]: `{integer}` is a primitive type and therefore doesn't have fields --> $DIR/no-hex-float-literal.rs:2:11 From 694f76d56117bf178fb21d4a99f6b773b4ece119 Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Sat, 18 May 2019 22:44:09 +0300 Subject: [PATCH 2/6] syntax: More consistent wording for some literal parsing errors --- src/libsyntax/parse/literal.rs | 4 ++-- src/test/ui/old-suffixes-are-really-forbidden.stderr | 4 ++-- src/test/ui/parser/bad-lit-suffixes.rs | 4 ++-- src/test/ui/parser/bad-lit-suffixes.stderr | 4 ++-- src/test/ui/parser/int-literal-too-large-span.rs | 2 +- src/test/ui/parser/int-literal-too-large-span.stderr | 2 +- src/test/ui/parser/issue-5544-a.rs | 2 +- src/test/ui/parser/issue-5544-a.stderr | 2 +- src/test/ui/parser/issue-5544-b.rs | 2 +- src/test/ui/parser/issue-5544-b.stderr | 2 +- src/test/ui/parser/lex-bad-numeric-literals.rs | 4 ++-- src/test/ui/parser/lex-bad-numeric-literals.stderr | 4 ++-- src/test/ui/parser/no-binary-float-literal.rs | 2 +- src/test/ui/parser/no-binary-float-literal.stderr | 2 +- 14 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/libsyntax/parse/literal.rs b/src/libsyntax/parse/literal.rs index 343aa6bc653ba..b4a2c6a7a855d 100644 --- a/src/libsyntax/parse/literal.rs +++ b/src/libsyntax/parse/literal.rs @@ -42,7 +42,7 @@ impl LitError { .help("valid widths are 8, 16, 32, 64 and 128") .emit(); } else { - let msg = format!("invalid suffix `{}` for numeric literal", suf); + let msg = format!("invalid suffix `{}` for integer literal", suf); diag.struct_span_err(span, &msg) .span_label(span, format!("invalid suffix `{}`", suf)) .help("the suffix must be one of the integral types (`u32`, `isize`, etc)") @@ -71,7 +71,7 @@ impl LitError { .emit(); } LitError::IntTooLarge => { - diag.struct_span_err(span, "int literal is too large") + diag.struct_span_err(span, "integer literal is too large") .emit(); } } diff --git a/src/test/ui/old-suffixes-are-really-forbidden.stderr b/src/test/ui/old-suffixes-are-really-forbidden.stderr index 9d1e8d071f061..ccfe60e964b2e 100644 --- a/src/test/ui/old-suffixes-are-really-forbidden.stderr +++ b/src/test/ui/old-suffixes-are-really-forbidden.stderr @@ -1,4 +1,4 @@ -error: invalid suffix `is` for numeric literal +error: invalid suffix `is` for integer literal --> $DIR/old-suffixes-are-really-forbidden.rs:2:13 | LL | let a = 1_is; @@ -6,7 +6,7 @@ LL | let a = 1_is; | = help: the suffix must be one of the integral types (`u32`, `isize`, etc) -error: invalid suffix `us` for numeric literal +error: invalid suffix `us` for integer literal --> $DIR/old-suffixes-are-really-forbidden.rs:3:13 | LL | let b = 2_us; diff --git a/src/test/ui/parser/bad-lit-suffixes.rs b/src/test/ui/parser/bad-lit-suffixes.rs index 75bed3088587c..151c6e1527ff5 100644 --- a/src/test/ui/parser/bad-lit-suffixes.rs +++ b/src/test/ui/parser/bad-lit-suffixes.rs @@ -22,8 +22,8 @@ fn main() { 1234f1024; //~ ERROR invalid width `1024` for float literal 1234.5f1024; //~ ERROR invalid width `1024` for float literal - 1234suffix; //~ ERROR invalid suffix `suffix` for numeric literal - 0b101suffix; //~ ERROR invalid suffix `suffix` for numeric literal + 1234suffix; //~ ERROR invalid suffix `suffix` for integer literal + 0b101suffix; //~ ERROR invalid suffix `suffix` for integer literal 1.0suffix; //~ ERROR invalid suffix `suffix` for float literal 1.0e10suffix; //~ ERROR invalid suffix `suffix` for float literal } diff --git a/src/test/ui/parser/bad-lit-suffixes.stderr b/src/test/ui/parser/bad-lit-suffixes.stderr index de194f4820de0..e53b1498332d1 100644 --- a/src/test/ui/parser/bad-lit-suffixes.stderr +++ b/src/test/ui/parser/bad-lit-suffixes.stderr @@ -78,7 +78,7 @@ LL | 1234.5f1024; | = help: valid widths are 32 and 64 -error: invalid suffix `suffix` for numeric literal +error: invalid suffix `suffix` for integer literal --> $DIR/bad-lit-suffixes.rs:25:5 | LL | 1234suffix; @@ -86,7 +86,7 @@ LL | 1234suffix; | = help: the suffix must be one of the integral types (`u32`, `isize`, etc) -error: invalid suffix `suffix` for numeric literal +error: invalid suffix `suffix` for integer literal --> $DIR/bad-lit-suffixes.rs:26:5 | LL | 0b101suffix; diff --git a/src/test/ui/parser/int-literal-too-large-span.rs b/src/test/ui/parser/int-literal-too-large-span.rs index 206242c3c7fa6..666ca93505976 100644 --- a/src/test/ui/parser/int-literal-too-large-span.rs +++ b/src/test/ui/parser/int-literal-too-large-span.rs @@ -2,6 +2,6 @@ fn main() { 9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 - //~^ ERROR int literal is too large + //~^ ERROR integer literal is too large ; // the span shouldn't point to this. } diff --git a/src/test/ui/parser/int-literal-too-large-span.stderr b/src/test/ui/parser/int-literal-too-large-span.stderr index d7774c333c29a..7cae85fc9fe6d 100644 --- a/src/test/ui/parser/int-literal-too-large-span.stderr +++ b/src/test/ui/parser/int-literal-too-large-span.stderr @@ -1,4 +1,4 @@ -error: int literal is too large +error: integer literal is too large --> $DIR/int-literal-too-large-span.rs:4:5 | LL | 9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 diff --git a/src/test/ui/parser/issue-5544-a.rs b/src/test/ui/parser/issue-5544-a.rs index 303b902c49904..3c239c73b9d70 100644 --- a/src/test/ui/parser/issue-5544-a.rs +++ b/src/test/ui/parser/issue-5544-a.rs @@ -1,4 +1,4 @@ fn main() { let __isize = 340282366920938463463374607431768211456; // 2^128 - //~^ ERROR int literal is too large + //~^ ERROR integer literal is too large } diff --git a/src/test/ui/parser/issue-5544-a.stderr b/src/test/ui/parser/issue-5544-a.stderr index bc48853b380e6..de579c3c134e5 100644 --- a/src/test/ui/parser/issue-5544-a.stderr +++ b/src/test/ui/parser/issue-5544-a.stderr @@ -1,4 +1,4 @@ -error: int literal is too large +error: integer literal is too large --> $DIR/issue-5544-a.rs:2:19 | LL | let __isize = 340282366920938463463374607431768211456; // 2^128 diff --git a/src/test/ui/parser/issue-5544-b.rs b/src/test/ui/parser/issue-5544-b.rs index fa40f15dedb5c..93f2ff271364e 100644 --- a/src/test/ui/parser/issue-5544-b.rs +++ b/src/test/ui/parser/issue-5544-b.rs @@ -1,4 +1,4 @@ fn main() { let __isize = 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff_ff; - //~^ ERROR int literal is too large + //~^ ERROR integer literal is too large } diff --git a/src/test/ui/parser/issue-5544-b.stderr b/src/test/ui/parser/issue-5544-b.stderr index 3f15c0c0d755c..7df212dedfede 100644 --- a/src/test/ui/parser/issue-5544-b.stderr +++ b/src/test/ui/parser/issue-5544-b.stderr @@ -1,4 +1,4 @@ -error: int literal is too large +error: integer literal is too large --> $DIR/issue-5544-b.rs:2:19 | LL | let __isize = 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff_ff; diff --git a/src/test/ui/parser/lex-bad-numeric-literals.rs b/src/test/ui/parser/lex-bad-numeric-literals.rs index 9496ad305fc2c..440150da17005 100644 --- a/src/test/ui/parser/lex-bad-numeric-literals.rs +++ b/src/test/ui/parser/lex-bad-numeric-literals.rs @@ -13,8 +13,8 @@ fn main() { 0o; //~ ERROR: no valid digits 1e+; //~ ERROR: expected at least one digit in exponent 0x539.0; //~ ERROR: hexadecimal float literal is not supported - 9900000000000000000000000000999999999999999999999999999999; //~ ERROR: int literal is too large - 9900000000000000000000000000999999999999999999999999999999; //~ ERROR: int literal is too large + 9900000000000000000000000000999999999999999999999999999999; //~ ERROR: integer literal is too large + 9900000000000000000000000000999999999999999999999999999999; //~ ERROR: integer literal is too large 0x; //~ ERROR: no valid digits 0xu32; //~ ERROR: no valid digits 0ou32; //~ ERROR: no valid digits diff --git a/src/test/ui/parser/lex-bad-numeric-literals.stderr b/src/test/ui/parser/lex-bad-numeric-literals.stderr index 466d7af1bda47..2058ea63ce4bd 100644 --- a/src/test/ui/parser/lex-bad-numeric-literals.stderr +++ b/src/test/ui/parser/lex-bad-numeric-literals.stderr @@ -112,13 +112,13 @@ error: octal float literal is not supported LL | 0o2f32; | ^^^^^^ not supported -error: int literal is too large +error: integer literal is too large --> $DIR/lex-bad-numeric-literals.rs:16:5 | LL | 9900000000000000000000000000999999999999999999999999999999; | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -error: int literal is too large +error: integer literal is too large --> $DIR/lex-bad-numeric-literals.rs:17:5 | LL | 9900000000000000000000000000999999999999999999999999999999; diff --git a/src/test/ui/parser/no-binary-float-literal.rs b/src/test/ui/parser/no-binary-float-literal.rs index a42d2cbc442f0..052cb4934f503 100644 --- a/src/test/ui/parser/no-binary-float-literal.rs +++ b/src/test/ui/parser/no-binary-float-literal.rs @@ -4,5 +4,5 @@ fn main() { 0b101.010; //~^ ERROR binary float literal is not supported 0b101p4f64; - //~^ ERROR invalid suffix `p4f64` for numeric literal + //~^ ERROR invalid suffix `p4f64` for integer literal } diff --git a/src/test/ui/parser/no-binary-float-literal.stderr b/src/test/ui/parser/no-binary-float-literal.stderr index 21f415bcfb000..65b129b5827ce 100644 --- a/src/test/ui/parser/no-binary-float-literal.stderr +++ b/src/test/ui/parser/no-binary-float-literal.stderr @@ -10,7 +10,7 @@ error: binary float literal is not supported LL | 0b101010f64; | ^^^^^^^^^^^ not supported -error: invalid suffix `p4f64` for numeric literal +error: invalid suffix `p4f64` for integer literal --> $DIR/no-binary-float-literal.rs:6:5 | LL | 0b101p4f64; From 9450e7d142f53fc9daad87866cdc7af144013264 Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Sat, 18 May 2019 22:45:24 +0300 Subject: [PATCH 3/6] syntax: Fix spans for boolean literals passed to proc macros --- src/libsyntax/parse/literal.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/libsyntax/parse/literal.rs b/src/libsyntax/parse/literal.rs index b4a2c6a7a855d..4ddf3ddbcee60 100644 --- a/src/libsyntax/parse/literal.rs +++ b/src/libsyntax/parse/literal.rs @@ -3,7 +3,7 @@ use crate::ast::{self, Ident, Lit, LitKind}; use crate::parse::parser::Parser; use crate::parse::PResult; -use crate::parse::token::{self, Token}; +use crate::parse::token; use crate::parse::unescape::{unescape_str, unescape_char, unescape_byte_str, unescape_byte}; use crate::print::pprust; use crate::symbol::{kw, Symbol}; @@ -117,9 +117,9 @@ impl LitKind { token::Str_(mut sym) => { // If there are no characters requiring special treatment we can - // reuse the symbol from the Token. Otherwise, we must generate a + // reuse the symbol from the token. Otherwise, we must generate a // new symbol because the string in the LitKind is different to the - // string in the Token. + // string in the token. let mut error = None; let s = &sym.as_str(); if s.as_bytes().iter().any(|&c| c == b'\\' || c == b'\r') { @@ -262,8 +262,8 @@ impl Lit { /// Losslessly convert an AST literal into a token stream. crate fn tokens(&self) -> TokenStream { let token = match self.token { - token::Bool(symbol) => Token::Ident(Ident::with_empty_ctxt(symbol), false), - token => Token::Literal(token, self.suffix), + token::Bool(symbol) => token::Ident(Ident::new(symbol, self.span), false), + token => token::Literal(token, self.suffix), }; TokenTree::Token(self.span, token).into() } From 558559e70f648ff518da5ada726da2f04b617197 Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Sat, 18 May 2019 22:46:27 +0300 Subject: [PATCH 4/6] syntax: Remove an obsolete hack from literal comparisons --- src/libsyntax/parse/token.rs | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index f089fc024fecf..fbc27d1999d86 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -110,12 +110,6 @@ impl Lit { _ => false, } } - - // See comments in `Nonterminal::to_tokenstream` for why we care about - // *probably* equal here rather than actual equality - fn probably_equal_for_proc_macro(&self, other: &Lit) -> bool { - mem::discriminant(self) == mem::discriminant(other) - } } pub(crate) fn ident_can_begin_expr(ident: ast::Ident, is_raw: bool) -> bool { @@ -590,9 +584,7 @@ impl Token { a.name == kw::DollarCrate || c.name == kw::DollarCrate), - (&Literal(ref a, b), &Literal(ref c, d)) => { - b == d && a.probably_equal_for_proc_macro(c) - } + (&Literal(a, b), &Literal(c, d)) => b == d && a == c, (&Interpolated(_), &Interpolated(_)) => false, From ca2a50fad7439f02a9e99f8107ffed8460fc8c44 Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Sun, 19 May 2019 01:04:26 +0300 Subject: [PATCH 5/6] syntax: Turn `token::Lit` into a struct --- src/librustc/hir/print.rs | 3 +- src/librustc/ich/impls_syntax.rs | 34 ++++--- src/librustdoc/html/highlight.rs | 14 +-- src/libsyntax/ast.rs | 2 - src/libsyntax/diagnostics/plugin.rs | 4 +- src/libsyntax/parse/lexer/mod.rs | 105 ++++++++++---------- src/libsyntax/parse/literal.rs | 130 ++++++++++++++----------- src/libsyntax/parse/parser.rs | 41 ++++---- src/libsyntax/parse/token.rs | 95 +++++++++--------- src/libsyntax/print/pprust.rs | 36 +++---- src/libsyntax_ext/assert.rs | 15 ++- src/libsyntax_ext/proc_macro_server.rs | 82 +++++----------- 12 files changed, 275 insertions(+), 286 deletions(-) diff --git a/src/librustc/hir/print.rs b/src/librustc/hir/print.rs index e51681082f319..c5337381a3d4f 100644 --- a/src/librustc/hir/print.rs +++ b/src/librustc/hir/print.rs @@ -1249,8 +1249,7 @@ impl<'a> State<'a> { fn print_literal(&mut self, lit: &hir::Lit) -> io::Result<()> { self.maybe_print_comment(lit.span.lo())?; - let (token, suffix) = lit.node.to_lit_token(); - self.writer().word(pprust::literal_to_string(token, suffix)) + self.writer().word(pprust::literal_to_string(lit.node.to_lit_token())) } pub fn print_expr(&mut self, expr: &hir::Expr) -> io::Result<()> { diff --git a/src/librustc/ich/impls_syntax.rs b/src/librustc/ich/impls_syntax.rs index 4e5718cc5ef2a..af53f686ae548 100644 --- a/src/librustc/ich/impls_syntax.rs +++ b/src/librustc/ich/impls_syntax.rs @@ -165,7 +165,6 @@ impl_stable_hash_for!(enum ::syntax::ast::LitIntType { impl_stable_hash_for!(struct ::syntax::ast::Lit { node, token, - suffix, span }); @@ -288,17 +287,23 @@ for tokenstream::TokenStream { } } -impl_stable_hash_for!(enum token::Lit { - Bool(val), - Byte(val), - Char(val), - Err(val), - Integer(val), - Float(val), - Str_(val), - ByteStr(val), - StrRaw(val, n), - ByteStrRaw(val, n) +impl_stable_hash_for!(enum token::LitKind { + Bool, + Byte, + Char, + Integer, + Float, + Str, + ByteStr, + StrRaw(n), + ByteStrRaw(n), + Err +}); + +impl_stable_hash_for!(struct token::Lit { + kind, + symbol, + suffix }); fn hash_token<'a, 'gcx, W: StableHasherResult>( @@ -348,10 +353,7 @@ fn hash_token<'a, 'gcx, W: StableHasherResult>( token::Token::CloseDelim(delim_token) => { std_hash::Hash::hash(&delim_token, hasher); } - token::Token::Literal(lit, opt_name) => { - lit.hash_stable(hcx, hasher); - opt_name.hash_stable(hcx, hasher); - } + token::Token::Literal(lit) => lit.hash_stable(hcx, hasher), token::Token::Ident(ident, is_raw) => { ident.name.hash_stable(hcx, hasher); diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs index e5b44077fc993..932419c78f22c 100644 --- a/src/librustdoc/html/highlight.rs +++ b/src/librustdoc/html/highlight.rs @@ -310,17 +310,17 @@ impl<'a> Classifier<'a> { } } - token::Literal(lit, _suf) => { - match lit { + token::Literal(lit) => { + match lit.kind { // Text literals. - token::Byte(..) | token::Char(..) | token::Err(..) | - token::ByteStr(..) | token::ByteStrRaw(..) | - token::Str_(..) | token::StrRaw(..) => Class::String, + token::Byte | token::Char | token::Err | + token::ByteStr | token::ByteStrRaw(..) | + token::Str | token::StrRaw(..) => Class::String, // Number literals. - token::Integer(..) | token::Float(..) => Class::Number, + token::Integer | token::Float => Class::Number, - token::Bool(..) => panic!("literal token contains `Lit::Bool`"), + token::Bool => panic!("literal token contains `Lit::Bool`"), } } diff --git a/src/libsyntax/ast.rs b/src/libsyntax/ast.rs index 064459e750fbe..84ef0468cac7a 100644 --- a/src/libsyntax/ast.rs +++ b/src/libsyntax/ast.rs @@ -1347,8 +1347,6 @@ pub enum StrStyle { pub struct Lit { /// The original literal token as written in source code. pub token: token::Lit, - /// The original literal suffix as written in source code. - pub suffix: Option, /// The "semantic" representation of the literal lowered from the original tokens. /// Strings are unescaped, hexadecimal forms are eliminated, etc. /// FIXME: Remove this and only create the semantic representation during lowering to HIR. diff --git a/src/libsyntax/diagnostics/plugin.rs b/src/libsyntax/diagnostics/plugin.rs index e9476e2fdfd51..0c57c23b2b5c4 100644 --- a/src/libsyntax/diagnostics/plugin.rs +++ b/src/libsyntax/diagnostics/plugin.rs @@ -77,8 +77,8 @@ pub fn expand_register_diagnostic<'cx>(ecx: &'cx mut ExtCtxt<'_>, }, (3, Some(&TokenTree::Token(_, token::Ident(ref code, _))), Some(&TokenTree::Token(_, token::Comma)), - Some(&TokenTree::Token(_, token::Literal(token::StrRaw(description, _), None)))) => { - (code, Some(description)) + Some(&TokenTree::Token(_, token::Literal(token::Lit { symbol, .. })))) => { + (code, Some(symbol)) } _ => unreachable!() }; diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index c97d804076157..deb76d6d70a33 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -1,5 +1,6 @@ use crate::ast::{self, Ident}; -use crate::parse::{token, ParseSess}; +use crate::parse::ParseSess; +use crate::parse::token::{self, Token}; use crate::symbol::Symbol; use crate::parse::unescape; use crate::parse::unescape_error_reporting::{emit_unescape_error, push_escaped_char}; @@ -21,7 +22,7 @@ mod unicode_chars; #[derive(Clone, Debug)] pub struct TokenAndSpan { - pub tok: token::Token, + pub tok: Token, pub sp: Span, } @@ -55,7 +56,7 @@ pub struct StringReader<'a> { /// Stop reading src at this index. crate end_src_index: usize, // cached: - peek_tok: token::Token, + peek_tok: Token, peek_span: Span, peek_span_src_raw: Span, fatal_errs: Vec>, @@ -726,7 +727,7 @@ impl<'a> StringReader<'a> { } /// Lex a LIT_INTEGER or a LIT_FLOAT - fn scan_number(&mut self, c: char) -> token::Lit { + fn scan_number(&mut self, c: char) -> (token::LitKind, Symbol) { let mut base = 10; let start_bpos = self.pos; self.bump(); @@ -753,7 +754,7 @@ impl<'a> StringReader<'a> { } _ => { // just a 0 - return token::Integer(self.name_from(start_bpos)); + return (token::Integer, self.name_from(start_bpos)); } } } else if c.is_digit(10) { @@ -765,7 +766,7 @@ impl<'a> StringReader<'a> { if num_digits == 0 { self.err_span_(start_bpos, self.pos, "no valid digits found for number"); - return token::Integer(Symbol::intern("0")); + return (token::Integer, Symbol::intern("0")); } // might be a float, but don't be greedy if this is actually an @@ -783,17 +784,17 @@ impl<'a> StringReader<'a> { let pos = self.pos; self.check_float_base(start_bpos, pos, base); - token::Float(self.name_from(start_bpos)) + (token::Float, self.name_from(start_bpos)) } else { // it might be a float if it has an exponent if self.ch_is('e') || self.ch_is('E') { self.scan_float_exponent(); let pos = self.pos; self.check_float_base(start_bpos, pos, base); - return token::Float(self.name_from(start_bpos)); + return (token::Float, self.name_from(start_bpos)); } // but we certainly have an integer! - token::Integer(self.name_from(start_bpos)) + (token::Integer, self.name_from(start_bpos)) } } @@ -846,7 +847,7 @@ impl<'a> StringReader<'a> { } } - fn binop(&mut self, op: token::BinOpToken) -> token::Token { + fn binop(&mut self, op: token::BinOpToken) -> Token { self.bump(); if self.ch_is('=') { self.bump(); @@ -858,7 +859,7 @@ impl<'a> StringReader<'a> { /// Returns the next token from the string, advances the input past that /// token, and updates the interner - fn next_token_inner(&mut self) -> Result { + fn next_token_inner(&mut self) -> Result { let c = self.ch; if ident_start(c) { @@ -912,10 +913,10 @@ impl<'a> StringReader<'a> { } if is_dec_digit(c) { - let num = self.scan_number(c.unwrap()); + let (kind, symbol) = self.scan_number(c.unwrap()); let suffix = self.scan_optional_raw_name(); - debug!("next_token_inner: scanned number {:?}, {:?}", num, suffix); - return Ok(token::Literal(num, suffix)); + debug!("next_token_inner: scanned number {:?}, {:?}, {:?}", kind, symbol, suffix); + return Ok(Token::lit(kind, symbol, suffix)); } match c.expect("next_token_inner called at EOF") { @@ -1073,10 +1074,10 @@ impl<'a> StringReader<'a> { // lifetimes shouldn't end with a single quote // if we find one, then this is an invalid character literal if self.ch_is('\'') { - let id = self.name_from(start); + let symbol = self.name_from(start); self.bump(); self.validate_char_escape(start_with_quote); - return Ok(token::Literal(token::Char(id), None)) + return Ok(Token::lit(token::Char, symbol, None)); } // Include the leading `'` in the real identifier, for macro @@ -1098,43 +1099,43 @@ impl<'a> StringReader<'a> { return Ok(token::Lifetime(ident)); } let msg = "unterminated character literal"; - let id = self.scan_single_quoted_string(start_with_quote, msg); + let symbol = self.scan_single_quoted_string(start_with_quote, msg); self.validate_char_escape(start_with_quote); let suffix = self.scan_optional_raw_name(); - Ok(token::Literal(token::Char(id), suffix)) + Ok(Token::lit(token::Char, symbol, suffix)) } 'b' => { self.bump(); - let lit = match self.ch { + let (kind, symbol) = match self.ch { Some('\'') => { let start_with_quote = self.pos; self.bump(); let msg = "unterminated byte constant"; - let id = self.scan_single_quoted_string(start_with_quote, msg); + let symbol = self.scan_single_quoted_string(start_with_quote, msg); self.validate_byte_escape(start_with_quote); - token::Byte(id) + (token::Byte, symbol) }, Some('"') => { let start_with_quote = self.pos; let msg = "unterminated double quote byte string"; - let id = self.scan_double_quoted_string(msg); + let symbol = self.scan_double_quoted_string(msg); self.validate_byte_str_escape(start_with_quote); - token::ByteStr(id) + (token::ByteStr, symbol) }, Some('r') => self.scan_raw_byte_string(), _ => unreachable!(), // Should have been a token::Ident above. }; let suffix = self.scan_optional_raw_name(); - Ok(token::Literal(lit, suffix)) + Ok(Token::lit(kind, symbol, suffix)) } '"' => { let start_with_quote = self.pos; let msg = "unterminated double quote string"; - let id = self.scan_double_quoted_string(msg); + let symbol = self.scan_double_quoted_string(msg); self.validate_str_escape(start_with_quote); let suffix = self.scan_optional_raw_name(); - Ok(token::Literal(token::Str_(id), suffix)) + Ok(Token::lit(token::Str, symbol, suffix)) } 'r' => { let start_bpos = self.pos; @@ -1205,14 +1206,14 @@ impl<'a> StringReader<'a> { } self.bump(); - let id = if valid { + let symbol = if valid { self.name_from_to(content_start_bpos, content_end_bpos) } else { Symbol::intern("??") }; let suffix = self.scan_optional_raw_name(); - Ok(token::Literal(token::StrRaw(id, hash_count), suffix)) + Ok(Token::lit(token::StrRaw(hash_count), symbol, suffix)) } '-' => { if self.nextch_is('>') { @@ -1366,7 +1367,7 @@ impl<'a> StringReader<'a> { id } - fn scan_raw_byte_string(&mut self) -> token::Lit { + fn scan_raw_byte_string(&mut self) -> (token::LitKind, Symbol) { let start_bpos = self.pos; self.bump(); let mut hash_count = 0; @@ -1423,7 +1424,7 @@ impl<'a> StringReader<'a> { self.bump(); - token::ByteStrRaw(self.name_from_to(content_start_bpos, content_end_bpos), hash_count) + (token::ByteStrRaw(hash_count), self.name_from_to(content_start_bpos, content_end_bpos)) } fn validate_char_escape(&self, start_with_quote: BytePos) { @@ -1637,15 +1638,19 @@ mod tests { // check that the given reader produces the desired stream // of tokens (stop checking after exhausting the expected vec) - fn check_tokenization(mut string_reader: StringReader<'_>, expected: Vec) { + fn check_tokenization(mut string_reader: StringReader<'_>, expected: Vec) { for expected_tok in &expected { assert_eq!(&string_reader.next_token().tok, expected_tok); } } // make the identifier by looking up the string in the interner - fn mk_ident(id: &str) -> token::Token { - token::Token::from_ast_ident(Ident::from_str(id)) + fn mk_ident(id: &str) -> Token { + Token::from_ast_ident(Ident::from_str(id)) + } + + fn mk_lit(kind: token::LitKind, symbol: &str, suffix: Option<&str>) -> Token { + Token::lit(kind, Symbol::intern(symbol), suffix.map(Symbol::intern)) } #[test] @@ -1694,7 +1699,7 @@ mod tests { let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); let sh = mk_sess(sm.clone()); assert_eq!(setup(&sm, &sh, "'a'".to_string()).next_token().tok, - token::Literal(token::Char(Symbol::intern("a")), None)); + mk_lit(token::Char, "a", None)); }) } @@ -1704,7 +1709,7 @@ mod tests { let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); let sh = mk_sess(sm.clone()); assert_eq!(setup(&sm, &sh, "' '".to_string()).next_token().tok, - token::Literal(token::Char(Symbol::intern(" ")), None)); + mk_lit(token::Char, " ", None)); }) } @@ -1714,7 +1719,7 @@ mod tests { let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); let sh = mk_sess(sm.clone()); assert_eq!(setup(&sm, &sh, "'\\n'".to_string()).next_token().tok, - token::Literal(token::Char(Symbol::intern("\\n")), None)); + mk_lit(token::Char, "\\n", None)); }) } @@ -1724,7 +1729,7 @@ mod tests { let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); let sh = mk_sess(sm.clone()); assert_eq!(setup(&sm, &sh, "'abc".to_string()).next_token().tok, - token::Lifetime(Ident::from_str("'abc"))); + token::Lifetime(Ident::from_str("'abc"))); }) } @@ -1733,10 +1738,8 @@ mod tests { with_default_globals(|| { let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); let sh = mk_sess(sm.clone()); - assert_eq!(setup(&sm, &sh, "r###\"\"#a\\b\x00c\"\"###".to_string()) - .next_token() - .tok, - token::Literal(token::StrRaw(Symbol::intern("\"#a\\b\x00c\""), 3), None)); + assert_eq!(setup(&sm, &sh, "r###\"\"#a\\b\x00c\"\"###".to_string()).next_token().tok, + mk_lit(token::StrRaw(3), "\"#a\\b\x00c\"", None)); }) } @@ -1748,18 +1751,16 @@ mod tests { macro_rules! test { ($input: expr, $tok_type: ident, $tok_contents: expr) => {{ assert_eq!(setup(&sm, &sh, format!("{}suffix", $input)).next_token().tok, - token::Literal(token::$tok_type(Symbol::intern($tok_contents)), - Some(Symbol::intern("suffix")))); + mk_lit(token::$tok_type, $tok_contents, Some("suffix"))); // with a whitespace separator: assert_eq!(setup(&sm, &sh, format!("{} suffix", $input)).next_token().tok, - token::Literal(token::$tok_type(Symbol::intern($tok_contents)), - None)); + mk_lit(token::$tok_type, $tok_contents, None)); }} } test!("'a'", Char, "a"); test!("b'a'", Byte, "a"); - test!("\"a\"", Str_, "a"); + test!("\"a\"", Str, "a"); test!("b\"a\"", ByteStr, "a"); test!("1234", Integer, "1234"); test!("0b101", Integer, "0b101"); @@ -1768,14 +1769,11 @@ mod tests { test!("1.0e10", Float, "1.0e10"); assert_eq!(setup(&sm, &sh, "2us".to_string()).next_token().tok, - token::Literal(token::Integer(Symbol::intern("2")), - Some(Symbol::intern("us")))); + mk_lit(token::Integer, "2", Some("us"))); assert_eq!(setup(&sm, &sh, "r###\"raw\"###suffix".to_string()).next_token().tok, - token::Literal(token::StrRaw(Symbol::intern("raw"), 3), - Some(Symbol::intern("suffix")))); + mk_lit(token::StrRaw(3), "raw", Some("suffix"))); assert_eq!(setup(&sm, &sh, "br###\"raw\"###suffix".to_string()).next_token().tok, - token::Literal(token::ByteStrRaw(Symbol::intern("raw"), 3), - Some(Symbol::intern("suffix")))); + mk_lit(token::ByteStrRaw(3), "raw", Some("suffix"))); }) } @@ -1796,8 +1794,7 @@ mod tests { token::Comment => {} _ => panic!("expected a comment!"), } - assert_eq!(lexer.next_token().tok, - token::Literal(token::Char(Symbol::intern("a")), None)); + assert_eq!(lexer.next_token().tok, mk_lit(token::Char, "a", None)); }) } diff --git a/src/libsyntax/parse/literal.rs b/src/libsyntax/parse/literal.rs index 4ddf3ddbcee60..7554c7119932e 100644 --- a/src/libsyntax/parse/literal.rs +++ b/src/libsyntax/parse/literal.rs @@ -3,7 +3,7 @@ use crate::ast::{self, Ident, Lit, LitKind}; use crate::parse::parser::Parser; use crate::parse::PResult; -use crate::parse::token; +use crate::parse::token::{self, Token}; use crate::parse::unescape::{unescape_str, unescape_char, unescape_byte_str, unescape_byte}; use crate::print::pprust; use crate::symbol::{kw, Symbol}; @@ -27,14 +27,21 @@ crate enum LitError { } impl LitError { - crate fn report(&self, diag: &Handler, lit: token::Lit, suf: Option, span: Span) { + crate fn report( + &self, + diag: &Handler, + token::Lit { kind, suffix, .. }: token::Lit, + span: Span, + ) { match *self { LitError::NotLiteral | LitError::LexerError => {} LitError::InvalidSuffix => { - expect_no_suffix(diag, span, &format!("{} {}", lit.article(), lit.descr()), suf); + expect_no_suffix( + diag, span, &format!("{} {}", kind.article(), kind.descr()), suffix + ); } LitError::InvalidIntSuffix => { - let suf = suf.expect("suffix error with no suffix").as_str(); + let suf = suffix.expect("suffix error with no suffix").as_str(); if looks_like_width_suffix(&['i', 'u'], &suf) { // If it looks like a width, try to be helpful. let msg = format!("invalid width `{}` for integer literal", &suf[1..]); @@ -50,7 +57,7 @@ impl LitError { } } LitError::InvalidFloatSuffix => { - let suf = suf.expect("suffix error with no suffix").as_str(); + let suf = suffix.expect("suffix error with no suffix").as_str(); if looks_like_width_suffix(&['f'], &suf) { // If it looks like a width, try to be helpful. let msg = format!("invalid width `{}` for float literal", &suf[1..]); @@ -84,43 +91,42 @@ impl LitKind { /// If diagnostic handler is passed, always returns `Some`, /// possibly after reporting non-fatal errors and recovery. fn from_lit_token( - lit: token::Lit, - suf: Option, + token::Lit { kind, symbol, suffix }: token::Lit, ) -> Result { - if suf.is_some() && !lit.may_have_suffix() { + if suffix.is_some() && !kind.may_have_suffix() { return Err(LitError::InvalidSuffix); } - Ok(match lit { - token::Bool(i) => { - assert!(i == kw::True || i == kw::False); - LitKind::Bool(i == kw::True) + Ok(match kind { + token::Bool => { + assert!(symbol == kw::True || symbol == kw::False); + LitKind::Bool(symbol == kw::True) } - token::Byte(i) => { - match unescape_byte(&i.as_str()) { + token::Byte => { + match unescape_byte(&symbol.as_str()) { Ok(c) => LitKind::Byte(c), - Err(_) => LitKind::Err(i), + Err(_) => return Err(LitError::LexerError), } }, - token::Char(i) => { - match unescape_char(&i.as_str()) { + token::Char => { + match unescape_char(&symbol.as_str()) { Ok(c) => LitKind::Char(c), Err(_) => return Err(LitError::LexerError), } }, - token::Err(i) => LitKind::Err(i), // There are some valid suffixes for integer and float literals, // so all the handling is done internally. - token::Integer(s) => return integer_lit(s, suf), - token::Float(s) => return float_lit(s, suf), + token::Integer => return integer_lit(symbol, suffix), + token::Float => return float_lit(symbol, suffix), - token::Str_(mut sym) => { + token::Str => { // If there are no characters requiring special treatment we can // reuse the symbol from the token. Otherwise, we must generate a // new symbol because the string in the LitKind is different to the // string in the token. let mut error = None; + let mut sym = symbol; let s = &sym.as_str(); if s.as_bytes().iter().any(|&c| c == b'\\' || c == b'\r') { let mut buf = String::with_capacity(s.len()); @@ -138,16 +144,17 @@ impl LitKind { LitKind::Str(sym, ast::StrStyle::Cooked) } - token::StrRaw(mut sym, n) => { + token::StrRaw(n) => { // Ditto. + let mut sym = symbol; let s = &sym.as_str(); if s.contains('\r') { sym = Symbol::intern(&raw_str_lit(s)); } LitKind::Str(sym, ast::StrStyle::Raw(n)) } - token::ByteStr(i) => { - let s = &i.as_str(); + token::ByteStr => { + let s = &symbol.as_str(); let mut buf = Vec::with_capacity(s.len()); let mut error = None; unescape_byte_str(s, &mut |_, unescaped_byte| { @@ -162,36 +169,37 @@ impl LitKind { buf.shrink_to_fit(); LitKind::ByteStr(Lrc::new(buf)) } - token::ByteStrRaw(i, _) => { - LitKind::ByteStr(Lrc::new(i.to_string().into_bytes())) + token::ByteStrRaw(_) => { + LitKind::ByteStr(Lrc::new(symbol.to_string().into_bytes())) } + token::Err => LitKind::Err(symbol), }) } /// Attempts to recover a token from semantic literal. /// This function is used when the original token doesn't exist (e.g. the literal is created /// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing). - pub fn to_lit_token(&self) -> (token::Lit, Option) { - match *self { + pub fn to_lit_token(&self) -> token::Lit { + let (kind, symbol, suffix) = match *self { LitKind::Str(string, ast::StrStyle::Cooked) => { let escaped = string.as_str().escape_default().to_string(); - (token::Lit::Str_(Symbol::intern(&escaped)), None) + (token::Str, Symbol::intern(&escaped), None) } LitKind::Str(string, ast::StrStyle::Raw(n)) => { - (token::Lit::StrRaw(string, n), None) + (token::StrRaw(n), string, None) } LitKind::ByteStr(ref bytes) => { let string = bytes.iter().cloned().flat_map(ascii::escape_default) .map(Into::::into).collect::(); - (token::Lit::ByteStr(Symbol::intern(&string)), None) + (token::ByteStr, Symbol::intern(&string), None) } LitKind::Byte(byte) => { let string: String = ascii::escape_default(byte).map(Into::::into).collect(); - (token::Lit::Byte(Symbol::intern(&string)), None) + (token::Byte, Symbol::intern(&string), None) } LitKind::Char(ch) => { let string: String = ch.escape_default().map(Into::::into).collect(); - (token::Lit::Char(Symbol::intern(&string)), None) + (token::Char, Symbol::intern(&string), None) } LitKind::Int(n, ty) => { let suffix = match ty { @@ -199,29 +207,33 @@ impl LitKind { ast::LitIntType::Signed(ty) => Some(Symbol::intern(ty.ty_to_string())), ast::LitIntType::Unsuffixed => None, }; - (token::Lit::Integer(Symbol::intern(&n.to_string())), suffix) + (token::Integer, Symbol::intern(&n.to_string()), suffix) } LitKind::Float(symbol, ty) => { - (token::Lit::Float(symbol), Some(Symbol::intern(ty.ty_to_string()))) + (token::Float, symbol, Some(Symbol::intern(ty.ty_to_string()))) + } + LitKind::FloatUnsuffixed(symbol) => { + (token::Float, symbol, None) } - LitKind::FloatUnsuffixed(symbol) => (token::Lit::Float(symbol), None), LitKind::Bool(value) => { - let kw = if value { kw::True } else { kw::False }; - (token::Lit::Bool(kw), None) + let symbol = if value { kw::True } else { kw::False }; + (token::Bool, symbol, None) } - LitKind::Err(val) => (token::Lit::Err(val), None), - } + LitKind::Err(symbol) => { + (token::Err, symbol, None) + } + }; + + token::Lit::new(kind, symbol, suffix) } } impl Lit { fn from_lit_token( token: token::Lit, - suffix: Option, span: Span, ) -> Result { - let node = LitKind::from_lit_token(token, suffix)?; - Ok(Lit { node, token, suffix, span }) + Ok(Lit { token, node: LitKind::from_lit_token(token)?, span }) } /// Converts literal token with a suffix into an AST literal. @@ -232,11 +244,11 @@ impl Lit { token: &token::Token, span: Span, ) -> Result { - let (lit, suf) = match *token { + let lit = match *token { token::Ident(ident, false) if ident.name == kw::True || ident.name == kw::False => - (token::Bool(ident.name), None), - token::Literal(token, suffix) => - (token, suffix), + token::Lit::new(token::Bool, ident.name, None), + token::Literal(lit) => + lit, token::Interpolated(ref nt) => { if let token::NtExpr(expr) | token::NtLiteral(expr) = &**nt { if let ast::ExprKind::Lit(lit) = &expr.node { @@ -248,22 +260,21 @@ impl Lit { _ => return Err(LitError::NotLiteral) }; - Lit::from_lit_token(lit, suf, span) + Lit::from_lit_token(lit, span) } /// Attempts to recover an AST literal from semantic literal. /// This function is used when the original token doesn't exist (e.g. the literal is created /// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing). pub fn from_lit_kind(node: LitKind, span: Span) -> Lit { - let (token, suffix) = node.to_lit_token(); - Lit { node, token, suffix, span } + Lit { token: node.to_lit_token(), node, span } } /// Losslessly convert an AST literal into a token stream. crate fn tokens(&self) -> TokenStream { - let token = match self.token { - token::Bool(symbol) => token::Ident(Ident::new(symbol, self.span), false), - token => token::Literal(token, self.suffix), + let token = match self.token.kind { + token::Bool => token::Ident(Ident::new(self.token.symbol, self.span), false), + _ => token::Literal(self.token), }; TokenTree::Token(self.span, token).into() } @@ -276,11 +287,11 @@ impl<'a> Parser<'a> { if self.token == token::Dot { // Attempt to recover `.4` as `0.4`. recovered = self.look_ahead(1, |t| { - if let token::Literal(token::Integer(val), suf) = *t { + if let token::Literal(token::Lit { kind: token::Integer, symbol, suffix }) = *t { let next_span = self.look_ahead_span(1); if self.span.hi() == next_span.lo() { - let sym = String::from("0.") + &val.as_str(); - let token = token::Literal(token::Float(Symbol::intern(&sym)), suf); + let s = String::from("0.") + &symbol.as_str(); + let token = Token::lit(token::Float, Symbol::intern(&s), suffix); return Some((token, self.span.to(next_span))); } } @@ -313,10 +324,11 @@ impl<'a> Parser<'a> { return Err(self.span_fatal(span, &msg)); } Err(err) => { - let (lit, suf) = token.expect_lit(); + let lit = token.expect_lit(); self.bump(); - err.report(&self.sess.span_diagnostic, lit, suf, span); - return Ok(Lit::from_lit_token(token::Err(lit.symbol()), suf, span).ok().unwrap()); + err.report(&self.sess.span_diagnostic, lit, span); + let lit = token::Lit::new(token::Err, lit.symbol, lit.suffix); + return Ok(Lit::from_lit_token(lit, span).ok().unwrap()); } } } diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index 3d82344ac1e54..ae3665c834bd3 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -352,10 +352,12 @@ impl TokenCursor { let body = TokenTree::Delimited( delim_span, token::Bracket, - [TokenTree::Token(sp, token::Ident(ast::Ident::with_empty_ctxt(sym::doc), false)), - TokenTree::Token(sp, token::Eq), - TokenTree::Token(sp, token::Literal( - token::StrRaw(Symbol::intern(&stripped), num_of_hashes), None)) + [ + TokenTree::Token(sp, token::Ident(ast::Ident::with_empty_ctxt(sym::doc), false)), + TokenTree::Token(sp, token::Eq), + TokenTree::Token(sp, token::Token::lit( + token::StrRaw(num_of_hashes), Symbol::intern(&stripped), None + )), ] .iter().cloned().collect::().into(), ); @@ -2241,10 +2243,10 @@ impl<'a> Parser<'a> { } fn parse_field_name(&mut self) -> PResult<'a, Ident> { - if let token::Literal(token::Integer(name), suffix) = self.token { + if let token::Literal(token::Lit { kind: token::Integer, symbol, suffix }) = self.token { self.expect_no_suffix(self.span, "a tuple index", suffix); self.bump(); - Ok(Ident::new(name, self.prev_span)) + Ok(Ident::new(symbol, self.prev_span)) } else { self.parse_ident_common(false) } @@ -3045,19 +3047,19 @@ impl<'a> Parser<'a> { token::Ident(..) => { e = self.parse_dot_suffix(e, lo)?; } - token::Literal(token::Integer(name), suffix) => { + token::Literal(token::Lit { kind: token::Integer, symbol, suffix }) => { let span = self.span; self.bump(); - let field = ExprKind::Field(e, Ident::new(name, span)); + let field = ExprKind::Field(e, Ident::new(symbol, span)); e = self.mk_expr(lo.to(span), field, ThinVec::new()); self.expect_no_suffix(span, "a tuple index", suffix); } - token::Literal(token::Float(n), _suf) => { + token::Literal(token::Lit { kind: token::Float, symbol, .. }) => { self.bump(); - let fstr = n.as_str(); - let mut err = self.diagnostic() - .struct_span_err(self.prev_span, &format!("unexpected token: `{}`", n)); + let fstr = symbol.as_str(); + let msg = format!("unexpected token: `{}`", symbol); + let mut err = self.diagnostic().struct_span_err(self.prev_span, &msg); err.span_label(self.prev_span, "unexpected token"); if fstr.chars().all(|x| "0123456789.".contains(x)) { let float = match fstr.parse::().ok() { @@ -7557,11 +7559,12 @@ impl<'a> Parser<'a> { /// the `extern` keyword, if one is found. fn parse_opt_abi(&mut self) -> PResult<'a, Option> { match self.token { - token::Literal(token::Str_(s), suf) | token::Literal(token::StrRaw(s, _), suf) => { + token::Literal(token::Lit { kind: token::Str, symbol, suffix }) | + token::Literal(token::Lit { kind: token::StrRaw(..), symbol, suffix }) => { let sp = self.span; - self.expect_no_suffix(sp, "an ABI spec", suf); + self.expect_no_suffix(sp, "an ABI spec", suffix); self.bump(); - match abi::lookup(&s.as_str()) { + match abi::lookup(&symbol.as_str()) { Some(abi) => Ok(Some(abi)), None => { let prev_span = self.prev_span; @@ -7570,7 +7573,7 @@ impl<'a> Parser<'a> { prev_span, E0703, "invalid ABI: found `{}`", - s); + symbol); err.span_label(prev_span, "invalid ABI"); err.help(&format!("valid ABIs: {}", abi::all_names().join(", "))); err.emit(); @@ -8370,8 +8373,10 @@ impl<'a> Parser<'a> { pub fn parse_optional_str(&mut self) -> Option<(Symbol, ast::StrStyle, Option)> { let ret = match self.token { - token::Literal(token::Str_(s), suf) => (s, ast::StrStyle::Cooked, suf), - token::Literal(token::StrRaw(s, n), suf) => (s, ast::StrStyle::Raw(n), suf), + token::Literal(token::Lit { kind: token::Str, symbol, suffix }) => + (symbol, ast::StrStyle::Cooked, suffix), + token::Literal(token::Lit { kind: token::StrRaw(n), symbol, suffix }) => + (symbol, ast::StrStyle::Raw(n), suffix), _ => return None }; self.bump(); diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index fbc27d1999d86..4711a156ab15b 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -1,7 +1,7 @@ pub use BinOpToken::*; pub use Nonterminal::*; pub use DelimToken::*; -pub use Lit::*; +pub use LitKind::*; pub use Token::*; use crate::ast::{self}; @@ -59,59 +59,62 @@ impl DelimToken { } } -#[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Hash, Debug, Copy)] -pub enum Lit { - Bool(ast::Name), // AST only, must never appear in a `Token` - Byte(ast::Name), - Char(ast::Name), - Err(ast::Name), - Integer(ast::Name), - Float(ast::Name), - Str_(ast::Name), - StrRaw(ast::Name, u16), /* raw str delimited by n hash symbols */ - ByteStr(ast::Name), - ByteStrRaw(ast::Name, u16), /* raw byte str delimited by n hash symbols */ +#[derive(Clone, Copy, PartialEq, RustcEncodable, RustcDecodable, Debug)] +pub enum LitKind { + Bool, // AST only, must never appear in a `Token` + Byte, + Char, + Integer, + Float, + Str, + StrRaw(u16), // raw string delimited by `n` hash symbols + ByteStr, + ByteStrRaw(u16), // raw byte string delimited by `n` hash symbols + Err, } -#[cfg(target_arch = "x86_64")] -static_assert_size!(Lit, 8); - -impl Lit { - crate fn symbol(&self) -> Symbol { - match *self { - Bool(s) | Byte(s) | Char(s) | Integer(s) | Float(s) | Err(s) | - Str_(s) | StrRaw(s, _) | ByteStr(s) | ByteStrRaw(s, _) => s - } - } +#[derive(Clone, Copy, PartialEq, RustcEncodable, RustcDecodable, Debug)] +pub struct Lit { + pub kind: LitKind, + pub symbol: Symbol, + pub suffix: Option, +} - crate fn article(&self) -> &'static str { - match *self { - Integer(_) | Err(_) => "an", +impl LitKind { + crate fn article(self) -> &'static str { + match self { + Integer | Err => "an", _ => "a", } } - crate fn descr(&self) -> &'static str { - match *self { - Bool(_) => panic!("literal token contains `Lit::Bool`"), - Byte(_) => "byte literal", - Char(_) => "char literal", - Err(_) => "invalid literal", - Integer(_) => "integer literal", - Float(_) => "float literal", - Str_(_) | StrRaw(..) => "string literal", - ByteStr(_) | ByteStrRaw(..) => "byte string literal" + crate fn descr(self) -> &'static str { + match self { + Bool => panic!("literal token contains `Lit::Bool`"), + Byte => "byte literal", + Char => "char literal", + Integer => "integer literal", + Float => "float literal", + Str | StrRaw(..) => "string literal", + ByteStr | ByteStrRaw(..) => "byte string literal", + Err => "invalid literal", } } - crate fn may_have_suffix(&self) -> bool { - match *self { - Integer(..) | Float(..) | Err(..) => true, + crate fn may_have_suffix(self) -> bool { + match self { + Integer | Float | Err => true, _ => false, } } } +impl Lit { + pub fn new(kind: LitKind, symbol: Symbol, suffix: Option) -> Lit { + Lit { kind, symbol, suffix } + } +} + pub(crate) fn ident_can_begin_expr(ident: ast::Ident, is_raw: bool) -> bool { let ident_token: Token = Ident(ident, is_raw); @@ -201,7 +204,7 @@ pub enum Token { CloseDelim(DelimToken), /* Literals */ - Literal(Lit, Option), + Literal(Lit), /* Name components */ Ident(ast::Ident, /* is_raw */ bool), @@ -318,6 +321,10 @@ impl Token { self == &Question || self == &OpenDelim(Paren) } + pub fn lit(kind: LitKind, symbol: Symbol, suffix: Option) -> Token { + Literal(Lit::new(kind, symbol, suffix)) + } + /// Returns `true` if the token is any literal crate fn is_lit(&self) -> bool { match *self { @@ -326,9 +333,9 @@ impl Token { } } - crate fn expect_lit(&self) -> (Lit, Option) { + crate fn expect_lit(&self) -> Lit { match *self { - Literal(lit, suf) => (lit, suf), + Literal(lit) => lit, _=> panic!("`expect_lit` called on non-literal"), } } @@ -579,13 +586,13 @@ impl Token { (&DocComment(a), &DocComment(b)) | (&Shebang(a), &Shebang(b)) => a == b, + (&Literal(a), &Literal(b)) => a == b, + (&Lifetime(a), &Lifetime(b)) => a.name == b.name, (&Ident(a, b), &Ident(c, d)) => b == d && (a.name == c.name || a.name == kw::DollarCrate || c.name == kw::DollarCrate), - (&Literal(a, b), &Literal(c, d)) => b == d && a == c, - (&Interpolated(_), &Interpolated(_)) => false, _ => panic!("forgot to add a token?"), diff --git a/src/libsyntax/print/pprust.rs b/src/libsyntax/print/pprust.rs index 2e072522d24a4..67f57a7ed0052 100644 --- a/src/libsyntax/print/pprust.rs +++ b/src/libsyntax/print/pprust.rs @@ -163,22 +163,22 @@ fn binop_to_string(op: BinOpToken) -> &'static str { } } -pub fn literal_to_string(lit: token::Lit, suffix: Option) -> String { - let mut out = match lit { - token::Byte(b) => format!("b'{}'", b), - token::Char(c) => format!("'{}'", c), - token::Err(c) => format!("'{}'", c), - token::Bool(c) | - token::Float(c) | - token::Integer(c) => c.to_string(), - token::Str_(s) => format!("\"{}\"", s), - token::StrRaw(s, n) => format!("r{delim}\"{string}\"{delim}", - delim="#".repeat(n as usize), - string=s), - token::ByteStr(v) => format!("b\"{}\"", v), - token::ByteStrRaw(s, n) => format!("br{delim}\"{string}\"{delim}", - delim="#".repeat(n as usize), - string=s), +pub fn literal_to_string(token::Lit { kind, symbol, suffix }: token::Lit) -> String { + let mut out = match kind { + token::Byte => format!("b'{}'", symbol), + token::Char => format!("'{}'", symbol), + token::Bool | + token::Float | + token::Integer => symbol.to_string(), + token::Str => format!("\"{}\"", symbol), + token::StrRaw(n) => format!("r{delim}\"{string}\"{delim}", + delim="#".repeat(n as usize), + string=symbol), + token::ByteStr => format!("b\"{}\"", symbol), + token::ByteStrRaw(n) => format!("br{delim}\"{string}\"{delim}", + delim="#".repeat(n as usize), + string=symbol), + token::Err => format!("'{}'", symbol), }; if let Some(suffix) = suffix { @@ -231,7 +231,7 @@ pub fn token_to_string(tok: &Token) -> String { token::SingleQuote => "'".to_string(), /* Literals */ - token::Literal(lit, suf) => literal_to_string(lit, suf), + token::Literal(lit) => literal_to_string(lit), /* Name components */ token::Ident(s, false) => s.to_string(), @@ -571,7 +571,7 @@ pub trait PrintState<'a> { fn print_literal(&mut self, lit: &ast::Lit) -> io::Result<()> { self.maybe_print_comment(lit.span.lo())?; - self.writer().word(literal_to_string(lit.token, lit.suffix)) + self.writer().word(literal_to_string(lit.token)) } fn print_string(&mut self, st: &str, diff --git a/src/libsyntax_ext/assert.rs b/src/libsyntax_ext/assert.rs index cd69733571d5a..a11cd9c6f761d 100644 --- a/src/libsyntax_ext/assert.rs +++ b/src/libsyntax_ext/assert.rs @@ -4,7 +4,7 @@ use syntax::ast::{self, *}; use syntax::source_map::Spanned; use syntax::ext::base::*; use syntax::ext::build::AstBuilder; -use syntax::parse::token; +use syntax::parse::token::{self, Token}; use syntax::parse::parser::Parser; use syntax::print::pprust; use syntax::ptr::P; @@ -31,13 +31,10 @@ pub fn expand_assert<'cx>( tts: custom_message.unwrap_or_else(|| { TokenStream::from(TokenTree::Token( DUMMY_SP, - token::Literal( - token::Lit::Str_(Name::intern(&format!( - "assertion failed: {}", - pprust::expr_to_string(&cond_expr).escape_debug() - ))), - None, - ), + Token::lit(token::Str, Symbol::intern(&format!( + "assertion failed: {}", + pprust::expr_to_string(&cond_expr).escape_debug() + )), None), )) }).into(), delim: MacDelimiter::Parenthesis, @@ -106,7 +103,7 @@ fn parse_assert<'a>( // // Parse this as an actual message, and suggest inserting a comma. Eventually, this should be // turned into an error. - let custom_message = if let token::Literal(token::Lit::Str_(_), _) = parser.token { + let custom_message = if let token::Literal(token::Lit { kind: token::Str, .. }) = parser.token { let mut err = cx.struct_span_warn(parser.span, "unexpected string literal"); let comma_span = cx.source_map().next_point(parser.prev_span); err.span_suggestion_short( diff --git a/src/libsyntax_ext/proc_macro_server.rs b/src/libsyntax_ext/proc_macro_server.rs index 69e83f5a84ced..a9bc5fe357d60 100644 --- a/src/libsyntax_ext/proc_macro_server.rs +++ b/src/libsyntax_ext/proc_macro_server.rs @@ -150,7 +150,7 @@ impl FromInternal<(TreeAndJoint, &'_ ParseSess, &'_ mut Vec)> stack.push(tt!(Ident::new(ident.name, false))); tt!(Punct::new('\'', true)) } - Literal(lit, suffix) => tt!(Literal { lit, suffix }), + Literal(lit) => tt!(Literal { lit }), DocComment(c) => { let style = comments::doc_comment_style(&c.as_str()); let stripped = comments::strip_doc_comment_decoration(&c.as_str()); @@ -161,7 +161,7 @@ impl FromInternal<(TreeAndJoint, &'_ ParseSess, &'_ mut Vec)> let stream = vec![ Ident(ast::Ident::new(Symbol::intern("doc"), span), false), Eq, - Literal(Lit::Str_(Symbol::intern(&escaped)), None), + Token::lit(token::Str, Symbol::intern(&escaped), None), ] .into_iter() .map(|token| tokenstream::TokenTree::Token(span, token)) @@ -215,31 +215,29 @@ impl ToInternal for TokenTree { return tokenstream::TokenTree::Token(span, token).into(); } TokenTree::Literal(self::Literal { - lit: Lit::Integer(ref a), - suffix, + lit: token::Lit { kind: token::Integer, symbol, suffix }, span, - }) if a.as_str().starts_with("-") => { + }) if symbol.as_str().starts_with("-") => { let minus = BinOp(BinOpToken::Minus); - let integer = Symbol::intern(&a.as_str()[1..]); - let integer = Literal(Lit::Integer(integer), suffix); + let symbol = Symbol::intern(&symbol.as_str()[1..]); + let integer = Token::lit(token::Integer, symbol, suffix); let a = tokenstream::TokenTree::Token(span, minus); let b = tokenstream::TokenTree::Token(span, integer); return vec![a, b].into_iter().collect(); } TokenTree::Literal(self::Literal { - lit: Lit::Float(ref a), - suffix, + lit: token::Lit { kind: token::Float, symbol, suffix }, span, - }) if a.as_str().starts_with("-") => { + }) if symbol.as_str().starts_with("-") => { let minus = BinOp(BinOpToken::Minus); - let float = Symbol::intern(&a.as_str()[1..]); - let float = Literal(Lit::Float(float), suffix); + let symbol = Symbol::intern(&symbol.as_str()[1..]); + let float = Token::lit(token::Float, symbol, suffix); let a = tokenstream::TokenTree::Token(span, minus); let b = tokenstream::TokenTree::Token(span, float); return vec![a, b].into_iter().collect(); } - TokenTree::Literal(self::Literal { lit, suffix, span }) => { - return tokenstream::TokenTree::Token(span, Literal(lit, suffix)).into() + TokenTree::Literal(self::Literal { lit, span }) => { + return tokenstream::TokenTree::Token(span, Literal(lit)).into() } }; @@ -355,7 +353,6 @@ impl Ident { #[derive(Clone, Debug)] pub struct Literal { lit: token::Lit, - suffix: Option, span: Span, } @@ -381,6 +378,13 @@ impl<'a> Rustc<'a> { call_site: to_span(Transparency::Transparent), } } + + pub fn lit(&mut self, kind: token::LitKind, symbol: Symbol, suffix: Option) -> Literal { + Literal { + lit: token::Lit::new(kind, symbol, suffix), + span: server::Span::call_site(self), + } + } } impl server::Types for Rustc<'_> { @@ -536,59 +540,31 @@ impl server::Literal for Rustc<'_> { format!("{:?}", literal) } fn integer(&mut self, n: &str) -> Self::Literal { - Literal { - lit: token::Lit::Integer(Symbol::intern(n)), - suffix: None, - span: server::Span::call_site(self), - } + self.lit(token::Integer, Symbol::intern(n), None) } fn typed_integer(&mut self, n: &str, kind: &str) -> Self::Literal { - Literal { - lit: token::Lit::Integer(Symbol::intern(n)), - suffix: Some(Symbol::intern(kind)), - span: server::Span::call_site(self), - } + self.lit(token::Integer, Symbol::intern(n), Some(Symbol::intern(kind))) } fn float(&mut self, n: &str) -> Self::Literal { - Literal { - lit: token::Lit::Float(Symbol::intern(n)), - suffix: None, - span: server::Span::call_site(self), - } + self.lit(token::Float, Symbol::intern(n), None) } fn f32(&mut self, n: &str) -> Self::Literal { - Literal { - lit: token::Lit::Float(Symbol::intern(n)), - suffix: Some(Symbol::intern("f32")), - span: server::Span::call_site(self), - } + self.lit(token::Float, Symbol::intern(n), Some(Symbol::intern("f32"))) } fn f64(&mut self, n: &str) -> Self::Literal { - Literal { - lit: token::Lit::Float(Symbol::intern(n)), - suffix: Some(Symbol::intern("f64")), - span: server::Span::call_site(self), - } + self.lit(token::Float, Symbol::intern(n), Some(Symbol::intern("f64"))) } fn string(&mut self, string: &str) -> Self::Literal { let mut escaped = String::new(); for ch in string.chars() { escaped.extend(ch.escape_debug()); } - Literal { - lit: token::Lit::Str_(Symbol::intern(&escaped)), - suffix: None, - span: server::Span::call_site(self), - } + self.lit(token::Str, Symbol::intern(&escaped), None) } fn character(&mut self, ch: char) -> Self::Literal { let mut escaped = String::new(); escaped.extend(ch.escape_unicode()); - Literal { - lit: token::Lit::Char(Symbol::intern(&escaped)), - suffix: None, - span: server::Span::call_site(self), - } + self.lit(token::Char, Symbol::intern(&escaped), None) } fn byte_string(&mut self, bytes: &[u8]) -> Self::Literal { let string = bytes @@ -597,11 +573,7 @@ impl server::Literal for Rustc<'_> { .flat_map(ascii::escape_default) .map(Into::::into) .collect::(); - Literal { - lit: token::Lit::ByteStr(Symbol::intern(&string)), - suffix: None, - span: server::Span::call_site(self), - } + self.lit(token::ByteStr, Symbol::intern(&string), None) } fn span(&mut self, literal: &Self::Literal) -> Self::Span { literal.span From 90d15e770419fb4ae8e120909baafc35ef243947 Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Sun, 19 May 2019 19:56:45 +0300 Subject: [PATCH 6/6] syntax: Some code cleanup --- src/libsyntax/parse/literal.rs | 350 ++++++++---------- src/libsyntax/parse/token.rs | 16 +- src/libsyntax/print/pprust.rs | 3 +- src/libsyntax_ext/proc_macro_server.rs | 2 +- src/libsyntax_pos/symbol.rs | 2 + .../ui/parser/lex-bad-numeric-literals.rs | 6 +- .../ui/parser/lex-bad-numeric-literals.stderr | 20 +- src/test/ui/parser/no-hex-float-literal.rs | 2 +- .../ui/parser/no-hex-float-literal.stderr | 4 +- 9 files changed, 177 insertions(+), 228 deletions(-) diff --git a/src/libsyntax/parse/literal.rs b/src/libsyntax/parse/literal.rs index 7554c7119932e..0305b1f59b946 100644 --- a/src/libsyntax/parse/literal.rs +++ b/src/libsyntax/parse/literal.rs @@ -6,7 +6,7 @@ use crate::parse::PResult; use crate::parse::token::{self, Token}; use crate::parse::unescape::{unescape_str, unescape_char, unescape_byte_str, unescape_byte}; use crate::print::pprust; -use crate::symbol::{kw, Symbol}; +use crate::symbol::{kw, sym, Symbol}; use crate::tokenstream::{TokenStream, TokenTree}; use errors::{Applicability, Handler}; @@ -22,22 +22,23 @@ crate enum LitError { InvalidSuffix, InvalidIntSuffix, InvalidFloatSuffix, - NonDecimalFloat(&'static str), + NonDecimalFloat(u32), IntTooLarge, } impl LitError { - crate fn report( - &self, - diag: &Handler, - token::Lit { kind, suffix, .. }: token::Lit, - span: Span, - ) { + fn report(&self, diag: &Handler, lit: token::Lit, span: Span) { + let token::Lit { kind, suffix, .. } = lit; match *self { - LitError::NotLiteral | LitError::LexerError => {} + // `NotLiteral` is not an error by itself, so we don't report + // it and give the parser opportunity to try something else. + LitError::NotLiteral => {} + // `LexerError` *is* an error, but it was already reported + // by lexer, so here we don't report it the second time. + LitError::LexerError => {} LitError::InvalidSuffix => { expect_no_suffix( - diag, span, &format!("{} {}", kind.article(), kind.descr()), suffix + diag, span, &format!("{} {} literal", kind.article(), kind.descr()), suffix ); } LitError::InvalidIntSuffix => { @@ -72,7 +73,13 @@ impl LitError { .emit(); } } - LitError::NonDecimalFloat(descr) => { + LitError::NonDecimalFloat(base) => { + let descr = match base { + 16 => "hexadecimal", + 8 => "octal", + 2 => "binary", + _ => unreachable!(), + }; diag.struct_span_err(span, &format!("{} float literal is not supported", descr)) .span_label(span, "not supported") .emit(); @@ -86,13 +93,9 @@ impl LitError { } impl LitKind { - /// Converts literal token with a suffix into a semantic literal. - /// Works speculatively and may return `None` if diagnostic handler is not passed. - /// If diagnostic handler is passed, always returns `Some`, - /// possibly after reporting non-fatal errors and recovery. - fn from_lit_token( - token::Lit { kind, symbol, suffix }: token::Lit, - ) -> Result { + /// Converts literal token into a semantic literal. + fn from_lit_token(lit: token::Lit) -> Result { + let token::Lit { kind, symbol, suffix } = lit; if suffix.is_some() && !kind.may_have_suffix() { return Err(LitError::InvalidSuffix); } @@ -102,18 +105,10 @@ impl LitKind { assert!(symbol == kw::True || symbol == kw::False); LitKind::Bool(symbol == kw::True) } - token::Byte => { - match unescape_byte(&symbol.as_str()) { - Ok(c) => LitKind::Byte(c), - Err(_) => return Err(LitError::LexerError), - } - }, - token::Char => { - match unescape_char(&symbol.as_str()) { - Ok(c) => LitKind::Char(c), - Err(_) => return Err(LitError::LexerError), - } - }, + token::Byte => return unescape_byte(&symbol.as_str()) + .map(LitKind::Byte).map_err(|_| LitError::LexerError), + token::Char => return unescape_char(&symbol.as_str()) + .map(LitKind::Char).map_err(|_| LitError::LexerError), // There are some valid suffixes for integer and float literals, // so all the handling is done internally. @@ -125,53 +120,48 @@ impl LitKind { // reuse the symbol from the token. Otherwise, we must generate a // new symbol because the string in the LitKind is different to the // string in the token. - let mut error = None; - let mut sym = symbol; - let s = &sym.as_str(); - if s.as_bytes().iter().any(|&c| c == b'\\' || c == b'\r') { + let s = symbol.as_str(); + let symbol = if s.contains(&['\\', '\r'][..]) { let mut buf = String::with_capacity(s.len()); - unescape_str(s, &mut |_, unescaped_char| { + let mut error = Ok(()); + unescape_str(&s, &mut |_, unescaped_char| { match unescaped_char { Ok(c) => buf.push(c), - Err(_) => error = Some(LitError::LexerError), + Err(_) => error = Err(LitError::LexerError), } }); - if let Some(error) = error { - return Err(error); - } - sym = Symbol::intern(&buf) - } - - LitKind::Str(sym, ast::StrStyle::Cooked) + error?; + Symbol::intern(&buf) + } else { + symbol + }; + LitKind::Str(symbol, ast::StrStyle::Cooked) } token::StrRaw(n) => { // Ditto. - let mut sym = symbol; - let s = &sym.as_str(); - if s.contains('\r') { - sym = Symbol::intern(&raw_str_lit(s)); - } - LitKind::Str(sym, ast::StrStyle::Raw(n)) + let s = symbol.as_str(); + let symbol = if s.contains('\r') { + Symbol::intern(&raw_str_lit(&s)) + } else { + symbol + }; + LitKind::Str(symbol, ast::StrStyle::Raw(n)) } token::ByteStr => { - let s = &symbol.as_str(); + let s = symbol.as_str(); let mut buf = Vec::with_capacity(s.len()); - let mut error = None; - unescape_byte_str(s, &mut |_, unescaped_byte| { + let mut error = Ok(()); + unescape_byte_str(&s, &mut |_, unescaped_byte| { match unescaped_byte { Ok(c) => buf.push(c), - Err(_) => error = Some(LitError::LexerError), + Err(_) => error = Err(LitError::LexerError), } }); - if let Some(error) = error { - return Err(error); - } + error?; buf.shrink_to_fit(); LitKind::ByteStr(Lrc::new(buf)) } - token::ByteStrRaw(_) => { - LitKind::ByteStr(Lrc::new(symbol.to_string().into_bytes())) - } + token::ByteStrRaw(_) => LitKind::ByteStr(Lrc::new(symbol.to_string().into_bytes())), token::Err => LitKind::Err(symbol), }) } @@ -229,21 +219,13 @@ impl LitKind { } impl Lit { - fn from_lit_token( - token: token::Lit, - span: Span, - ) -> Result { + /// Converts literal token into an AST literal. + fn from_lit_token(token: token::Lit, span: Span) -> Result { Ok(Lit { token, node: LitKind::from_lit_token(token)?, span }) } - /// Converts literal token with a suffix into an AST literal. - /// Works speculatively and may return `None` if diagnostic handler is not passed. - /// If diagnostic handler is passed, may return `Some`, - /// possibly after reporting non-fatal errors and recovery, or `None` for irrecoverable errors. - crate fn from_token( - token: &token::Token, - span: Span, - ) -> Result { + /// Converts arbitrary token into an AST literal. + crate fn from_token(token: &Token, span: Span) -> Result { let lit = match *token { token::Ident(ident, false) if ident.name == kw::True || ident.name == kw::False => token::Lit::new(token::Bool, ident.name, None), @@ -298,6 +280,7 @@ impl<'a> Parser<'a> { None }); if let Some((ref token, span)) = recovered { + self.bump(); self.diagnostic() .struct_span_err(span, "float literals must have an integer part") .span_suggestion( @@ -307,7 +290,6 @@ impl<'a> Parser<'a> { Applicability::MachineApplicable, ) .emit(); - self.bump(); } } @@ -317,64 +299,59 @@ impl<'a> Parser<'a> { match Lit::from_token(token, span) { Ok(lit) => { self.bump(); - return Ok(lit); + Ok(lit) } Err(LitError::NotLiteral) => { let msg = format!("unexpected token: {}", self.this_token_descr()); - return Err(self.span_fatal(span, &msg)); + Err(self.span_fatal(span, &msg)) } Err(err) => { let lit = token.expect_lit(); self.bump(); err.report(&self.sess.span_diagnostic, lit, span); let lit = token::Lit::new(token::Err, lit.symbol, lit.suffix); - return Ok(Lit::from_lit_token(lit, span).ok().unwrap()); + Lit::from_lit_token(lit, span).map_err(|_| unreachable!()) } } } } -crate fn expect_no_suffix(diag: &Handler, sp: Span, kind: &str, suffix: Option) { - match suffix { - None => {/* everything ok */} - Some(suf) => { - let text = suf.as_str(); - let mut err = if kind == "a tuple index" && - ["i32", "u32", "isize", "usize"].contains(&text.to_string().as_str()) - { - // #59553: warn instead of reject out of hand to allow the fix to percolate - // through the ecosystem when people fix their macros - let mut err = diag.struct_span_warn( - sp, - &format!("suffixes on {} are invalid", kind), - ); - err.note(&format!( - "`{}` is *temporarily* accepted on tuple index fields as it was \ - incorrectly accepted on stable for a few releases", - text, - )); - err.help( - "on proc macros, you'll want to use `syn::Index::from` or \ - `proc_macro::Literal::*_unsuffixed` for code that will desugar \ - to tuple field access", - ); - err.note( - "for more context, see https://github.com/rust-lang/rust/issues/60210", - ); - err - } else { - diag.struct_span_err(sp, &format!("suffixes on {} are invalid", kind)) - }; - err.span_label(sp, format!("invalid suffix `{}`", text)); - err.emit(); - } +crate fn expect_no_suffix(diag: &Handler, sp: Span, kind: &str, suffix: Option) { + if let Some(suf) = suffix { + let mut err = if kind == "a tuple index" && + [sym::i32, sym::u32, sym::isize, sym::usize].contains(&suf) { + // #59553: warn instead of reject out of hand to allow the fix to percolate + // through the ecosystem when people fix their macros + let mut err = diag.struct_span_warn( + sp, + &format!("suffixes on {} are invalid", kind), + ); + err.note(&format!( + "`{}` is *temporarily* accepted on tuple index fields as it was \ + incorrectly accepted on stable for a few releases", + suf, + )); + err.help( + "on proc macros, you'll want to use `syn::Index::from` or \ + `proc_macro::Literal::*_unsuffixed` for code that will desugar \ + to tuple field access", + ); + err.note( + "for more context, see https://github.com/rust-lang/rust/issues/60210", + ); + err + } else { + diag.struct_span_err(sp, &format!("suffixes on {} are invalid", kind)) + }; + err.span_label(sp, format!("invalid suffix `{}`", suf)); + err.emit(); } } /// Parses a string representing a raw string literal into its final form. The /// only operation this does is convert embedded CRLF into a single LF. fn raw_str_lit(lit: &str) -> String { - debug!("raw_str_lit: given {}", lit.escape_default()); + debug!("raw_str_lit: {:?}", lit); let mut res = String::with_capacity(lit.len()); let mut chars = lit.chars().peekable(); @@ -399,117 +376,82 @@ fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool { s.len() > 1 && s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit()) } -fn filtered_float_lit(data: Symbol, suffix: Option) -> Result { - debug!("filtered_float_lit: {}, {:?}", data, suffix); - let suffix = match suffix { - Some(suffix) => suffix, - None => return Ok(LitKind::FloatUnsuffixed(data)), - }; +fn strip_underscores(symbol: Symbol) -> Symbol { + // Do not allocate a new string unless necessary. + let s = symbol.as_str(); + if s.contains('_') { + let mut s = s.to_string(); + s.retain(|c| c != '_'); + return Symbol::intern(&s); + } + symbol +} - Ok(match &*suffix.as_str() { - "f32" => LitKind::Float(data, ast::FloatTy::F32), - "f64" => LitKind::Float(data, ast::FloatTy::F64), - _ => return Err(LitError::InvalidFloatSuffix), +fn filtered_float_lit(symbol: Symbol, suffix: Option, base: u32) + -> Result { + debug!("filtered_float_lit: {:?}, {:?}, {:?}", symbol, suffix, base); + if base != 10 { + return Err(LitError::NonDecimalFloat(base)); + } + Ok(match suffix { + Some(suf) => match suf { + sym::f32 => LitKind::Float(symbol, ast::FloatTy::F32), + sym::f64 => LitKind::Float(symbol, ast::FloatTy::F64), + _ => return Err(LitError::InvalidFloatSuffix), + } + None => LitKind::FloatUnsuffixed(symbol) }) } -fn float_lit(s: Symbol, suffix: Option) -> Result { - debug!("float_lit: {:?}, {:?}", s, suffix); - // FIXME #2252: bounds checking float literals is deferred until trans - - // Strip underscores without allocating a new String unless necessary. - let s2; - let s = s.as_str(); - let s = s.get(); - let s = if s.chars().any(|c| c == '_') { - s2 = s.chars().filter(|&c| c != '_').collect::(); - &s2 - } else { - s - }; - - filtered_float_lit(Symbol::intern(s), suffix) +fn float_lit(symbol: Symbol, suffix: Option) -> Result { + debug!("float_lit: {:?}, {:?}", symbol, suffix); + filtered_float_lit(strip_underscores(symbol), suffix, 10) } -fn integer_lit(s: Symbol, suffix: Option) -> Result { - // s can only be ascii, byte indexing is fine - - // Strip underscores without allocating a new String unless necessary. - let s2; - let orig = s; - let s = s.as_str(); - let s = s.get(); - let mut s = if s.chars().any(|c| c == '_') { - s2 = s.chars().filter(|&c| c != '_').collect::(); - &s2 - } else { - s - }; - - debug!("integer_lit: {}, {:?}", s, suffix); +fn integer_lit(symbol: Symbol, suffix: Option) -> Result { + debug!("integer_lit: {:?}, {:?}", symbol, suffix); + let symbol = strip_underscores(symbol); + let s = symbol.as_str(); let mut base = 10; - let mut ty = ast::LitIntType::Unsuffixed; - - if s.starts_with('0') && s.len() > 1 { + if s.len() > 1 && s.as_bytes()[0] == b'0' { match s.as_bytes()[1] { b'x' => base = 16, b'o' => base = 8, b'b' => base = 2, - _ => { } - } - } - - // 1f64 and 2f32 etc. are valid float literals. - if let Some(suf) = suffix { - if looks_like_width_suffix(&['f'], &suf.as_str()) { - let err = match base { - 16 => Some(LitError::NonDecimalFloat("hexadecimal")), - 8 => Some(LitError::NonDecimalFloat("octal")), - 2 => Some(LitError::NonDecimalFloat("binary")), - _ => None, - }; - if let Some(err) = err { - return Err(err); - } - return filtered_float_lit(Symbol::intern(s), Some(suf)) + _ => {} } } - if base != 10 { - s = &s[2..]; - } - - if let Some(suf) = suffix { - ty = match &*suf.as_str() { - "isize" => ast::LitIntType::Signed(ast::IntTy::Isize), - "i8" => ast::LitIntType::Signed(ast::IntTy::I8), - "i16" => ast::LitIntType::Signed(ast::IntTy::I16), - "i32" => ast::LitIntType::Signed(ast::IntTy::I32), - "i64" => ast::LitIntType::Signed(ast::IntTy::I64), - "i128" => ast::LitIntType::Signed(ast::IntTy::I128), - "usize" => ast::LitIntType::Unsigned(ast::UintTy::Usize), - "u8" => ast::LitIntType::Unsigned(ast::UintTy::U8), - "u16" => ast::LitIntType::Unsigned(ast::UintTy::U16), - "u32" => ast::LitIntType::Unsigned(ast::UintTy::U32), - "u64" => ast::LitIntType::Unsigned(ast::UintTy::U64), - "u128" => ast::LitIntType::Unsigned(ast::UintTy::U128), + let ty = match suffix { + Some(suf) => match suf { + sym::isize => ast::LitIntType::Signed(ast::IntTy::Isize), + sym::i8 => ast::LitIntType::Signed(ast::IntTy::I8), + sym::i16 => ast::LitIntType::Signed(ast::IntTy::I16), + sym::i32 => ast::LitIntType::Signed(ast::IntTy::I32), + sym::i64 => ast::LitIntType::Signed(ast::IntTy::I64), + sym::i128 => ast::LitIntType::Signed(ast::IntTy::I128), + sym::usize => ast::LitIntType::Unsigned(ast::UintTy::Usize), + sym::u8 => ast::LitIntType::Unsigned(ast::UintTy::U8), + sym::u16 => ast::LitIntType::Unsigned(ast::UintTy::U16), + sym::u32 => ast::LitIntType::Unsigned(ast::UintTy::U32), + sym::u64 => ast::LitIntType::Unsigned(ast::UintTy::U64), + sym::u128 => ast::LitIntType::Unsigned(ast::UintTy::U128), + // `1f64` and `2f32` etc. are valid float literals, and + // `fxxx` looks more like an invalid float literal than invalid integer literal. + _ if suf.as_str().starts_with('f') => return filtered_float_lit(symbol, suffix, base), _ => return Err(LitError::InvalidIntSuffix), } - } - - debug!("integer_lit: the type is {:?}, base {:?}, the new string is {:?}, the original \ - string was {:?}, the original suffix was {:?}", ty, base, s, orig, suffix); + _ => ast::LitIntType::Unsuffixed + }; - Ok(match u128::from_str_radix(s, base) { - Ok(r) => LitKind::Int(r, ty), - Err(_) => { - // Small bases are lexed as if they were base 10, e.g, the string - // might be `0b10201`. This will cause the conversion above to fail, - // but these kinds of errors are already reported by the lexer. - let from_lexer = - base < 10 && s.chars().any(|c| c.to_digit(10).map_or(false, |d| d >= base)); - return Err(if from_lexer { LitError::LexerError } else { LitError::IntTooLarge }); - } + let s = &s[if base != 10 { 2 } else { 0 } ..]; + u128::from_str_radix(s, base).map(|i| LitKind::Int(i, ty)).map_err(|_| { + // Small bases are lexed as if they were base 10, e.g, the string + // might be `0b10201`. This will cause the conversion above to fail, + // but these kinds of errors are already reported by the lexer. + let from_lexer = + base < 10 && s.chars().any(|c| c.to_digit(10).map_or(false, |d| d >= base)); + if from_lexer { LitError::LexerError } else { LitError::IntTooLarge } }) } diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index 4711a156ab15b..e5361b2db4e9e 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -73,6 +73,7 @@ pub enum LitKind { Err, } +/// A literal token. #[derive(Clone, Copy, PartialEq, RustcEncodable, RustcDecodable, Debug)] pub struct Lit { pub kind: LitKind, @@ -81,6 +82,7 @@ pub struct Lit { } impl LitKind { + /// An English article for the literal token kind. crate fn article(self) -> &'static str { match self { Integer | Err => "an", @@ -91,13 +93,13 @@ impl LitKind { crate fn descr(self) -> &'static str { match self { Bool => panic!("literal token contains `Lit::Bool`"), - Byte => "byte literal", - Char => "char literal", - Integer => "integer literal", - Float => "float literal", - Str | StrRaw(..) => "string literal", - ByteStr | ByteStrRaw(..) => "byte string literal", - Err => "invalid literal", + Byte => "byte", + Char => "char", + Integer => "integer", + Float => "float", + Str | StrRaw(..) => "string", + ByteStr | ByteStrRaw(..) => "byte string", + Err => "error", } } diff --git a/src/libsyntax/print/pprust.rs b/src/libsyntax/print/pprust.rs index 67f57a7ed0052..88a5033f3b55f 100644 --- a/src/libsyntax/print/pprust.rs +++ b/src/libsyntax/print/pprust.rs @@ -163,7 +163,8 @@ fn binop_to_string(op: BinOpToken) -> &'static str { } } -pub fn literal_to_string(token::Lit { kind, symbol, suffix }: token::Lit) -> String { +pub fn literal_to_string(lit: token::Lit) -> String { + let token::Lit { kind, symbol, suffix } = lit; let mut out = match kind { token::Byte => format!("b'{}'", symbol), token::Char => format!("'{}'", symbol), diff --git a/src/libsyntax_ext/proc_macro_server.rs b/src/libsyntax_ext/proc_macro_server.rs index a9bc5fe357d60..beac92894b77a 100644 --- a/src/libsyntax_ext/proc_macro_server.rs +++ b/src/libsyntax_ext/proc_macro_server.rs @@ -379,7 +379,7 @@ impl<'a> Rustc<'a> { } } - pub fn lit(&mut self, kind: token::LitKind, symbol: Symbol, suffix: Option) -> Literal { + fn lit(&mut self, kind: token::LitKind, symbol: Symbol, suffix: Option) -> Literal { Literal { lit: token::Lit::new(kind, symbol, suffix), span: server::Span::call_site(self), diff --git a/src/libsyntax_pos/symbol.rs b/src/libsyntax_pos/symbol.rs index f69b26e49d0c0..b1e1a056db4ad 100644 --- a/src/libsyntax_pos/symbol.rs +++ b/src/libsyntax_pos/symbol.rs @@ -246,6 +246,8 @@ symbols! { extern_prelude, extern_types, f16c_target_feature, + f32, + f64, feature, ffi_returns_twice, field_init_shorthand, diff --git a/src/test/ui/parser/lex-bad-numeric-literals.rs b/src/test/ui/parser/lex-bad-numeric-literals.rs index 440150da17005..67134c14cded3 100644 --- a/src/test/ui/parser/lex-bad-numeric-literals.rs +++ b/src/test/ui/parser/lex-bad-numeric-literals.rs @@ -13,8 +13,10 @@ fn main() { 0o; //~ ERROR: no valid digits 1e+; //~ ERROR: expected at least one digit in exponent 0x539.0; //~ ERROR: hexadecimal float literal is not supported - 9900000000000000000000000000999999999999999999999999999999; //~ ERROR: integer literal is too large - 9900000000000000000000000000999999999999999999999999999999; //~ ERROR: integer literal is too large + 9900000000000000000000000000999999999999999999999999999999; + //~^ ERROR: integer literal is too large + 9900000000000000000000000000999999999999999999999999999999; + //~^ ERROR: integer literal is too large 0x; //~ ERROR: no valid digits 0xu32; //~ ERROR: no valid digits 0ou32; //~ ERROR: no valid digits diff --git a/src/test/ui/parser/lex-bad-numeric-literals.stderr b/src/test/ui/parser/lex-bad-numeric-literals.stderr index 2058ea63ce4bd..6d6cd3cc17129 100644 --- a/src/test/ui/parser/lex-bad-numeric-literals.stderr +++ b/src/test/ui/parser/lex-bad-numeric-literals.stderr @@ -65,43 +65,43 @@ LL | 0x539.0; | ^^^^^^^ error: no valid digits found for number - --> $DIR/lex-bad-numeric-literals.rs:18:5 + --> $DIR/lex-bad-numeric-literals.rs:20:5 | LL | 0x; | ^^ error: no valid digits found for number - --> $DIR/lex-bad-numeric-literals.rs:19:5 + --> $DIR/lex-bad-numeric-literals.rs:21:5 | LL | 0xu32; | ^^ error: no valid digits found for number - --> $DIR/lex-bad-numeric-literals.rs:20:5 + --> $DIR/lex-bad-numeric-literals.rs:22:5 | LL | 0ou32; | ^^ error: no valid digits found for number - --> $DIR/lex-bad-numeric-literals.rs:21:5 + --> $DIR/lex-bad-numeric-literals.rs:23:5 | LL | 0bu32; | ^^ error: no valid digits found for number - --> $DIR/lex-bad-numeric-literals.rs:22:5 + --> $DIR/lex-bad-numeric-literals.rs:24:5 | LL | 0b; | ^^ error: octal float literal is not supported - --> $DIR/lex-bad-numeric-literals.rs:24:5 + --> $DIR/lex-bad-numeric-literals.rs:26:5 | LL | 0o123.456; | ^^^^^^^^^ error: binary float literal is not supported - --> $DIR/lex-bad-numeric-literals.rs:26:5 + --> $DIR/lex-bad-numeric-literals.rs:28:5 | LL | 0b111.101; | ^^^^^^^^^ @@ -119,19 +119,19 @@ LL | 9900000000000000000000000000999999999999999999999999999999; | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ error: integer literal is too large - --> $DIR/lex-bad-numeric-literals.rs:17:5 + --> $DIR/lex-bad-numeric-literals.rs:18:5 | LL | 9900000000000000000000000000999999999999999999999999999999; | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ error: octal float literal is not supported - --> $DIR/lex-bad-numeric-literals.rs:23:5 + --> $DIR/lex-bad-numeric-literals.rs:25:5 | LL | 0o123f64; | ^^^^^^^^ not supported error: binary float literal is not supported - --> $DIR/lex-bad-numeric-literals.rs:25:5 + --> $DIR/lex-bad-numeric-literals.rs:27:5 | LL | 0b101f64; | ^^^^^^^^ not supported diff --git a/src/test/ui/parser/no-hex-float-literal.rs b/src/test/ui/parser/no-hex-float-literal.rs index 88574ba6f922c..bf11dee08338e 100644 --- a/src/test/ui/parser/no-hex-float-literal.rs +++ b/src/test/ui/parser/no-hex-float-literal.rs @@ -4,6 +4,6 @@ fn main() { 0x567.89; //~^ ERROR hexadecimal float literal is not supported 0xDEAD.BEEFp-2f; - //~^ ERROR invalid suffix `f` for integer literal + //~^ ERROR invalid suffix `f` for float literal //~| ERROR `{integer}` is a primitive type and therefore doesn't have fields } diff --git a/src/test/ui/parser/no-hex-float-literal.stderr b/src/test/ui/parser/no-hex-float-literal.stderr index 48041f34838cc..258ab06d5ee2f 100644 --- a/src/test/ui/parser/no-hex-float-literal.stderr +++ b/src/test/ui/parser/no-hex-float-literal.stderr @@ -4,13 +4,13 @@ error: hexadecimal float literal is not supported LL | 0x567.89; | ^^^^^^^^ -error: invalid suffix `f` for integer literal +error: invalid suffix `f` for float literal --> $DIR/no-hex-float-literal.rs:6:18 | LL | 0xDEAD.BEEFp-2f; | ^^ invalid suffix `f` | - = help: the suffix must be one of the integral types (`u32`, `isize`, etc) + = help: valid suffixes are `f32` and `f64` error[E0610]: `{integer}` is a primitive type and therefore doesn't have fields --> $DIR/no-hex-float-literal.rs:2:11