From d43aebc110e9b9e498c48031c9f69583c6bffdf3 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 26 Mar 2015 09:38:25 +0100 Subject: [PATCH] Prevent ICEs when parsing invalid escapes, closes #23620 --- src/libsyntax/parse/lexer/mod.rs | 40 +++++++++++----- .../parse-fail/issue-23620-invalid-escapes.rs | 46 +++++++++++++++++++ src/test/parse-fail/new-unicode-escapes-4.rs | 5 +- 3 files changed, 79 insertions(+), 12 deletions(-) create mode 100644 src/test/parse-fail/issue-23620-invalid-escapes.rs diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 0843713681bbd..27a8b0cb318e0 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -742,6 +742,7 @@ impl<'a> StringReader<'a> { let start_bpos = self.last_pos; let mut accum_int = 0; + let mut valid = true; for _ in 0..n_digits { if self.is_eof() { let last_bpos = self.last_pos; @@ -750,6 +751,7 @@ impl<'a> StringReader<'a> { if self.curr_is(delim) { let last_bpos = self.last_pos; self.err_span_(start_bpos, last_bpos, "numeric character escape is too short"); + valid = false; break; } let c = self.curr.unwrap_or('\x00'); @@ -757,6 +759,8 @@ impl<'a> StringReader<'a> { accum_int += c.to_digit(16).unwrap_or_else(|| { self.err_span_char(self.last_pos, self.pos, "illegal character in numeric character escape", c); + + valid = false; 0 }); self.bump(); @@ -767,10 +771,11 @@ impl<'a> StringReader<'a> { self.last_pos, "this form of character escape may only be used \ with characters in the range [\\x00-\\x7f]"); + valid = false; } match char::from_u32(accum_int) { - Some(_) => true, + Some(_) => valid, None => { let last_bpos = self.last_pos; self.err_span_(start_bpos, last_bpos, "illegal numeric character escape"); @@ -799,7 +804,18 @@ impl<'a> StringReader<'a> { 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true, 'x' => self.scan_byte_escape(delim, !ascii_only), 'u' if self.curr_is('{') => { - self.scan_unicode_escape(delim) + let valid = self.scan_unicode_escape(delim); + if valid && ascii_only { + self.err_span_( + escaped_pos, + self.last_pos, + "Unicode escape sequences cannot be used as byte or in \ + byte string." + ); + false + } else { + valid + } } '\n' if delim == '"' => { self.consume_whitespace(); @@ -869,6 +885,7 @@ impl<'a> StringReader<'a> { let start_bpos = self.last_pos; let mut count = 0; let mut accum_int = 0; + let mut valid = true; while !self.curr_is('}') && count <= 6 { let c = match self.curr { @@ -884,29 +901,30 @@ impl<'a> StringReader<'a> { self.fatal_span_(self.last_pos, self.pos, "unterminated unicode escape (needed a `}`)"); } else { - self.fatal_span_char(self.last_pos, self.pos, + self.err_span_char(self.last_pos, self.pos, "illegal character in unicode escape", c); } + valid = false; + 0 }); self.bump(); count += 1; } if count > 6 { - self.fatal_span_(start_bpos, self.last_pos, + self.err_span_(start_bpos, self.last_pos, "overlong unicode escape (can have at most 6 hex digits)"); + valid = false; } self.bump(); // past the ending } - let mut valid = count >= 1 && count <= 6; - if char::from_u32(accum_int).is_none() { - valid = false; + if valid && (char::from_u32(accum_int).is_none() || count == 0) { + self.err_span_(start_bpos, self.last_pos, "illegal unicode character escape"); + valid= false; } - if !valid { - self.fatal_span_(start_bpos, self.last_pos, "illegal unicode character escape"); - } + valid } @@ -1330,7 +1348,7 @@ impl<'a> StringReader<'a> { "unterminated byte constant".to_string()); } - let id = if valid { self.name_from(start) } else { token::intern("??") }; + let id = if valid { self.name_from(start) } else { token::intern("?") }; self.bump(); // advance curr past token return token::Byte(id); } diff --git a/src/test/parse-fail/issue-23620-invalid-escapes.rs b/src/test/parse-fail/issue-23620-invalid-escapes.rs new file mode 100644 index 0000000000000..fafb5f7db5fe8 --- /dev/null +++ b/src/test/parse-fail/issue-23620-invalid-escapes.rs @@ -0,0 +1,46 @@ +// Copyright 2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +fn main() { + let _ = b"\u{a66e}"; + //~^ ERROR Unicode escape sequences cannot be used as byte or in byte string + + let _ = b'\u{a66e}'; + //~^ ERROR Unicode escape sequences cannot be used as byte or in byte string. + + let _ = b'\u'; + //~^ ERROR unknown byte escape: u + + let _ = b'\x5'; + //~^ ERROR numeric character escape is too short + + let _ = b'\xxy'; + //~^ ERROR illegal character in numeric character escape: x + //~^^ ERROR illegal character in numeric character escape: y + + let _ = '\x5'; + //~^ ERROR numeric character escape is too short + + let _ = '\xxy'; + //~^ ERROR illegal character in numeric character escape: x + //~^^ ERROR illegal character in numeric character escape: y + + let _ = b"\u{a4a4} \xf \u"; + //~^ ERROR Unicode escape sequences cannot be used as byte or in byte string. + //~^^ ERROR illegal character in numeric character escape: + //~^^^ ERROR unknown byte escape: u + + let _ = "\u{ffffff} \xf \u"; + //~^ ERROR illegal unicode character escape + //~^^ ERROR illegal character in numeric character escape: + //~^^^ ERROR this form of character escape may only be used with characters in the range [\x00-\x7f] + //~^^^^ ERROR unknown character escape: u + +} diff --git a/src/test/parse-fail/new-unicode-escapes-4.rs b/src/test/parse-fail/new-unicode-escapes-4.rs index ffc2b11e0c13c..96b86f1f5635f 100644 --- a/src/test/parse-fail/new-unicode-escapes-4.rs +++ b/src/test/parse-fail/new-unicode-escapes-4.rs @@ -9,5 +9,8 @@ // except according to those terms. pub fn main() { - let s = "\u{lol}"; //~ ERROR illegal character in unicode escape + let s = "\u{lol}"; + //~^ ERROR illegal character in unicode escape: l + //~^^ ERROR illegal character in unicode escape: o + //~^^^ ERROR illegal character in unicode escape: l }