Skip to content

Commit

Permalink
Rollup merge of rust-lang#63017 - matklad:no-fatal, r=petrochenkov
Browse files Browse the repository at this point in the history
Remove special code-path for handing unknown tokens

In `StringReader`, we have a buffer of fatal errors, which is used only in a single case: when we see something which is not a reasonable token at all, like `🦀`. I think a more straightforward thing to do here is to produce an explicit error token in this case, and let the next layer (the parser), deal with it.

However currently this leads to duplicated error messages. What should we do with this? Naively, I would think that emitting (just emitting, not raising) `FatalError` should stop other errors, but looks like this is not the case? We can also probably tweak parser on the case-by-case basis, to avoid emitting "expected" errors if the current token is an `Err`. I personally also fine with cascading errors in this case: it's quite unlikely that you actually type a fully invalid token.

@petrochenkov, which approach should we take to fight cascading errors?
  • Loading branch information
Centril authored Aug 6, 2019
2 parents 7750dfe + b3e8c8b commit e502615
Show file tree
Hide file tree
Showing 13 changed files with 223 additions and 116 deletions.
3 changes: 2 additions & 1 deletion src/librustc/ich/impls_syntax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,8 @@ impl<'a> HashStable<StableHashingContext<'a>> for token::TokenKind {
}

token::DocComment(val) |
token::Shebang(val) => val.hash_stable(hcx, hasher),
token::Shebang(val) |
token::Unknown(val) => val.hash_stable(hcx, hasher),
}
}
}
Expand Down
29 changes: 15 additions & 14 deletions src/librustdoc/html/highlight.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ pub fn render_with_highlighting(

let mut highlighted_source = vec![];
if classifier.write_source(&mut highlighted_source).is_err() {
Err(classifier.lexer.buffer_fatal_errors())
Err(())
} else {
Ok(String::from_utf8_lossy(&highlighted_source).into_owned())
}
Expand All @@ -59,14 +59,9 @@ pub fn render_with_highlighting(
}
write_footer(&mut out).unwrap();
}
Err(errors) => {
// If errors are encountered while trying to highlight, cancel the errors and just emit
// the unhighlighted source. The errors will have already been reported in the
// `check-code-block-syntax` pass.
for mut error in errors {
error.cancel();
}

Err(()) => {
// If errors are encountered while trying to highlight, just emit
// the unhighlighted source.
write!(out, "<pre><code>{}</code></pre>", src).unwrap();
}
}
Expand Down Expand Up @@ -192,14 +187,20 @@ impl<'a> Classifier<'a> {
if let Some(token) = self.peek_token.take() {
return Ok(token);
}
self.lexer.try_next_token().map_err(|()| HighlightError::LexError)
let token = self.lexer.next_token();
if let token::Unknown(..) = &token.kind {
return Err(HighlightError::LexError);
}
Ok(token)
}

fn peek(&mut self) -> Result<&Token, HighlightError> {
if self.peek_token.is_none() {
self.peek_token = Some(
self.lexer.try_next_token().map_err(|()| HighlightError::LexError)?
);
let token = self.lexer.next_token();
if let token::Unknown(..) = &token.kind {
return Err(HighlightError::LexError);
}
self.peek_token = Some(token);
}
Ok(self.peek_token.as_ref().unwrap())
}
Expand Down Expand Up @@ -237,7 +238,7 @@ impl<'a> Classifier<'a> {
return Ok(());
},

token::Whitespace => Class::None,
token::Whitespace | token::Unknown(..) => Class::None,
token::Comment => Class::Comment,
token::DocComment(..) => Class::DocComment,

Expand Down
32 changes: 9 additions & 23 deletions src/librustdoc/passes/check_code_block_syntax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,24 +32,20 @@ impl<'a, 'tcx> SyntaxChecker<'a, 'tcx> {
dox[code_block.code].to_owned(),
);

let errors = {
let has_errors = {
let mut has_errors = false;
let mut lexer = Lexer::new(&sess, source_file, None);
while let Ok(token::Token { kind, .. }) = lexer.try_next_token() {
if kind == token::Eof {
break;
loop {
match lexer.next_token().kind {
token::Eof => break,
token::Unknown(..) => has_errors = true,
_ => (),
}
}

let errors = lexer.buffer_fatal_errors();

if !errors.is_empty() {
Err(errors)
} else {
Ok(())
}
has_errors
};

if let Err(errors) = errors {
if has_errors {
let mut diag = if let Some(sp) =
super::source_span_for_markdown_range(self.cx, &dox, &code_block.range, &item.attrs)
{
Expand All @@ -58,11 +54,6 @@ impl<'a, 'tcx> SyntaxChecker<'a, 'tcx> {
.sess()
.struct_span_warn(sp, "could not parse code block as Rust code");

for mut err in errors {
diag.note(&format!("error from rustc: {}", err.message()));
err.cancel();
}

if code_block.syntax.is_none() && code_block.is_fenced {
let sp = sp.from_inner(InnerSpan::new(0, 3));
diag.span_suggestion(
Expand All @@ -82,11 +73,6 @@ impl<'a, 'tcx> SyntaxChecker<'a, 'tcx> {
"doc comment contains an invalid Rust code block",
);

for mut err in errors {
// Don't bother reporting the error, because we can't show where it happened.
err.cancel();
}

if code_block.syntax.is_none() && code_block.is_fenced {
diag.help("mark blocks that do not contain Rust code as text: ```text");
}
Expand Down
2 changes: 1 addition & 1 deletion src/libsyntax/ext/proc_macro_server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ impl FromInternal<(TreeAndJoint, &'_ ParseSess, &'_ mut Vec<Self>)>
}

OpenDelim(..) | CloseDelim(..) => unreachable!(),
Whitespace | Comment | Shebang(..) | Eof => unreachable!(),
Whitespace | Comment | Shebang(..) | Unknown(..) | Eof => unreachable!(),
}
}
}
Expand Down
73 changes: 13 additions & 60 deletions src/libsyntax/parse/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::parse::token::{self, Token, TokenKind};
use crate::symbol::{sym, Symbol};
use crate::parse::unescape_error_reporting::{emit_unescape_error, push_escaped_char};

use errors::{FatalError, Diagnostic, DiagnosticBuilder};
use errors::{FatalError, DiagnosticBuilder};
use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION};
use rustc_lexer::Base;
use rustc_lexer::unescape;
Expand Down Expand Up @@ -39,7 +39,6 @@ pub struct StringReader<'a> {
pos: BytePos,
/// Stop reading src at this index.
end_src_index: usize,
fatal_errs: Vec<DiagnosticBuilder<'a>>,
/// Source text to tokenize.
src: Lrc<String>,
override_span: Option<Span>,
Expand All @@ -62,7 +61,6 @@ impl<'a> StringReader<'a> {
pos: source_file.start_pos,
end_src_index: src.len(),
src,
fatal_errs: Vec::new(),
override_span,
}
}
Expand All @@ -89,29 +87,17 @@ impl<'a> StringReader<'a> {
self.override_span.unwrap_or_else(|| Span::new(lo, hi, NO_EXPANSION))
}

fn unwrap_or_abort(&mut self, res: Result<Token, ()>) -> Token {
match res {
Ok(tok) => tok,
Err(_) => {
self.emit_fatal_errors();
FatalError.raise();
}
}
}

/// Returns the next token, including trivia like whitespace or comments.
///
/// `Err(())` means that some errors were encountered, which can be
/// retrieved using `buffer_fatal_errors`.
pub fn try_next_token(&mut self) -> Result<Token, ()> {
assert!(self.fatal_errs.is_empty());

pub fn next_token(&mut self) -> Token {
let start_src_index = self.src_index(self.pos);
let text: &str = &self.src[start_src_index..self.end_src_index];

if text.is_empty() {
let span = self.mk_sp(self.pos, self.pos);
return Ok(Token::new(token::Eof, span));
return Token::new(token::Eof, span);
}

{
Expand All @@ -125,7 +111,7 @@ impl<'a> StringReader<'a> {
let kind = token::Shebang(sym);

let span = self.mk_sp(start, self.pos);
return Ok(Token::new(kind, span));
return Token::new(kind, span);
}
}
}
Expand All @@ -139,39 +125,10 @@ impl<'a> StringReader<'a> {

// This could use `?`, but that makes code significantly (10-20%) slower.
// https://github.com/rust-lang/rust/issues/37939
let kind = match self.cook_lexer_token(token.kind, start) {
Ok(it) => it,
Err(err) => return Err(self.fatal_errs.push(err)),
};
let kind = self.cook_lexer_token(token.kind, start);

let span = self.mk_sp(start, self.pos);
Ok(Token::new(kind, span))
}

/// Returns the next token, including trivia like whitespace or comments.
///
/// Aborts in case of an error.
pub fn next_token(&mut self) -> Token {
let res = self.try_next_token();
self.unwrap_or_abort(res)
}

fn emit_fatal_errors(&mut self) {
for err in &mut self.fatal_errs {
err.emit();
}

self.fatal_errs.clear();
}

pub fn buffer_fatal_errors(&mut self) -> Vec<Diagnostic> {
let mut buffer = Vec::new();

for err in self.fatal_errs.drain(..) {
err.buffer(&mut buffer);
}

buffer
Token::new(kind, span)
}

/// Report a fatal lexical error with a given span.
Expand Down Expand Up @@ -218,8 +175,8 @@ impl<'a> StringReader<'a> {
&self,
token: rustc_lexer::TokenKind,
start: BytePos,
) -> Result<TokenKind, DiagnosticBuilder<'a>> {
let kind = match token {
) -> TokenKind {
match token {
rustc_lexer::TokenKind::LineComment => {
let string = self.str_from(start);
// comments with only more "/"s are not doc comments
Expand Down Expand Up @@ -396,16 +353,12 @@ impl<'a> StringReader<'a> {
// this should be inside `rustc_lexer`. However, we should first remove compound
// tokens like `<<` from `rustc_lexer`, and then add fancier error recovery to it,
// as there will be less overall work to do this way.
return match unicode_chars::check_for_substitution(self, start, c, &mut err) {
Some(token) => {
err.emit();
Ok(token)
}
None => Err(err),
}
let token = unicode_chars::check_for_substitution(self, start, c, &mut err)
.unwrap_or_else(|| token::Unknown(self.symbol_from(start)));
err.emit();
token
}
};
Ok(kind)
}
}

fn cook_lexer_literal(
Expand Down
2 changes: 1 addition & 1 deletion src/libsyntax/parse/lexer/tokentrees.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ impl<'a> TokenTreesReader<'a> {
loop {
let token = self.string_reader.next_token();
match token.kind {
token::Whitespace | token::Comment | token::Shebang(_) => {
token::Whitespace | token::Comment | token::Shebang(_) | token::Unknown(_) => {
self.joint_to_prev = NonJoint;
}
_ => {
Expand Down
4 changes: 3 additions & 1 deletion src/libsyntax/parse/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,8 @@ pub enum TokenKind {
/// A comment.
Comment,
Shebang(ast::Name),
/// A completely invalid token which should be skipped.
Unknown(ast::Name),

Eof,
}
Expand Down Expand Up @@ -603,7 +605,7 @@ impl Token {
DotDotEq | Comma | Semi | ModSep | RArrow | LArrow | FatArrow | Pound | Dollar |
Question | OpenDelim(..) | CloseDelim(..) |
Literal(..) | Ident(..) | Lifetime(..) | Interpolated(..) | DocComment(..) |
Whitespace | Comment | Shebang(..) | Eof => return None,
Whitespace | Comment | Shebang(..) | Unknown(..) | Eof => return None,
};

Some(Token::new(kind, self.span.to(joint.span)))
Expand Down
1 change: 1 addition & 0 deletions src/libsyntax/print/pprust.rs
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ fn token_kind_to_string_ext(tok: &TokenKind, convert_dollar_crate: Option<Span>)
token::Whitespace => " ".to_string(),
token::Comment => "/* */".to_string(),
token::Shebang(s) => format!("/* shebang: {}*/", s),
token::Unknown(s) => s.to_string(),

token::Interpolated(ref nt) => nonterminal_to_string(nt),
}
Expand Down
Loading

0 comments on commit e502615

Please sign in to comment.