Skip to content

Commit

Permalink
review comments: add FIXME comments and formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
estebank committed Jul 25, 2019
1 parent 70c817a commit 6844976
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 8 deletions.
15 changes: 11 additions & 4 deletions src/libsyntax/parse/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -389,11 +389,18 @@ impl<'a> StringReader<'a> {
self.pos,
"unknown start of token",
c);
if let Some(t) = unicode_chars::check_for_substitution(self, start, c, &mut err) {
err.emit();
return Ok(t);
// FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs into the
// token, instead of keeping a table in `check_for_substitution`. Ideally, this should be
// inside `rustc_lexer`. However, we should first remove compound tokens like `<<` from
// `rustc_lexer`, and then add fancier error recovery to it, as there will be less overall
// work to do this way.
return match unicode_chars::check_for_substitution(self, start, c, &mut err) {
Some(token) => {
err.emit();
Ok(token)
}
None => Err(err),
}
return Err(err)
}
};
Ok(kind)
Expand Down
14 changes: 10 additions & 4 deletions src/libsyntax/parse/lexer/unicode_chars.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

use super::StringReader;
use errors::{Applicability, DiagnosticBuilder};
use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION};
use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION, symbol::kw};
use crate::parse::token;

#[rustfmt::skip] // for line breaks
Expand Down Expand Up @@ -298,18 +298,20 @@ const UNICODE_ARRAY: &[(char, &str, char)] = &[
('>', "Fullwidth Greater-Than Sign", '>'),
];

// FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs into tokens,
// instead of keeping the substitution token in this table. Ideally, this should be inside
// `rustc_lexer`. However, we should first remove compound tokens like `<<` from `rustc_lexer`,
// and then add fancier error recovery to it, as there will be less overall work to do this way.
const ASCII_ARRAY: &[(char, &str, Option<token::TokenKind>)] = &[
(' ', "Space", Some(token::Whitespace)),
('_', "Underscore", None),
('_', "Underscore", Some(token::Ident(kw::Underscore, false))),
('-', "Minus/Hyphen", Some(token::BinOp(token::Minus))),
(',', "Comma", Some(token::Comma)),
(';', "Semicolon", Some(token::Semi)),
(':', "Colon", Some(token::Colon)),
('!', "Exclamation Mark", Some(token::Not)),
('?', "Question Mark", Some(token::Question)),
('.', "Period", Some(token::Dot)),
('\'', "Single Quote", None), // Literals are already lexed by this point, so we can't recover
('"', "Quotation Mark", None), // gracefully just by spitting the correct token out.
('(', "Left Parenthesis", Some(token::OpenDelim(token::Paren))),
(')', "Right Parenthesis", Some(token::CloseDelim(token::Paren))),
('[', "Left Square Bracket", Some(token::OpenDelim(token::Bracket))),
Expand All @@ -324,6 +326,10 @@ const ASCII_ARRAY: &[(char, &str, Option<token::TokenKind>)] = &[
('<', "Less-Than Sign", Some(token::Lt)),
('=', "Equals Sign", Some(token::Eq)),
('>', "Greater-Than Sign", Some(token::Gt)),
// FIXME: Literals are already lexed by this point, so we can't recover gracefully just by
// spitting the correct token out.
('\'', "Single Quote", None),
('"', "Quotation Mark", None),
];

crate fn check_for_substitution<'a>(
Expand Down

0 comments on commit 6844976

Please sign in to comment.