Skip to content

Commit

Permalink
Rollup merge of rust-lang#109203 - Ezrashaw:refactor-ident-parsing, r…
Browse files Browse the repository at this point in the history
…=Nilstrieb

refactor/feat: refactor identifier parsing a bit

\+ error recovery for `expected_ident_found`

Prior art: rust-lang#108854
  • Loading branch information
matthiaskrgr authored Mar 22, 2023
2 parents 0392e29 + 05b5046 commit 34fa6da
Show file tree
Hide file tree
Showing 12 changed files with 250 additions and 95 deletions.
2 changes: 1 addition & 1 deletion compiler/rustc_parse/messages.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ parse_expected_identifier_found_reserved_keyword = expected identifier, found re
parse_expected_identifier_found_doc_comment = expected identifier, found doc comment
parse_expected_identifier = expected identifier
parse_sugg_escape_to_use_as_identifier = escape `{$ident_name}` to use it as an identifier
parse_sugg_escape_identifier = escape `{$ident_name}` to use it as an identifier
parse_sugg_remove_comma = remove this comma
Expand Down
11 changes: 7 additions & 4 deletions compiler/rustc_parse/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -888,12 +888,12 @@ pub(crate) struct InvalidMetaItem {

#[derive(Subdiagnostic)]
#[suggestion(
parse_sugg_escape_to_use_as_identifier,
parse_sugg_escape_identifier,
style = "verbose",
applicability = "maybe-incorrect",
code = "r#"
)]
pub(crate) struct SuggEscapeToUseAsIdentifier {
pub(crate) struct SuggEscapeIdentifier {
#[primary_span]
pub span: Span,
pub ident_name: String,
Expand Down Expand Up @@ -937,7 +937,7 @@ impl ExpectedIdentifierFound {
pub(crate) struct ExpectedIdentifier {
pub span: Span,
pub token: Token,
pub suggest_raw: Option<SuggEscapeToUseAsIdentifier>,
pub suggest_raw: Option<SuggEscapeIdentifier>,
pub suggest_remove_comma: Option<SuggRemoveComma>,
pub help_cannot_start_number: Option<HelpIdentifierStartsWithNumber>,
}
Expand Down Expand Up @@ -986,7 +986,10 @@ impl<'a, G: EmissionGuarantee> IntoDiagnostic<'a, G> for ExpectedIdentifier {

#[derive(Subdiagnostic)]
#[help(parse_invalid_identifier_with_leading_number)]
pub(crate) struct HelpIdentifierStartsWithNumber;
pub(crate) struct HelpIdentifierStartsWithNumber {
#[primary_span]
pub num_span: Span,
}

pub(crate) struct ExpectedSemi {
pub span: Span,
Expand Down
123 changes: 89 additions & 34 deletions compiler/rustc_parse/src/parser/diagnostics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@ use super::{
use crate::errors::{
AmbiguousPlus, AttributeOnParamType, BadQPathStage2, BadTypePlus, BadTypePlusSub,
ComparisonOperatorsCannotBeChained, ComparisonOperatorsCannotBeChainedSugg,
ConstGenericWithoutBraces, ConstGenericWithoutBracesSugg, DocCommentOnParamType,
DoubleColonInBound, ExpectedIdentifier, ExpectedSemi, ExpectedSemiSugg,
ConstGenericWithoutBraces, ConstGenericWithoutBracesSugg, DocCommentDoesNotDocumentAnything,
DocCommentOnParamType, DoubleColonInBound, ExpectedIdentifier, ExpectedSemi, ExpectedSemiSugg,
GenericParamsWithoutAngleBrackets, GenericParamsWithoutAngleBracketsSugg,
HelpIdentifierStartsWithNumber, InInTypo, IncorrectAwait, IncorrectSemicolon,
IncorrectUseOfAwait, ParenthesesInForHead, ParenthesesInForHeadSugg,
PatternMethodParamWithoutBody, QuestionMarkInType, QuestionMarkInTypeSugg, SelfParamNotFirst,
StructLiteralBodyWithoutPath, StructLiteralBodyWithoutPathSugg, StructLiteralNeedingParens,
StructLiteralNeedingParensSugg, SuggEscapeToUseAsIdentifier, SuggRemoveComma,
StructLiteralNeedingParensSugg, SuggEscapeIdentifier, SuggRemoveComma,
UnexpectedConstInGenericParam, UnexpectedConstParamDeclaration,
UnexpectedConstParamDeclarationSugg, UnmatchedAngleBrackets, UseEqInstead,
};
Expand All @@ -38,7 +38,7 @@ use rustc_errors::{
use rustc_session::errors::ExprParenthesesNeeded;
use rustc_span::source_map::Spanned;
use rustc_span::symbol::{kw, sym, Ident};
use rustc_span::{Span, SpanSnippetError, DUMMY_SP};
use rustc_span::{Span, SpanSnippetError, Symbol, DUMMY_SP};
use std::mem::take;
use std::ops::{Deref, DerefMut};
use thin_vec::{thin_vec, ThinVec};
Expand Down Expand Up @@ -268,7 +268,21 @@ impl<'a> Parser<'a> {
self.sess.source_map().span_to_snippet(span)
}

pub(super) fn expected_ident_found(&mut self) -> DiagnosticBuilder<'a, ErrorGuaranteed> {
/// Emits an error with suggestions if an identifier was expected but not found.
///
/// Returns a possibly recovered identifier.
pub(super) fn expected_ident_found(
&mut self,
recover: bool,
) -> PResult<'a, (Ident, /* is_raw */ bool)> {
if let TokenKind::DocComment(..) = self.prev_token.kind {
return Err(DocCommentDoesNotDocumentAnything {
span: self.prev_token.span,
missing_comma: None,
}
.into_diagnostic(&self.sess.span_diagnostic));
}

let valid_follow = &[
TokenKind::Eq,
TokenKind::Colon,
Expand All @@ -281,31 +295,51 @@ impl<'a> Parser<'a> {
TokenKind::CloseDelim(Delimiter::Parenthesis),
];

let suggest_raw = match self.token.ident() {
Some((ident, false))
if ident.is_raw_guess()
&& self.look_ahead(1, |t| valid_follow.contains(&t.kind)) =>
{
Some(SuggEscapeToUseAsIdentifier {
span: ident.span.shrink_to_lo(),
// `Symbol::to_string()` is different from `Symbol::into_diagnostic_arg()`,
// which uses `Symbol::to_ident_string()` and "helpfully" adds an implicit `r#`
ident_name: ident.name.to_string(),
})
}
_ => None,
};
let mut recovered_ident = None;
// we take this here so that the correct original token is retained in
// the diagnostic, regardless of eager recovery.
let bad_token = self.token.clone();

// suggest prepending a keyword in identifier position with `r#`
let suggest_raw = if let Some((ident, false)) = self.token.ident()
&& ident.is_raw_guess()
&& self.look_ahead(1, |t| valid_follow.contains(&t.kind))
{
recovered_ident = Some((ident, true));

// `Symbol::to_string()` is different from `Symbol::into_diagnostic_arg()`,
// which uses `Symbol::to_ident_string()` and "helpfully" adds an implicit `r#`
let ident_name = ident.name.to_string();

Some(SuggEscapeIdentifier {
span: ident.span.shrink_to_lo(),
ident_name
})
} else { None };

let suggest_remove_comma =
if self.token == token::Comma && self.look_ahead(1, |t| t.is_ident()) {
if recover {
self.bump();
recovered_ident = self.ident_or_err(false).ok();
};

let suggest_remove_comma = (self.token == token::Comma
&& self.look_ahead(1, |t| t.is_ident()))
.then_some(SuggRemoveComma { span: self.token.span });
Some(SuggRemoveComma { span: bad_token.span })
} else {
None
};

let help_cannot_start_number =
self.is_lit_bad_ident().then_some(HelpIdentifierStartsWithNumber);
let help_cannot_start_number = self.is_lit_bad_ident().map(|(len, valid_portion)| {
let (invalid, valid) = self.token.span.split_at(len as u32);

recovered_ident = Some((Ident::new(valid_portion, valid), false));

HelpIdentifierStartsWithNumber { num_span: invalid }
});

let err = ExpectedIdentifier {
span: self.token.span,
token: self.token.clone(),
span: bad_token.span,
token: bad_token,
suggest_raw,
suggest_remove_comma,
help_cannot_start_number,
Expand All @@ -314,6 +348,7 @@ impl<'a> Parser<'a> {

// if the token we have is a `<`
// it *might* be a misplaced generic
// FIXME: could we recover with this?
if self.token == token::Lt {
// all keywords that could have generic applied
let valid_prev_keywords =
Expand Down Expand Up @@ -364,18 +399,38 @@ impl<'a> Parser<'a> {
}
}

err
if let Some(recovered_ident) = recovered_ident && recover {
err.emit();
Ok(recovered_ident)
} else {
Err(err)
}
}

pub(super) fn expected_ident_found_err(&mut self) -> DiagnosticBuilder<'a, ErrorGuaranteed> {
self.expected_ident_found(false).unwrap_err()
}

/// Checks if the current token is a integer or float literal and looks like
/// it could be a invalid identifier with digits at the start.
pub(super) fn is_lit_bad_ident(&mut self) -> bool {
matches!(self.token.uninterpolate().kind, token::Literal(Lit { kind: token::LitKind::Integer | token::LitKind::Float, .. })
// ensure that the integer literal is followed by a *invalid*
// suffix: this is how we know that it is a identifier with an
// invalid beginning.
if rustc_ast::MetaItemLit::from_token(&self.token).is_none()
)
///
/// Returns the number of characters (bytes) composing the invalid portion
/// of the identifier and the valid portion of the identifier.
pub(super) fn is_lit_bad_ident(&mut self) -> Option<(usize, Symbol)> {
// ensure that the integer literal is followed by a *invalid*
// suffix: this is how we know that it is a identifier with an
// invalid beginning.
if let token::Literal(Lit {
kind: token::LitKind::Integer | token::LitKind::Float,
symbol,
suffix,
}) = self.token.kind
&& rustc_ast::MetaItemLit::from_token(&self.token).is_none()
{
Some((symbol.as_str().len(), suffix.unwrap()))
} else {
None
}
}

pub(super) fn expected_one_of_not_found(
Expand Down
10 changes: 5 additions & 5 deletions compiler/rustc_parse/src/parser/item.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1181,7 +1181,7 @@ impl<'a> Parser<'a> {
defaultness: Defaultness,
) -> PResult<'a, ItemInfo> {
let impl_span = self.token.span;
let mut err = self.expected_ident_found();
let mut err = self.expected_ident_found_err();

// Only try to recover if this is implementing a trait for a type
let mut impl_info = match self.parse_item_impl(attrs, defaultness) {
Expand Down Expand Up @@ -1744,7 +1744,7 @@ impl<'a> Parser<'a> {
/// Parses a field identifier. Specialized version of `parse_ident_common`
/// for better diagnostics and suggestions.
fn parse_field_ident(&mut self, adt_ty: &str, lo: Span) -> PResult<'a, Ident> {
let (ident, is_raw) = self.ident_or_err()?;
let (ident, is_raw) = self.ident_or_err(true)?;
if !is_raw && ident.is_reserved() {
let snapshot = self.create_snapshot_for_diagnostic();
let err = if self.check_fn_front_matter(false, Case::Sensitive) {
Expand Down Expand Up @@ -1776,7 +1776,7 @@ impl<'a> Parser<'a> {
Err(err) => {
err.cancel();
self.restore_snapshot(snapshot);
self.expected_ident_found()
self.expected_ident_found_err()
}
}
} else if self.eat_keyword(kw::Struct) {
Expand All @@ -1792,11 +1792,11 @@ impl<'a> Parser<'a> {
Err(err) => {
err.cancel();
self.restore_snapshot(snapshot);
self.expected_ident_found()
self.expected_ident_found_err()
}
}
} else {
let mut err = self.expected_ident_found();
let mut err = self.expected_ident_found_err();
if self.eat_keyword_noexpect(kw::Let)
&& let removal_span = self.prev_token.span.until(self.token.span)
&& let Ok(ident) = self.parse_ident_common(false)
Expand Down
34 changes: 19 additions & 15 deletions compiler/rustc_parse/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ use thin_vec::ThinVec;
use tracing::debug;

use crate::errors::{
DocCommentDoesNotDocumentAnything, IncorrectVisibilityRestriction, MismatchedClosingDelimiter,
NonStringAbiLiteral,
IncorrectVisibilityRestriction, MismatchedClosingDelimiter, NonStringAbiLiteral,
};

bitflags::bitflags! {
Expand Down Expand Up @@ -552,21 +551,11 @@ impl<'a> Parser<'a> {
self.parse_ident_common(true)
}

fn ident_or_err(&mut self) -> PResult<'a, (Ident, /* is_raw */ bool)> {
self.token.ident().ok_or_else(|| match self.prev_token.kind {
TokenKind::DocComment(..) => DocCommentDoesNotDocumentAnything {
span: self.prev_token.span,
missing_comma: None,
}
.into_diagnostic(&self.sess.span_diagnostic),
_ => self.expected_ident_found(),
})
}

fn parse_ident_common(&mut self, recover: bool) -> PResult<'a, Ident> {
let (ident, is_raw) = self.ident_or_err()?;
let (ident, is_raw) = self.ident_or_err(recover)?;

if !is_raw && ident.is_reserved() {
let mut err = self.expected_ident_found();
let mut err = self.expected_ident_found_err();
if recover {
err.emit();
} else {
Expand All @@ -577,6 +566,21 @@ impl<'a> Parser<'a> {
Ok(ident)
}

fn ident_or_err(&mut self, recover: bool) -> PResult<'a, (Ident, /* is_raw */ bool)> {
let result = self.token.ident().ok_or_else(|| self.expected_ident_found(recover));

let (ident, is_raw) = match result {
Ok(ident) => ident,
Err(err) => match err {
// we recovered!
Ok(ident) => ident,
Err(err) => return Err(err),
},
};

Ok((ident, is_raw))
}

/// Checks if the next token is `tok`, and returns `true` if so.
///
/// This method will automatically add `tok` to `expected_tokens` if `tok` is not
Expand Down
14 changes: 8 additions & 6 deletions compiler/rustc_parse/src/parser/pat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -348,10 +348,6 @@ impl<'a> Parser<'a> {
lo = self.token.span;
}

if self.is_lit_bad_ident() {
return Err(self.expected_ident_found());
}

let pat = if self.check(&token::BinOp(token::And)) || self.token.kind == token::AndAnd {
self.parse_pat_deref(expected)?
} else if self.check(&token::OpenDelim(Delimiter::Parenthesis)) {
Expand Down Expand Up @@ -395,7 +391,13 @@ impl<'a> Parser<'a> {
} else {
PatKind::Lit(const_expr)
}
} else if self.can_be_ident_pat() {
// Don't eagerly error on semantically invalid tokens when matching
// declarative macros, as the input to those doesn't have to be
// semantically valid. For attribute/derive proc macros this is not the
// case, so doing the recovery for them is fine.
} else if self.can_be_ident_pat()
|| (self.is_lit_bad_ident().is_some() && self.may_recover())
{
// Parse `ident @ pat`
// This can give false positives and parse nullary enums,
// they are dealt with later in resolve.
Expand Down Expand Up @@ -594,7 +596,7 @@ impl<'a> Parser<'a> {
// Make sure we don't allow e.g. `let mut $p;` where `$p:pat`.
if let token::Interpolated(nt) = &self.token.kind {
if let token::NtPat(_) = **nt {
self.expected_ident_found().emit();
self.expected_ident_found_err().emit();
}
}

Expand Down
12 changes: 12 additions & 0 deletions compiler/rustc_span/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -795,6 +795,18 @@ impl Span {
})
}

/// Splits a span into two composite spans around a certain position.
pub fn split_at(self, pos: u32) -> (Span, Span) {
let len = self.hi().0 - self.lo().0;
debug_assert!(pos <= len);

let split_pos = BytePos(self.lo().0 + pos);
(
Span::new(self.lo(), split_pos, self.ctxt(), self.parent()),
Span::new(split_pos, self.hi(), self.ctxt(), self.parent()),
)
}

/// Returns a `Span` that would enclose both `self` and `end`.
///
/// Note that this can also be used to extend the span "backwards":
Expand Down
16 changes: 16 additions & 0 deletions tests/ui/parser/ident-recovery.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
fn ,comma() {
//~^ ERROR expected identifier, found `,`
struct Foo {
x: i32,,
//~^ ERROR expected identifier, found `,`
y: u32,
}
}

fn break() {
//~^ ERROR expected identifier, found keyword `break`
let continue = 5;
//~^ ERROR expected identifier, found keyword `continue`
}

fn main() {}
Loading

0 comments on commit 34fa6da

Please sign in to comment.