Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve parser #125117

Merged
merged 4 commits into from
May 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions compiler/rustc_ast/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3250,6 +3250,7 @@ pub enum ItemKind {
}

impl ItemKind {
/// "a" or "an"
pub fn article(&self) -> &'static str {
use ItemKind::*;
match self {
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_parse/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ pub(crate) struct IncorrectSemicolon<'a> {
#[suggestion(style = "short", code = "", applicability = "machine-applicable")]
pub span: Span,
#[help]
pub opt_help: Option<()>,
pub show_help: bool,
pub name: &'a str,
}

Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_parse/src/lexer/tokentrees.rs
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ impl<'psess, 'src> TokenTreesReader<'psess, 'src> {
// we have no way of tracking this in the lexer itself, so we piggyback on the parser
let mut in_cond = false;
while parser.token != token::Eof {
if let Err(diff_err) = parser.err_diff_marker() {
if let Err(diff_err) = parser.err_vcs_conflict_marker() {
diff_errs.push(diff_err);
} else if parser.is_keyword_ahead(0, &[kw::If, kw::While]) {
in_cond = true;
Expand Down
76 changes: 43 additions & 33 deletions compiler/rustc_parse/src/parser/diagnostics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1817,34 +1817,31 @@ impl<'a> Parser<'a> {
Ok(P(T::recovered(Some(P(QSelf { ty, path_span, position: 0 })), path)))
}

pub fn maybe_consume_incorrect_semicolon(&mut self, items: &[P<Item>]) -> bool {
if self.token.kind == TokenKind::Semi {
self.bump();

let mut err =
IncorrectSemicolon { span: self.prev_token.span, opt_help: None, name: "" };
/// This function gets called in places where a semicolon is NOT expected and if there's a
/// semicolon it emits the appropriate error and returns true.
pub fn maybe_consume_incorrect_semicolon(&mut self, previous_item: Option<&Item>) -> bool {
if self.token.kind != TokenKind::Semi {
return false;
}

if !items.is_empty() {
let previous_item = &items[items.len() - 1];
let previous_item_kind_name = match previous_item.kind {
// Check previous item to add it to the diagnostic, for example to say
// `enum declarations are not followed by a semicolon`
let err = match previous_item {
Some(previous_item) => {
let name = match previous_item.kind {
// Say "braced struct" because tuple-structs and
// braceless-empty-struct declarations do take a semicolon.
ItemKind::Struct(..) => Some("braced struct"),
ItemKind::Enum(..) => Some("enum"),
ItemKind::Trait(..) => Some("trait"),
ItemKind::Union(..) => Some("union"),
_ => None,
ItemKind::Struct(..) => "braced struct",
_ => previous_item.kind.descr(),
};
if let Some(name) = previous_item_kind_name {
err.opt_help = Some(());
err.name = name;
}
IncorrectSemicolon { span: self.token.span, name, show_help: true }
}
self.dcx().emit_err(err);
true
} else {
false
}
None => IncorrectSemicolon { span: self.token.span, name: "", show_help: false },
};
self.dcx().emit_err(err);

self.bump();
true
}

/// Creates a `Diag` for an unexpected token `t` and tries to recover if it is a
Expand Down Expand Up @@ -2957,13 +2954,23 @@ impl<'a> Parser<'a> {
err
}

pub fn is_diff_marker(&mut self, long_kind: &TokenKind, short_kind: &TokenKind) -> bool {
/// This checks if this is a conflict marker, depending on the parameters passed.
///
/// * `>>>>>`
/// * `=====`
/// * `<<<<<`
///
Comment on lines +2959 to +2962
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
/// * `>>>>>`
/// * `=====`
/// * `<<<<<`
///
/// * `>>>>>`
/// * `|||||`
/// * `=====`
/// * `<<<<<`

pub fn is_vcs_conflict_marker(
&mut self,
long_kind: &TokenKind,
short_kind: &TokenKind,
) -> bool {
(0..3).all(|i| self.look_ahead(i, |tok| tok == long_kind))
&& self.look_ahead(3, |tok| tok == short_kind)
}

fn diff_marker(&mut self, long_kind: &TokenKind, short_kind: &TokenKind) -> Option<Span> {
if self.is_diff_marker(long_kind, short_kind) {
fn conflict_marker(&mut self, long_kind: &TokenKind, short_kind: &TokenKind) -> Option<Span> {
if self.is_vcs_conflict_marker(long_kind, short_kind) {
let lo = self.token.span;
for _ in 0..4 {
self.bump();
Expand All @@ -2973,15 +2980,16 @@ impl<'a> Parser<'a> {
None
}

pub fn recover_diff_marker(&mut self) {
if let Err(err) = self.err_diff_marker() {
pub fn recover_vcs_conflict_marker(&mut self) {
if let Err(err) = self.err_vcs_conflict_marker() {
err.emit();
FatalError.raise();
}
}

pub fn err_diff_marker(&mut self) -> PResult<'a, ()> {
let Some(start) = self.diff_marker(&TokenKind::BinOp(token::Shl), &TokenKind::Lt) else {
pub fn err_vcs_conflict_marker(&mut self) -> PResult<'a, ()> {
let Some(start) = self.conflict_marker(&TokenKind::BinOp(token::Shl), &TokenKind::Lt)
else {
return Ok(());
};
let mut spans = Vec::with_capacity(3);
Expand All @@ -2993,13 +3001,15 @@ impl<'a> Parser<'a> {
if self.token.kind == TokenKind::Eof {
break;
}
if let Some(span) = self.diff_marker(&TokenKind::OrOr, &TokenKind::BinOp(token::Or)) {
if let Some(span) = self.conflict_marker(&TokenKind::OrOr, &TokenKind::BinOp(token::Or))
{
middlediff3 = Some(span);
}
if let Some(span) = self.diff_marker(&TokenKind::EqEq, &TokenKind::Eq) {
if let Some(span) = self.conflict_marker(&TokenKind::EqEq, &TokenKind::Eq) {
middle = Some(span);
}
if let Some(span) = self.diff_marker(&TokenKind::BinOp(token::Shr), &TokenKind::Gt) {
if let Some(span) = self.conflict_marker(&TokenKind::BinOp(token::Shr), &TokenKind::Gt)
{
spans.push(span);
end = Some(span);
break;
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_parse/src/parser/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3734,7 +3734,7 @@ impl<'a> Parser<'a> {
/// Parses `ident (COLON expr)?`.
fn parse_expr_field(&mut self) -> PResult<'a, ExprField> {
let attrs = self.parse_outer_attributes()?;
self.recover_diff_marker();
self.recover_vcs_conflict_marker();
self.collect_tokens_trailing_token(attrs, ForceCollect::No, |this, attrs| {
let lo = this.token.span;

Expand Down
56 changes: 26 additions & 30 deletions compiler/rustc_parse/src/parser/item.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ impl<'a> Parser<'a> {
}

/// Parses the contents of a module (inner attributes followed by module items).
/// We exit once we hit `term`
pub fn parse_mod(
&mut self,
term: &TokenKind,
Expand All @@ -59,13 +60,13 @@ impl<'a> Parser<'a> {
let post_attr_lo = self.token.span;
let mut items = ThinVec::new();
while let Some(item) = self.parse_item(ForceCollect::No)? {
self.maybe_consume_incorrect_semicolon(Some(&item));
items.push(item);
self.maybe_consume_incorrect_semicolon(&items);
}

if !self.eat(term) {
let token_str = super::token_descr(&self.token);
if !self.maybe_consume_incorrect_semicolon(&items) {
if !self.maybe_consume_incorrect_semicolon(items.last().map(|x| &**x)) {
let msg = format!("expected item, found {token_str}");
let mut err = self.dcx().struct_span_err(self.token.span, msg);
let span = self.token.span;
Expand Down Expand Up @@ -101,9 +102,9 @@ impl<'a> Parser<'a> {
fn_parse_mode: FnParseMode,
force_collect: ForceCollect,
) -> PResult<'a, Option<Item>> {
self.recover_diff_marker();
self.recover_vcs_conflict_marker();
let attrs = self.parse_outer_attributes()?;
self.recover_diff_marker();
self.recover_vcs_conflict_marker();
self.parse_item_common(attrs, true, false, fn_parse_mode, force_collect)
}

Expand Down Expand Up @@ -194,12 +195,12 @@ impl<'a> Parser<'a> {
fn_parse_mode: FnParseMode,
case: Case,
) -> PResult<'a, Option<ItemInfo>> {
let def_final = def == &Defaultness::Final;
let check_pub = def == &Defaultness::Final;
let mut def_ = || mem::replace(def, Defaultness::Final);

let info = if self.eat_keyword_case(kw::Use, case) {
self.parse_use_item()?
} else if self.check_fn_front_matter(def_final, case) {
} else if self.check_fn_front_matter(check_pub, case) {
// FUNCTION ITEM
let (ident, sig, generics, body) =
self.parse_fn(attrs, fn_parse_mode, lo, vis, case)?;
Expand Down Expand Up @@ -310,7 +311,7 @@ impl<'a> Parser<'a> {
Ok(Some(info))
}

fn recover_import_as_use(&mut self) -> PResult<'a, Option<(Ident, ItemKind)>> {
fn recover_import_as_use(&mut self) -> PResult<'a, Option<ItemInfo>> {
let span = self.token.span;
let token_name = super::token_descr(&self.token);
let snapshot = self.create_snapshot_for_diagnostic();
Expand All @@ -328,7 +329,7 @@ impl<'a> Parser<'a> {
}
}

fn parse_use_item(&mut self) -> PResult<'a, (Ident, ItemKind)> {
fn parse_use_item(&mut self) -> PResult<'a, ItemInfo> {
let tree = self.parse_use_tree()?;
if let Err(mut e) = self.expect_semi() {
match tree.kind {
Expand Down Expand Up @@ -723,7 +724,7 @@ impl<'a> Parser<'a> {
if self.recover_doc_comment_before_brace() {
continue;
}
self.recover_diff_marker();
self.recover_vcs_conflict_marker();
match parse_item(self) {
Ok(None) => {
let mut is_unnecessary_semicolon = !items.is_empty()
Expand Down Expand Up @@ -1070,7 +1071,7 @@ impl<'a> Parser<'a> {
/// ```
fn parse_use_tree_list(&mut self) -> PResult<'a, ThinVec<(UseTree, ast::NodeId)>> {
self.parse_delim_comma_seq(Delimiter::Brace, |p| {
p.recover_diff_marker();
p.recover_vcs_conflict_marker();
Ok((p.parse_use_tree()?, DUMMY_NODE_ID))
})
.map(|(r, _)| r)
Expand Down Expand Up @@ -1497,9 +1498,9 @@ impl<'a> Parser<'a> {
}

fn parse_enum_variant(&mut self, span: Span) -> PResult<'a, Option<Variant>> {
self.recover_diff_marker();
self.recover_vcs_conflict_marker();
let variant_attrs = self.parse_outer_attributes()?;
self.recover_diff_marker();
self.recover_vcs_conflict_marker();
let help = "enum variants can be `Variant`, `Variant = <integer>`, \
`Variant(Type, ..., TypeN)` or `Variant { fields: Types }`";
self.collect_tokens_trailing_token(
Expand Down Expand Up @@ -1688,6 +1689,10 @@ impl<'a> Parser<'a> {
Ok((class_name, ItemKind::Union(vdata, generics)))
}

/// This function parses the fields of record structs:
///
/// - `struct S { ... }`
/// - `enum E { Variant { ... } }`
pub(crate) fn parse_record_struct_body(
&mut self,
adt_ty: &str,
Expand All @@ -1714,19 +1719,10 @@ impl<'a> Parser<'a> {
self.eat(&token::CloseDelim(Delimiter::Brace));
} else {
let token_str = super::token_descr(&self.token);
let msg = format!(
"expected {}`{{` after struct name, found {}",
if parsed_where { "" } else { "`where`, or " },
token_str
);
let where_str = if parsed_where { "" } else { "`where`, or " };
let msg = format!("expected {where_str}`{{` after struct name, found {token_str}");
let mut err = self.dcx().struct_span_err(self.token.span, msg);
err.span_label(
self.token.span,
format!(
"expected {}`{{` after struct name",
if parsed_where { "" } else { "`where`, or " }
),
);
err.span_label(self.token.span, format!("expected {where_str}`{{` after struct name",));
return Err(err);
}

Expand All @@ -1740,7 +1736,7 @@ impl<'a> Parser<'a> {
let attrs = p.parse_outer_attributes()?;
p.collect_tokens_trailing_token(attrs, ForceCollect::No, |p, attrs| {
let mut snapshot = None;
if p.is_diff_marker(&TokenKind::BinOp(token::Shl), &TokenKind::Lt) {
if p.is_vcs_conflict_marker(&TokenKind::BinOp(token::Shl), &TokenKind::Lt) {
// Account for `<<<<<<<` diff markers. We can't proactively error here because
// that can be a valid type start, so we snapshot and reparse only once we've
// encountered another parse error.
Expand All @@ -1751,7 +1747,7 @@ impl<'a> Parser<'a> {
Ok(vis) => vis,
Err(err) => {
if let Some(ref mut snapshot) = snapshot {
snapshot.recover_diff_marker();
snapshot.recover_vcs_conflict_marker();
}
return Err(err);
}
Expand All @@ -1760,7 +1756,7 @@ impl<'a> Parser<'a> {
Ok(ty) => ty,
Err(err) => {
if let Some(ref mut snapshot) = snapshot {
snapshot.recover_diff_marker();
snapshot.recover_vcs_conflict_marker();
}
return Err(err);
}
Expand All @@ -1785,9 +1781,9 @@ impl<'a> Parser<'a> {

/// Parses an element of a struct declaration.
fn parse_field_def(&mut self, adt_ty: &str) -> PResult<'a, FieldDef> {
self.recover_diff_marker();
self.recover_vcs_conflict_marker();
let attrs = self.parse_outer_attributes()?;
self.recover_diff_marker();
self.recover_vcs_conflict_marker();
self.collect_tokens_trailing_token(attrs, ForceCollect::No, |this, attrs| {
let lo = this.token.span;
let vis = this.parse_visibility(FollowedByType::No)?;
Expand Down Expand Up @@ -2647,7 +2643,7 @@ impl<'a> Parser<'a> {
}

let (mut params, _) = self.parse_paren_comma_seq(|p| {
p.recover_diff_marker();
p.recover_vcs_conflict_marker();
let snapshot = p.create_snapshot_for_diagnostic();
let param = p.parse_param_general(req_name, first_param).or_else(|e| {
let guar = e.emit();
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_parse/src/parser/stmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,7 @@ impl<'a> Parser<'a> {
if self.token == token::Eof {
break;
}
if self.is_diff_marker(&TokenKind::BinOp(token::Shl), &TokenKind::Lt) {
if self.is_vcs_conflict_marker(&TokenKind::BinOp(token::Shl), &TokenKind::Lt) {
// Account for `<<<<<<<` diff markers. We can't proactively error here because
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Account for `<<<<<<<` diff markers. We can't proactively error here because
// Account for `<<<<<<<` conflict markers. We can't proactively error here because

// that can be a valid path start, so we snapshot and reparse only once we've
// encountered another parse error.
Expand All @@ -576,7 +576,7 @@ impl<'a> Parser<'a> {
let stmt = match self.parse_full_stmt(recover) {
Err(mut err) if recover.yes() => {
if let Some(ref mut snapshot) = snapshot {
snapshot.recover_diff_marker();
snapshot.recover_vcs_conflict_marker();
}
if self.token == token::Colon {
// if a previous and next token of the current one is
Expand Down
4 changes: 2 additions & 2 deletions src/librustdoc/doctest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -686,9 +686,9 @@ pub(crate) fn make_test(
}
}

// The supplied slice is only used for diagnostics,
// The supplied item is only used for diagnostics,
// which are swallowed here anyway.
parser.maybe_consume_incorrect_semicolon(&[]);
parser.maybe_consume_incorrect_semicolon(None);
}

// Reset errors so that they won't be reported as compiler bugs when dropping the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ error: expected item, found `;`
|
LL | mod M {};
| ^ help: remove this semicolon
|
= help: module declarations are not followed by a semicolon

error: expected item, found `;`
--> $DIR/recover-from-semicolon-trailing-item.rs:4:12
Expand All @@ -17,6 +19,8 @@ error: expected item, found `;`
|
LL | fn foo(a: usize) {};
| ^ help: remove this semicolon
|
= help: function declarations are not followed by a semicolon

error[E0308]: mismatched types
--> $DIR/recover-from-semicolon-trailing-item.rs:10:20
Expand Down
Loading