Skip to content

Commit

Permalink
Remove Lexer's dependency on Parser.
Browse files Browse the repository at this point in the history
Lexing precedes parsing, as you'd expect: `Lexer` creates a
`TokenStream` and `Parser` then parses that `TokenStream`.

But, in a horrendous violation of layering abstractions and common
sense, `Lexer` depends on `Parser`! The `Lexer::unclosed_delim_err`
method does some error recovery that relies on creating a `Parser` to do
some post-processing of the `TokenStream` that the `Lexer` just created.

This commit just removes `unclosed_delim_err`. This change removes
`Lexer`'s dependency on `Parser`, and also means that `lex_token_tree`'s
return value can have a more typical form.

The cost is slightly worse error messages in two obscure cases, as shown
in these tests:
- tests/ui/parser/brace-in-let-chain.rs: there is slightly less
  explanation in this case involving an extra `{`.
- tests/ui/parser/diff-markers/unclosed-delims{,-in-macro}.rs: the diff
  marker detection is no longer supported (because that detection is
  implemented in the parser).

In my opinion this cost is outweighed by the magnitude of the code
cleanup.
  • Loading branch information
nnethercote committed Dec 12, 2024
1 parent a94fce9 commit 2e412fe
Show file tree
Hide file tree
Showing 8 changed files with 68 additions and 174 deletions.
40 changes: 23 additions & 17 deletions compiler/rustc_parse/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,24 +69,30 @@ pub(crate) fn lex_token_trees<'psess, 'src>(
token: Token::dummy(),
diag_info: TokenTreeDiagInfo::default(),
};
let (_open_spacing, stream, res) = lexer.lex_token_trees(/* is_delimited */ false);
let unmatched_delims = lexer.diag_info.unmatched_delims;

if res.is_ok() && unmatched_delims.is_empty() {
Ok(stream)
} else {
// Return error if there are unmatched delimiters or unclosed delimiters.
// We emit delimiter mismatch errors first, then emit the unclosing delimiter mismatch
// because the delimiter mismatch is more likely to be the root cause of error
let mut buffer: Vec<_> = unmatched_delims
.into_iter()
.filter_map(|unmatched_delim| make_unclosed_delims_error(unmatched_delim, psess))
.collect();
if let Err(errs) = res {
// Add unclosing delimiter or diff marker errors
buffer.extend(errs);
let res = lexer.lex_token_trees(/* is_delimited */ false);

let mut unmatched_delims: Vec<_> = lexer
.diag_info
.unmatched_delims
.into_iter()
.filter_map(|unmatched_delim| make_unclosed_delims_error(unmatched_delim, psess))
.collect();

match res {
Ok((_open_spacing, stream)) => {
if unmatched_delims.is_empty() {
Ok(stream)
} else {
// Return error if there are unmatched delimiters or unclosed delimiters.
Err(unmatched_delims)
}
}
Err(errs) => {
// We emit delimiter mismatch errors first, then emit the unclosing delimiter mismatch
// because the delimiter mismatch is more likely to be the root cause of error
unmatched_delims.extend(errs);
Err(unmatched_delims)
}
Err(buffer)
}
}

Expand Down
94 changes: 14 additions & 80 deletions compiler/rustc_parse/src/lexer/tokentrees.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,18 @@
use rustc_ast::token::{self, Delimiter, Token};
use rustc_ast::tokenstream::{DelimSpacing, DelimSpan, Spacing, TokenStream, TokenTree};
use rustc_ast_pretty::pprust::token_to_string;
use rustc_errors::{Applicability, Diag};
use rustc_span::symbol::kw;
use rustc_errors::Diag;

use super::diagnostics::{report_suspicious_mismatch_block, same_indentation_level};
use super::{Lexer, UnmatchedDelim};
use crate::Parser;

impl<'psess, 'src> Lexer<'psess, 'src> {
// Lex into a token stream. The `Spacing` in the result is that of the
// opening delimiter.
pub(super) fn lex_token_trees(
&mut self,
is_delimited: bool,
) -> (Spacing, TokenStream, Result<(), Vec<Diag<'psess>>>) {
) -> Result<(Spacing, TokenStream), Vec<Diag<'psess>>> {
// Move past the opening delimiter.
let open_spacing = self.bump_minimal();

Expand All @@ -27,25 +25,25 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
debug_assert!(!matches!(delim, Delimiter::Invisible(_)));
buf.push(match self.lex_token_tree_open_delim(delim) {
Ok(val) => val,
Err(errs) => return (open_spacing, TokenStream::new(buf), Err(errs)),
Err(errs) => return Err(errs),
})
}
token::CloseDelim(delim) => {
// Invisible delimiters cannot occur here because `TokenTreesReader` parses
// code directly from strings, with no macro expansion involved.
debug_assert!(!matches!(delim, Delimiter::Invisible(_)));
return (
open_spacing,
TokenStream::new(buf),
if is_delimited { Ok(()) } else { Err(vec![self.close_delim_err(delim)]) },
);
return if is_delimited {
Ok((open_spacing, TokenStream::new(buf)))
} else {
Err(vec![self.close_delim_err(delim)])
};
}
token::Eof => {
return (
open_spacing,
TokenStream::new(buf),
if is_delimited { Err(vec![self.eof_err()]) } else { Ok(()) },
);
return if is_delimited {
Err(vec![self.eof_err()])
} else {
Ok((open_spacing, TokenStream::new(buf)))
};
}
_ => {
// Get the next normal token.
Expand Down Expand Up @@ -107,10 +105,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
// Lex the token trees within the delimiters.
// We stop at any delimiter so we can try to recover if the user
// uses an incorrect delimiter.
let (open_spacing, tts, res) = self.lex_token_trees(/* is_delimited */ true);
if let Err(errs) = res {
return Err(self.unclosed_delim_err(tts, errs));
}
let (open_spacing, tts) = self.lex_token_trees(/* is_delimited */ true)?;

// Expand to cover the entire delimited token tree.
let delim_span = DelimSpan::from_pair(pre_span, self.token.span);
Expand Down Expand Up @@ -247,67 +242,6 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
this_spacing
}

fn unclosed_delim_err(
&mut self,
tts: TokenStream,
mut errs: Vec<Diag<'psess>>,
) -> Vec<Diag<'psess>> {
// If there are unclosed delims, see if there are diff markers and if so, point them
// out instead of complaining about the unclosed delims.
let mut parser = Parser::new(self.psess, tts, None);
let mut diff_errs = vec![];
// Suggest removing a `{` we think appears in an `if`/`while` condition.
// We want to suggest removing a `{` only if we think we're in an `if`/`while` condition,
// but we have no way of tracking this in the lexer itself, so we piggyback on the parser.
let mut in_cond = false;
while parser.token != token::Eof {
if let Err(diff_err) = parser.err_vcs_conflict_marker() {
diff_errs.push(diff_err);
} else if parser.is_keyword_ahead(0, &[kw::If, kw::While]) {
in_cond = true;
} else if matches!(
parser.token.kind,
token::CloseDelim(Delimiter::Brace) | token::FatArrow
) {
// End of the `if`/`while` body, or the end of a `match` guard.
in_cond = false;
} else if in_cond && parser.token == token::OpenDelim(Delimiter::Brace) {
// Store the `&&` and `let` to use their spans later when creating the diagnostic
let maybe_andand = parser.look_ahead(1, |t| t.clone());
let maybe_let = parser.look_ahead(2, |t| t.clone());
if maybe_andand == token::OpenDelim(Delimiter::Brace) {
// This might be the beginning of the `if`/`while` body (i.e., the end of the
// condition).
in_cond = false;
} else if maybe_andand == token::AndAnd && maybe_let.is_keyword(kw::Let) {
let mut err = parser.dcx().struct_span_err(
parser.token.span,
"found a `{` in the middle of a let-chain",
);
err.span_suggestion(
parser.token.span,
"consider removing this brace to parse the `let` as part of the same chain",
"",
Applicability::MachineApplicable,
);
err.span_label(
maybe_andand.span.to(maybe_let.span),
"you might have meant to continue the let-chain here",
);
errs.push(err);
}
}
parser.bump();
}
if !diff_errs.is_empty() {
for err in errs {
err.cancel();
}
return diff_errs;
}
errs
}

fn close_delim_err(&mut self, delim: Delimiter) -> Diag<'psess> {
// An unexpected closing delimiter (i.e., there is no matching opening delimiter).
let token_str = token_to_string(&self.token);
Expand Down
4 changes: 2 additions & 2 deletions tests/ui/parser/brace-in-let-chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
#![feature(let_chains)]
fn main() {
if let () = ()
&& let () = () { //~ERROR: found a `{` in the middle of a let-chain
&& let () = () {
&& let () = ()
{
}
}

fn quux() {
while let () = ()
&& let () = () { //~ERROR: found a `{` in the middle of a let-chain
&& let () = () {
&& let () = ()
{
}
Expand Down
30 changes: 1 addition & 29 deletions tests/ui/parser/brace-in-let-chain.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -27,33 +27,5 @@ LL | }
LL | }
| ^

error: found a `{` in the middle of a let-chain
--> $DIR/brace-in-let-chain.rs:14:24
|
LL | && let () = () {
| ^
LL | && let () = ()
| ------ you might have meant to continue the let-chain here
|
help: consider removing this brace to parse the `let` as part of the same chain
|
LL - && let () = () {
LL + && let () = ()
|

error: found a `{` in the middle of a let-chain
--> $DIR/brace-in-let-chain.rs:6:24
|
LL | && let () = () {
| ^
LL | && let () = ()
| ------ you might have meant to continue the let-chain here
|
help: consider removing this brace to parse the `let` as part of the same chain
|
LL - && let () = () {
LL + && let () = ()
|

error: aborting due to 3 previous errors
error: aborting due to 1 previous error

6 changes: 4 additions & 2 deletions tests/ui/parser/diff-markers/unclosed-delims-in-macro.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
// The diff marker detection was removed for this example, because it relied on
// the lexer having a dependency on the parser, which was horrible.

macro_rules! foo {
<<<<<<< HEAD
//~^ ERROR encountered diff marker
() {
=======
() { //
>>>>>>> 7a4f13c blah blah blah
}
}
} //~ this file contains an unclosed delimiter
27 changes: 10 additions & 17 deletions tests/ui/parser/diff-markers/unclosed-delims-in-macro.stderr
Original file line number Diff line number Diff line change
@@ -1,23 +1,16 @@
error: encountered diff marker
--> $DIR/unclosed-delims-in-macro.rs:2:1
error: this file contains an unclosed delimiter
--> $DIR/unclosed-delims-in-macro.rs:11:48
|
LL | macro_rules! foo {
| - unclosed delimiter
LL | <<<<<<< HEAD
| ^^^^^^^ between this marker and `=======` is the code that we're merging into
LL | () {
| - this delimiter might not be properly closed...
...
LL | =======
| ------- between this marker and `>>>>>>>` is the incoming code
LL | () { //
LL | >>>>>>> 7a4f13c blah blah blah
| ^^^^^^^ this marker concludes the conflict region
|
= note: conflict markers indicate that a merge was started but could not be completed due to merge conflicts
to resolve a conflict, keep only the code you want and then delete the lines containing conflict markers
= help: if you're having merge conflicts after pulling new code:
the top section is the code you already had and the bottom section is the remote code
if you're in the middle of a rebase:
the top section is the code being rebased onto and the bottom section is the code coming from the current commit being rebased
= note: for an explanation on these markers from the `git` documentation:
visit <https://git-scm.com/book/en/v2/Git-Tools-Advanced-Merging#_checking_out_conflicts>
LL | }
| - ^
| |
| ...as it matches this but it has different indentation

error: aborting due to 1 previous error

14 changes: 4 additions & 10 deletions tests/ui/parser/diff-markers/unclosed-delims.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,12 @@
// The diff marker detection was removed for this example, because it relied on
// the lexer having a dependency on the parser, which was horrible.

mod tests {
#[test]
<<<<<<< HEAD
//~^ ERROR encountered diff marker
//~| NOTE between this marker and `=======`

//~| NOTE conflict markers indicate that
//~| HELP if you're having merge conflicts
//~| NOTE for an explanation on these markers

fn test1() {
=======
//~^ NOTE between this marker and `>>>>>>>`
fn test2() {
>>>>>>> 7a4f13c blah blah blah
//~^ NOTE this marker concludes the conflict region
}
}
} //~ this file contains an unclosed delimiter
27 changes: 10 additions & 17 deletions tests/ui/parser/diff-markers/unclosed-delims.stderr
Original file line number Diff line number Diff line change
@@ -1,23 +1,16 @@
error: encountered diff marker
--> $DIR/unclosed-delims.rs:3:1
error: this file contains an unclosed delimiter
--> $DIR/unclosed-delims.rs:12:48
|
LL | <<<<<<< HEAD
| ^^^^^^^ between this marker and `=======` is the code that we're merging into
LL | mod tests {
| - unclosed delimiter
...
LL | =======
| ------- between this marker and `>>>>>>>` is the incoming code
LL | fn test1() {
| - this delimiter might not be properly closed...
...
LL | >>>>>>> 7a4f13c blah blah blah
| ^^^^^^^ this marker concludes the conflict region
|
= note: conflict markers indicate that a merge was started but could not be completed due to merge conflicts
to resolve a conflict, keep only the code you want and then delete the lines containing conflict markers
= help: if you're having merge conflicts after pulling new code:
the top section is the code you already had and the bottom section is the remote code
if you're in the middle of a rebase:
the top section is the code being rebased onto and the bottom section is the code coming from the current commit being rebased
= note: for an explanation on these markers from the `git` documentation:
visit <https://git-scm.com/book/en/v2/Git-Tools-Advanced-Merging#_checking_out_conflicts>
LL | }
| - ^
| |
| ...as it matches this but it has different indentation

error: aborting due to 1 previous error

0 comments on commit 2e412fe

Please sign in to comment.