From da7dd4fa2553d5ea10a8526191efe00e8bc9674d Mon Sep 17 00:00:00 2001 From: 5225225 <5225225@mailbox.org> Date: Sat, 13 Nov 2021 11:14:17 +0000 Subject: [PATCH 1/4] Print full char literal on error if any are non-printing --- .../src/lexer/unescape_error_reporting.rs | 10 ++++++++++ .../char/whitespace-character-literal.rs | 9 +++++++++ .../char/whitespace-character-literal.stderr | 18 ++++++++++++++++++ 3 files changed, 37 insertions(+) create mode 100644 src/test/ui/parser/char/whitespace-character-literal.rs create mode 100644 src/test/ui/parser/char/whitespace-character-literal.stderr diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index 569f186a72766..0f6594a2a7f32 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -82,6 +82,16 @@ pub(crate) fn emit_unescape_error( Applicability::MachineApplicable, ); } + } else { + if lit.chars().filter(|x| x.is_whitespace() || x.is_control()).count() >= 1 { + handler.span_note( + span, + &format!( + "there are non-printing characters, the full sequence is `{}`", + lit.escape_default(), + ), + ); + } } if !has_help { diff --git a/src/test/ui/parser/char/whitespace-character-literal.rs b/src/test/ui/parser/char/whitespace-character-literal.rs new file mode 100644 index 0000000000000..ecb5c3cf49eda --- /dev/null +++ b/src/test/ui/parser/char/whitespace-character-literal.rs @@ -0,0 +1,9 @@ +// This tests that the error generated when a character literal has multiple +// characters in it contains a note about non-printing characters. 
+ +fn main() { + // x + let _hair_space_around = ' x​'; + //~^ ERROR: character literal may only contain one codepoint + //~| NOTE: there are non-printing characters, the full sequence is `\u{200a}x\u{200b}` +} diff --git a/src/test/ui/parser/char/whitespace-character-literal.stderr b/src/test/ui/parser/char/whitespace-character-literal.stderr new file mode 100644 index 0000000000000..a12088ce77d11 --- /dev/null +++ b/src/test/ui/parser/char/whitespace-character-literal.stderr @@ -0,0 +1,18 @@ +error: character literal may only contain one codepoint + --> $DIR/whitespace-character-literal.rs:6:30 + | +LL | let _hair_space_around = ' x​'; + | ^^^^ + | +note: there are non-printing characters, the full sequence is `\u{200a}x\u{200b}` + --> $DIR/whitespace-character-literal.rs:6:31 + | +LL | let _hair_space_around = ' x​'; + | ^^ +help: if you meant to write a `str` literal, use double quotes + | +LL | let _hair_space_around = " x​"; + | ~~~~ + +error: aborting due to previous error + From 65e02be7e43cc5c772f8c4d72a0aecd63c8c4b00 Mon Sep 17 00:00:00 2001 From: 5225225 <5225225@mailbox.org> Date: Sat, 13 Nov 2021 12:46:22 +0000 Subject: [PATCH 2/4] Suggest removing the non-printing characters --- .../src/lexer/unescape_error_reporting.rs | 17 ++++++++++++++++- .../parser/char/whitespace-character-literal.rs | 3 ++- .../char/whitespace-character-literal.stderr | 13 ++++++------- 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index 0f6594a2a7f32..aa7ab4a953ca2 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -7,6 +7,10 @@ use rustc_errors::{pluralize, Applicability, Handler}; use rustc_lexer::unescape::{EscapeError, Mode}; use rustc_span::{BytePos, Span}; +fn printing(ch: char) -> bool { + unicode_width::UnicodeWidthChar::width(ch).unwrap_or(0) != 
0 && !ch.is_whitespace() +} + pub(crate) fn emit_unescape_error( handler: &Handler, // interior part of the literal, without quotes @@ -83,7 +87,11 @@ pub(crate) fn emit_unescape_error( ); } } else { - if lit.chars().filter(|x| x.is_whitespace() || x.is_control()).count() >= 1 { + let printable: Vec<char> = lit.chars().filter(|x| printing(*x)).collect(); + + if let [ch] = printable.as_slice() { + has_help = true; + handler.span_note( span, &format!( @@ -91,6 +99,13 @@ pub(crate) fn emit_unescape_error( lit.escape_default(), ), ); + + handler.span_suggestion( + span, + "consider removing the non-printing characters", + ch.to_string(), + Applicability::MaybeIncorrect, + ); } } diff --git a/src/test/ui/parser/char/whitespace-character-literal.rs b/src/test/ui/parser/char/whitespace-character-literal.rs index ecb5c3cf49eda..de5e09204b462 100644 --- a/src/test/ui/parser/char/whitespace-character-literal.rs +++ b/src/test/ui/parser/char/whitespace-character-literal.rs @@ -2,8 +2,9 @@ // characters in it contains a note about non-printing characters. 
fn main() { - // x let _hair_space_around = ' x​'; //~^ ERROR: character literal may only contain one codepoint //~| NOTE: there are non-printing characters, the full sequence is `\u{200a}x\u{200b}` + //~| HELP: consider removing the non-printing characters + //~| SUGGESTION: x } diff --git a/src/test/ui/parser/char/whitespace-character-literal.stderr b/src/test/ui/parser/char/whitespace-character-literal.stderr index a12088ce77d11..aa4fe4cf01f64 100644 --- a/src/test/ui/parser/char/whitespace-character-literal.stderr +++ b/src/test/ui/parser/char/whitespace-character-literal.stderr @@ -1,18 +1,17 @@ +['x'] error: character literal may only contain one codepoint - --> $DIR/whitespace-character-literal.rs:6:30 + --> $DIR/whitespace-character-literal.rs:5:30 | LL | let _hair_space_around = ' x​'; - | ^^^^ + | ^--^ + | | + | help: consider removing the non-printing characters: `x` | note: there are non-printing characters, the full sequence is `\u{200a}x\u{200b}` - --> $DIR/whitespace-character-literal.rs:6:31 + --> $DIR/whitespace-character-literal.rs:5:31 | LL | let _hair_space_around = ' x​'; | ^^ -help: if you meant to write a `str` literal, use double quotes - | -LL | let _hair_space_around = " x​"; - | ~~~~ error: aborting due to previous error From 6a34355804cb13cbbe1ad60aea58001759e05e66 Mon Sep 17 00:00:00 2001 From: 5225225 <5225225@mailbox.org> Date: Sat, 13 Nov 2021 13:25:19 +0000 Subject: [PATCH 3/4] Remove debug output from test stderr --- src/test/ui/parser/char/whitespace-character-literal.stderr | 1 - 1 file changed, 1 deletion(-) diff --git a/src/test/ui/parser/char/whitespace-character-literal.stderr b/src/test/ui/parser/char/whitespace-character-literal.stderr index aa4fe4cf01f64..d73de41a8099b 100644 --- a/src/test/ui/parser/char/whitespace-character-literal.stderr +++ b/src/test/ui/parser/char/whitespace-character-literal.stderr @@ -1,4 +1,3 @@ -['x'] error: character literal may only contain one codepoint --> 
$DIR/whitespace-character-literal.rs:5:30 | From e1973929a9d1c28027ded911b10ecc2a3a3d2f9a Mon Sep 17 00:00:00 2001 From: 5225225 <5225225@mailbox.org> Date: Sat, 13 Nov 2021 15:08:20 +0000 Subject: [PATCH 4/4] Inline printable function --- .../src/lexer/unescape_error_reporting.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index aa7ab4a953ca2..7f68112a427ba 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -7,10 +7,6 @@ use rustc_errors::{pluralize, Applicability, Handler}; use rustc_lexer::unescape::{EscapeError, Mode}; use rustc_span::{BytePos, Span}; -fn printing(ch: char) -> bool { - unicode_width::UnicodeWidthChar::width(ch).unwrap_or(0) != 0 && !ch.is_whitespace() -} - pub(crate) fn emit_unescape_error( handler: &Handler, // interior part of the literal, without quotes @@ -87,7 +83,13 @@ pub(crate) fn emit_unescape_error( ); } } else { - let printable: Vec<char> = lit.chars().filter(|x| printing(*x)).collect(); + let printable: Vec<char> = lit + .chars() + .filter(|&x| { + unicode_width::UnicodeWidthChar::width(x).unwrap_or(0) != 0 + && !x.is_whitespace() + }) + .collect(); if let [ch] = printable.as_slice() { has_help = true;