Skip to content

Commit

Permalink
Rollup merge of #88795 - FabianWolff:issue-88684, r=wesleywiser
Browse files Browse the repository at this point in the history
Print a note if a character literal contains a variation selector

Fixes #88684.
  • Loading branch information
the8472 authored Sep 21, 2021
2 parents 840acd3 + 0d8245b commit c2cdba4
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 14 deletions.
61 changes: 47 additions & 14 deletions compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
use std::iter::once;
use std::ops::Range;

use rustc_errors::{Applicability, Handler};
use rustc_errors::{pluralize, Applicability, Handler};
use rustc_lexer::unescape::{EscapeError, Mode};
use rustc_span::{BytePos, Span};

Expand Down Expand Up @@ -49,24 +49,57 @@ pub(crate) fn emit_unescape_error(
.emit();
}
EscapeError::MoreThanOneChar => {
let (prefix, msg) = if mode.is_bytes() {
("b", "if you meant to write a byte string literal, use double quotes")
} else {
("", "if you meant to write a `str` literal, use double quotes")
};
use unicode_normalization::{char::is_combining_mark, UnicodeNormalization};

handler
.struct_span_err(
span_with_quotes,
"character literal may only contain one codepoint",
)
.span_suggestion(
let mut has_help = false;
let mut handler = handler.struct_span_err(
span_with_quotes,
"character literal may only contain one codepoint",
);

if lit.chars().skip(1).all(|c| is_combining_mark(c)) {
let escaped_marks =
lit.chars().skip(1).map(|c| c.escape_default().to_string()).collect::<Vec<_>>();
handler.span_note(
span,
&format!(
"this `{}` is followed by the combining mark{} `{}`",
lit.chars().next().unwrap(),
pluralize!(escaped_marks.len()),
escaped_marks.join(""),
),
);
let normalized = lit.nfc().to_string();
if normalized.chars().count() == 1 {
has_help = true;
handler.span_suggestion(
span,
&format!(
"consider using the normalized form `{}` of this character",
normalized.chars().next().unwrap().escape_default()
),
normalized,
Applicability::MachineApplicable,
);
}
}

if !has_help {
let (prefix, msg) = if mode.is_bytes() {
("b", "if you meant to write a byte string literal, use double quotes")
} else {
("", "if you meant to write a `str` literal, use double quotes")
};

handler.span_suggestion(
span_with_quotes,
msg,
format!("{}\"{}\"", prefix, lit),
Applicability::MachineApplicable,
)
.emit();
);
}

handler.emit();
}
EscapeError::EscapeOnlyChar => {
let (c, char_span) = last_char();
Expand Down
21 changes: 21 additions & 0 deletions src/test/ui/parser/unicode-character-literal.fixed
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Regression test for #88684: Improve diagnostics for combining marks
// in character literals.

// run-rustfix

fn main() {
let _spade = "♠️";
//~^ ERROR: character literal may only contain one codepoint
//~| NOTE: this `♠` is followed by the combining mark `\u{fe0f}`
//~| HELP: if you meant to write a `str` literal, use double quotes

let _s = "ṩ̂̊";
//~^ ERROR: character literal may only contain one codepoint
//~| NOTE: this `s` is followed by the combining marks `\u{323}\u{307}\u{302}\u{30a}`
//~| HELP: if you meant to write a `str` literal, use double quotes

let _a = 'Å';
//~^ ERROR: character literal may only contain one codepoint
//~| NOTE: this `A` is followed by the combining mark `\u{30a}`
//~| HELP: consider using the normalized form `\u{c5}` of this character
}
21 changes: 21 additions & 0 deletions src/test/ui/parser/unicode-character-literal.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Regression test for #88684: Improve diagnostics for combining marks
// in character literals.

// run-rustfix

fn main() {
let _spade = '♠️';
//~^ ERROR: character literal may only contain one codepoint
//~| NOTE: this `♠` is followed by the combining mark `\u{fe0f}`
//~| HELP: if you meant to write a `str` literal, use double quotes

let _s = 'ṩ̂̊';
//~^ ERROR: character literal may only contain one codepoint
//~| NOTE: this `s` is followed by the combining marks `\u{323}\u{307}\u{302}\u{30a}`
//~| HELP: if you meant to write a `str` literal, use double quotes

let _a = 'Å';
//~^ ERROR: character literal may only contain one codepoint
//~| NOTE: this `A` is followed by the combining mark `\u{30a}`
//~| HELP: consider using the normalized form `\u{c5}` of this character
}
48 changes: 48 additions & 0 deletions src/test/ui/parser/unicode-character-literal.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
error: character literal may only contain one codepoint
--> $DIR/unicode-character-literal.rs:7:18
|
LL | let _spade = '♠️';
| ^^^
|
note: this `♠` is followed by the combining mark `\u{fe0f}`
--> $DIR/unicode-character-literal.rs:7:19
|
LL | let _spade = '♠️';
| ^
help: if you meant to write a `str` literal, use double quotes
|
LL | let _spade = "♠️";
| ~~~

error: character literal may only contain one codepoint
--> $DIR/unicode-character-literal.rs:12:14
|
LL | let _s = 'ṩ̂̊';
| ^^^
|
note: this `s` is followed by the combining marks `\u{323}\u{307}\u{302}\u{30a}`
--> $DIR/unicode-character-literal.rs:12:15
|
LL | let _s = 'ṩ̂̊';
| ^
help: if you meant to write a `str` literal, use double quotes
|
LL | let _s = "ṩ̂̊";
| ~~~

error: character literal may only contain one codepoint
--> $DIR/unicode-character-literal.rs:17:14
|
LL | let _a = 'Å';
| ^-^
| |
| help: consider using the normalized form `\u{c5}` of this character: `Å`
|
note: this `A` is followed by the combining mark `\u{30a}`
--> $DIR/unicode-character-literal.rs:17:15
|
LL | let _a = 'Å';
| ^

error: aborting due to 3 previous errors

0 comments on commit c2cdba4

Please sign in to comment.