Skip to content

Commit

Permalink
rustc_errors: use perfect hashing for character replacements
Browse files Browse the repository at this point in the history
  • Loading branch information
GrigorenkoPV committed Aug 6, 2024
1 parent 8c7e0e1 commit 789baed
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 50 deletions.
15 changes: 15 additions & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -2712,6 +2712,7 @@ version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc"
dependencies = [
"phf_macros",
"phf_shared 0.11.2",
]

Expand Down Expand Up @@ -2745,6 +2746,19 @@ dependencies = [
"rand",
]

[[package]]
name = "phf_macros"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b"
dependencies = [
"phf_generator 0.11.2",
"phf_shared 0.11.2",
"proc-macro2",
"quote",
"syn 2.0.67",
]

[[package]]
name = "phf_shared"
version = "0.10.0"
Expand Down Expand Up @@ -3653,6 +3667,7 @@ version = "0.0.0"
dependencies = [
"annotate-snippets 0.10.2",
"derive_setters",
"phf",
"rustc_ast",
"rustc_ast_pretty",
"rustc_data_structures",
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_errors/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ edition = "2021"
# tidy-alphabetical-start
annotate-snippets = "0.10"
derive_setters = "0.1.6"
phf = { version = "0.11.2", features = ["macros"] }
rustc_ast = { path = "../rustc_ast" }
rustc_ast_pretty = { path = "../rustc_ast_pretty" }
rustc_data_structures = { path = "../rustc_data_structures" }
Expand Down
96 changes: 46 additions & 50 deletions compiler/rustc_errors/src/emitter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2563,66 +2563,62 @@ fn num_decimal_digits(num: usize) -> usize {
}

// We replace some characters so the CLI output is always consistent and underlines aligned.
// Keep the following list in sync with `rustc_span::char_width`.
// ATTENTION: keep lexicographically sorted so that the binary search will work
const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[
// tidy-alphabetical-start
const OUTPUT_REPLACEMENTS: phf::Map<char, &'static str> = phf::phf_map![
// In terminals without Unicode support the following will be garbled, but in *all* terminals
// the underlying codepoint will be as well. We could gate this replacement behind a "unicode
// support" gate.
('\0', "␀"),
('\u{0001}', "␁"),
('\u{0002}', "␂"),
('\u{0003}', "␃"),
('\u{0004}', "␄"),
('\u{0005}', "␅"),
('\u{0006}', "␆"),
('\u{0007}', "␇"),
('\u{0008}', "␈"),
('\u{0009}', " "), // We do our own tab replacement
('\u{000b}', "␋"),
('\u{000c}', "␌"),
('\u{000d}', "␍"),
('\u{000e}', "␎"),
('\u{000f}', "␏"),
('\u{0010}', "␐"),
('\u{0011}', "␑"),
('\u{0012}', "␒"),
('\u{0013}', "␓"),
('\u{0014}', "␔"),
('\u{0015}', "␕"),
('\u{0016}', "␖"),
('\u{0017}', "␗"),
('\u{0018}', "␘"),
('\u{0019}', "␙"),
('\u{001a}', "␚"),
('\u{001b}', "␛"),
('\u{001c}', "␜"),
('\u{001d}', "␝"),
('\u{001e}', "␞"),
('\u{001f}', "␟"),
('\u{007f}', "␡"),
('\u{200d}', ""), // Replace ZWJ for consistent terminal output of grapheme clusters.
('\u{202a}', "�"), // The following unicode text flow control characters are inconsistently
('\u{202b}', "�"), // supported across CLIs and can cause confusion due to the bytes on disk
('\u{202c}', "�"), // not corresponding to the visible source code, so we replace them always.
('\u{202d}', "�"),
('\u{202e}', "�"),
('\u{2066}', "�"),
('\u{2067}', "�"),
('\u{2068}', "�"),
('\u{2069}', "�"),
// tidy-alphabetical-end
'\0' => "␀",
'\t' => " ", // We do our own tab replacement
'\r' => "␍",
'\u{0001}' => "␁",
'\u{0002}' => "␂",
'\u{0003}' => "␃",
'\u{0004}' => "␄",
'\u{0005}' => "␅",
'\u{0006}' => "␆",
'\u{0007}' => "␇",
'\u{0008}' => "␈",
'\u{000b}' => "␋",
'\u{000c}' => "␌",
'\u{000e}' => "␎",
'\u{000f}' => "␏",
'\u{0010}' => "␐",
'\u{0011}' => "␑",
'\u{0012}' => "␒",
'\u{0013}' => "␓",
'\u{0014}' => "␔",
'\u{0015}' => "␕",
'\u{0016}' => "␖",
'\u{0017}' => "␗",
'\u{0018}' => "␘",
'\u{0019}' => "␙",
'\u{001a}' => "␚",
'\u{001b}' => "␛",
'\u{001c}' => "␜",
'\u{001d}' => "␝",
'\u{001e}' => "␞",
'\u{001f}' => "␟",
'\u{007f}' => "␡",
'\u{200d}' => "", // Replace ZWJ for consistent terminal output of grapheme clusters.
'\u{202a}' => "�", // The following unicode text flow control characters are inconsistently
'\u{202b}' => "�", // supported across CLIs and can cause confusion due to the bytes on disk
'\u{202c}' => "�", // not corresponding to the visible source code, so we replace them always.
'\u{202d}' => "�",
'\u{202e}' => "�",
'\u{2066}' => "�",
'\u{2067}' => "�",
'\u{2068}' => "�",
'\u{2069}' => "�",
];

fn normalize_whitespace(s: &str) -> String {
// Scan the input string for a character in the ordered table above. If it's present, replace
// it with its alternative string (it can be more than 1 char!). Otherwise, retain the input
// char. At the end, allocate all chars into a string in one operation.
s.chars().fold(String::with_capacity(s.len()), |mut s, c| {
match OUTPUT_REPLACEMENTS.binary_search_by_key(&c, |(k, _)| *k) {
Ok(i) => s.push_str(OUTPUT_REPLACEMENTS[i].1),
_ => s.push(c),
match OUTPUT_REPLACEMENTS.get(&c) {
Some(r) => s.push_str(r),
None => s.push(c),
}
s
})
Expand Down
5 changes: 5 additions & 0 deletions src/tools/tidy/src/deps.rs
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,10 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[
"parking_lot_core",
"pathdiff",
"perf-event-open-sys",
"phf",
"phf_generator",
"phf_macros",
"phf_shared",
"pin-project-lite",
"polonius-engine",
"portable-atomic", // dependency for platforms that don't support `AtomicU64` in std
Expand Down Expand Up @@ -386,6 +390,7 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[
"sha2",
"sharded-slab",
"shlex",
"siphasher",
"smallvec",
"snap",
"stable_deref_trait",
Expand Down

0 comments on commit 789baed

Please sign in to comment.