From 8455361bdd2a0e422cae684945ad527495480dcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Esteban=20K=C3=BCber?= Date: Thu, 28 Nov 2019 08:56:01 -0800 Subject: [PATCH] Account for unicode confusables on typoed identifiers When a name can't be matched to any existing identifier, we look for identifiers that have the same "skeleton" for possible suggestions, instead of relying only on Levenshtein distance. --- Cargo.lock | 11 ++++ src/librustc_resolve/Cargo.toml | 1 + src/librustc_resolve/diagnostics.rs | 8 ++- src/libsyntax/Cargo.toml | 1 + src/libsyntax/util/lev_distance.rs | 57 +++++++++++-------- .../suggestions/unicode-confusable-typo.fixed | 11 ++++ .../ui/suggestions/unicode-confusable-typo.rs | 11 ++++ .../unicode-confusable-typo.stderr | 18 ++++++ 8 files changed, 92 insertions(+), 26 deletions(-) create mode 100644 src/test/ui/suggestions/unicode-confusable-typo.fixed create mode 100644 src/test/ui/suggestions/unicode-confusable-typo.rs create mode 100644 src/test/ui/suggestions/unicode-confusable-typo.stderr diff --git a/Cargo.lock b/Cargo.lock index 6e152e96070d3..e7eccc700c7fd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3871,6 +3871,7 @@ dependencies = [ "syntax", "syntax_expand", "syntax_pos", + "unicode_skeleton", ] [[package]] @@ -4471,6 +4472,7 @@ dependencies = [ "serialize", "smallvec 1.0.0", "syntax_pos", + "unicode_skeleton", ] [[package]] @@ -5045,6 +5047,15 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" +[[package]] +name = "unicode_skeleton" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66bd74db2c088d393d1fbf83db2cd5663137640f072d128287dd53c882a0f412" +dependencies = [ + "unicode-normalization", +] + [[package]] name = "unstable-book-gen" version = "0.1.0" diff --git a/src/librustc_resolve/Cargo.toml b/src/librustc_resolve/Cargo.toml index 6cce893f8ecdd..431a34dde4b4a 100644 --- 
a/src/librustc_resolve/Cargo.toml +++ b/src/librustc_resolve/Cargo.toml @@ -23,3 +23,4 @@ rustc_data_structures = { path = "../librustc_data_structures" } rustc_metadata = { path = "../librustc_metadata" } rustc_error_codes = { path = "../librustc_error_codes" } smallvec = { version = "1.0", features = ["union", "may_dangle"] } +unicode_skeleton = "0.1.1" diff --git a/src/librustc_resolve/diagnostics.rs b/src/librustc_resolve/diagnostics.rs index e134b8b92ac2e..d2fc24835e048 100644 --- a/src/librustc_resolve/diagnostics.rs +++ b/src/librustc_resolve/diagnostics.rs @@ -488,9 +488,11 @@ impl<'a> Resolver<'a> { &ident.as_str(), None, ) { - Some(found) if found != ident.name => suggestions - .into_iter() - .find(|suggestion| suggestion.candidate == found), + Some(found) if found != ident.name => suggestions.into_iter() + .find(|suggestion| unicode_skeleton::confusable( + suggestion.candidate.as_str().chars(), + found.as_str().chars(), + )), _ => None, } } diff --git a/src/libsyntax/Cargo.toml b/src/libsyntax/Cargo.toml index dff23076c82e6..b07beb998a0e8 100644 --- a/src/libsyntax/Cargo.toml +++ b/src/libsyntax/Cargo.toml @@ -23,3 +23,4 @@ rustc_lexer = { path = "../librustc_lexer" } rustc_macros = { path = "../librustc_macros" } smallvec = { version = "1.0", features = ["union", "may_dangle"] } rustc_error_codes = { path = "../librustc_error_codes" } +unicode_skeleton = "0.1.1" diff --git a/src/libsyntax/util/lev_distance.rs b/src/libsyntax/util/lev_distance.rs index 4127a8c7fce25..1cf7e7ae212e6 100644 --- a/src/libsyntax/util/lev_distance.rs +++ b/src/libsyntax/util/lev_distance.rs @@ -49,34 +49,45 @@ pub fn find_best_match_for_name<'a, T>(iter_names: T, where T: Iterator { let max_dist = dist.map_or_else(|| cmp::max(lookup.len(), 3) / 3, |d| d); - let (case_insensitive_match, levenstein_match) = iter_names - .filter_map(|&name| { - let dist = lev_distance(lookup, &name.as_str()); - if dist <= max_dist { - Some((name, dist)) - } else { - None - } - }) - // Here we are 
collecting the next structure: - // (case_insensitive_match, (levenstein_match, levenstein_distance)) - .fold((None, None), |result, (candidate, dist)| { - ( - if candidate.as_str().to_uppercase() == lookup.to_uppercase() { - Some(candidate) + let (case_insensitive_match, levenstein_match, is_confusable) = iter_names + .filter_map(|&name| { + let dist = lev_distance(lookup, &name.as_str()); + let is_confusable = unicode_skeleton::confusable(lookup, name.as_str().chars()); + if dist <= max_dist || is_confusable { + Some((name, dist, is_confusable)) } else { - result.0 - }, - match result.1 { - None => Some((candidate, dist)), - Some((c, d)) => Some(if dist < d { (candidate, dist) } else { (c, d) }) + None } - ) - }); + }) + // Here we are collecting the next structure: + // (case_insensitive_match, (levenstein_match, levenstein_distance)) + .fold((None, None, None), |result, (candidate, dist, is_confusable)| { + ( + if candidate.as_str().to_uppercase() == lookup.to_uppercase() { + Some(candidate) + } else { + result.0 + }, + match result.1 { + None => Some((candidate, dist)), + Some((c, d)) => Some(if dist < d { (candidate, dist) } else { (c, d) }), + }, + match (is_confusable, result.2) { + (false, Some((c, d))) => Some((c, d)), + (false, None) => None, + (true, None) => Some((candidate, dist)), + (true, Some((c, d))) => Some(if dist < d { (candidate, dist) } else { (c, d) }), + } + ) + }); if let Some(candidate) = case_insensitive_match { Some(candidate) // exact case insensitive match has a higher priority + } else if let Some((candidate, _)) = is_confusable { + Some(candidate) + } else if let Some((candidate, _)) = levenstein_match { + Some(candidate) } else { - if let Some((candidate, _)) = levenstein_match { Some(candidate) } else { None } + None } } diff --git a/src/test/ui/suggestions/unicode-confusable-typo.fixed b/src/test/ui/suggestions/unicode-confusable-typo.fixed new file mode 100644 index 0000000000000..bb8db97f0b2ce --- /dev/null +++ 
b/src/test/ui/suggestions/unicode-confusable-typo.fixed @@ -0,0 +1,11 @@ +// run-rustfix +#![feature(non_ascii_idents)] + +struct ℝ𝓊𝓈𝓉; + +fn main() { + let ü = ℝ𝓊𝓈𝓉; + //~^ ERROR cannot find value `Rust` in this scope + let _ = ü; + //~^ ERROR cannot find value `u` in this scope +} diff --git a/src/test/ui/suggestions/unicode-confusable-typo.rs b/src/test/ui/suggestions/unicode-confusable-typo.rs new file mode 100644 index 0000000000000..c34c12703970b --- /dev/null +++ b/src/test/ui/suggestions/unicode-confusable-typo.rs @@ -0,0 +1,11 @@ +// run-rustfix +#![feature(non_ascii_idents)] + +struct ℝ𝓊𝓈𝓉; + +fn main() { + let ü = Rust; + //~^ ERROR cannot find value `Rust` in this scope + let _ = u; + //~^ ERROR cannot find value `u` in this scope +} diff --git a/src/test/ui/suggestions/unicode-confusable-typo.stderr b/src/test/ui/suggestions/unicode-confusable-typo.stderr new file mode 100644 index 0000000000000..829b0c93a8504 --- /dev/null +++ b/src/test/ui/suggestions/unicode-confusable-typo.stderr @@ -0,0 +1,18 @@ +error[E0425]: cannot find value `Rust` in this scope + --> $DIR/unicode-confusable-typo.rs:7:13 + | +LL | struct ℝ𝓊𝓈𝓉; + | ------------ similarly named unit struct `ℝ𝓊𝓈𝓉` defined here +... +LL | let ü = Rust; + | ^^^^ help: a unit struct with a similar name exists: `ℝ𝓊𝓈𝓉` + +error[E0425]: cannot find value `u` in this scope + --> $DIR/unicode-confusable-typo.rs:9:13 + | +LL | let _ = u; + | ^ help: a local variable with a similar name exists: `ü` + +error: aborting due to 2 previous errors + +For more information about this error, try `rustc --explain E0425`.