diff --git a/Cargo.lock b/Cargo.lock index 8bd1e5f3308b4..32a6f5fce74fe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -74,7 +74,7 @@ name = "arena" version = "0.0.0" dependencies = [ "rustc_data_structures", - "smallvec 1.0.0", + "smallvec 1.4.0", ] [[package]] @@ -492,7 +492,7 @@ dependencies = [ "regex-syntax", "semver", "serde", - "smallvec 1.0.0", + "smallvec 1.4.0", "toml", "unicode-normalization", "url 2.1.0", @@ -2428,7 +2428,7 @@ dependencies = [ "cloudabi", "libc", "redox_syscall", - "smallvec 1.0.0", + "smallvec 1.4.0", "winapi 0.3.8", ] @@ -3151,7 +3151,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81dfcfbb0ddfd533abf8c076e3b49d1e5042d1962526a12ce2c66d514b24cca3" dependencies = [ "rustc-ap-rustc_data_structures", - "smallvec 1.0.0", + "smallvec 1.4.0", ] [[package]] @@ -3174,7 +3174,7 @@ dependencies = [ "rustc-ap-rustc_span", "rustc-ap-serialize", "scoped-tls", - "smallvec 1.0.0", + "smallvec 1.4.0", ] [[package]] @@ -3223,7 +3223,7 @@ dependencies = [ "rustc-ap-rustc_session", "rustc-ap-rustc_span", "rustc-ap-serialize", - "smallvec 1.0.0", + "smallvec 1.4.0", ] [[package]] @@ -3249,7 +3249,7 @@ dependencies = [ "rustc-hash", "rustc-rayon", "rustc-rayon-core", - "smallvec 1.0.0", + "smallvec 1.4.0", "stable_deref_trait", "winapi 0.3.8", ] @@ -3291,7 +3291,7 @@ dependencies = [ "rustc-ap-rustc_session", "rustc-ap-rustc_span", "rustc-ap-serialize", - "smallvec 1.0.0", + "smallvec 1.4.0", ] [[package]] @@ -3318,7 +3318,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32220c3e6cdf226f38e4474b747dca15f3106bb680c74f10b299af3f6cdb1663" dependencies = [ "rustc-ap-serialize", - "smallvec 1.0.0", + "smallvec 1.4.0", ] [[package]] @@ -3358,7 +3358,7 @@ dependencies = [ "rustc-ap-rustc_lexer", "rustc-ap-rustc_session", "rustc-ap-rustc_span", - "smallvec 1.0.0", + "smallvec 1.4.0", "unicode-normalization", ] @@ -3423,7 +3423,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "756e8f526ec7906e132188bf25e3c10a6ee42ab77294ecb3b3602647f0508eef" dependencies = [ "indexmap", - "smallvec 1.0.0", + "smallvec 1.4.0", ] [[package]] @@ -3507,7 +3507,7 @@ dependencies = [ "serde", "serde_json", "smallvec 0.6.10", - "smallvec 1.0.0", + "smallvec 1.4.0", "syn 0.15.35", "url 2.1.0", "winapi 0.3.8", @@ -3518,7 +3518,7 @@ name = "rustc_apfloat" version = "0.0.0" dependencies = [ "bitflags", - "smallvec 1.0.0", + "smallvec 1.4.0", ] [[package]] @@ -3533,7 +3533,7 @@ dependencies = [ "rustc_span", "scoped-tls", "serialize", - "smallvec 1.0.0", + "smallvec 1.4.0", ] [[package]] @@ -3551,7 +3551,7 @@ dependencies = [ "rustc_session", "rustc_span", "rustc_target", - "smallvec 1.0.0", + "smallvec 1.4.0", ] [[package]] @@ -3612,7 +3612,7 @@ dependencies = [ "rustc_session", "rustc_span", "rustc_target", - "smallvec 1.0.0", + "smallvec 1.4.0", ] [[package]] @@ -3641,7 +3641,7 @@ dependencies = [ "rustc_span", "rustc_target", "serialize", - "smallvec 1.0.0", + "smallvec 1.4.0", ] [[package]] @@ -3694,7 +3694,7 @@ dependencies = [ "rustc-rayon-core", "rustc_index", "serialize", - "smallvec 1.0.0", + "smallvec 1.4.0", "stable_deref_trait", "winapi 0.3.8", ] @@ -3768,7 +3768,7 @@ dependencies = [ "rustc_session", "rustc_span", "serialize", - "smallvec 1.0.0", + "smallvec 1.4.0", ] [[package]] @@ -3797,7 +3797,7 @@ dependencies = [ "rustc_span", "rustc_target", "serialize", - "smallvec 1.0.0", + "smallvec 1.4.0", ] [[package]] @@ -3833,7 +3833,7 @@ name = "rustc_index" version = "0.0.0" dependencies = [ "serialize", - "smallvec 1.0.0", + "smallvec 1.4.0", ] [[package]] @@ -3853,7 +3853,7 @@ dependencies = [ "rustc_span", "rustc_target", "serialize", - "smallvec 1.0.0", + "smallvec 1.4.0", ] [[package]] @@ -3895,7 +3895,7 @@ dependencies = [ "rustc_ty", "rustc_typeck", "serialize", - "smallvec 1.0.0", + "smallvec 1.4.0", "tempfile", "winapi 0.3.8", ] @@ -3968,7 +3968,7 @@ dependencies = [ "rustc_span", "rustc_target", "serialize", - "smallvec 1.0.0", + 
"smallvec 1.4.0", "stable_deref_trait", "winapi 0.3.8", ] @@ -4000,7 +4000,7 @@ dependencies = [ "rustc_target", "scoped-tls", "serialize", - "smallvec 1.0.0", + "smallvec 1.4.0", ] [[package]] @@ -4029,7 +4029,7 @@ dependencies = [ "rustc_target", "rustc_trait_selection", "serialize", - "smallvec 1.0.0", + "smallvec 1.4.0", ] [[package]] @@ -4053,7 +4053,7 @@ dependencies = [ "rustc_target", "rustc_trait_selection", "serialize", - "smallvec 1.0.0", + "smallvec 1.4.0", ] [[package]] @@ -4134,7 +4134,7 @@ dependencies = [ "rustc_index", "rustc_span", "serialize", - "smallvec 1.0.0", + "smallvec 1.4.0", ] [[package]] @@ -4157,7 +4157,7 @@ dependencies = [ "rustc_middle", "rustc_session", "rustc_span", - "smallvec 1.0.0", + "smallvec 1.4.0", ] [[package]] @@ -4272,7 +4272,7 @@ dependencies = [ "rustc_session", "rustc_span", "rustc_target", - "smallvec 1.0.0", + "smallvec 1.4.0", ] [[package]] @@ -4287,7 +4287,7 @@ dependencies = [ "rustc_middle", "rustc_span", "rustc_trait_selection", - "smallvec 1.0.0", + "smallvec 1.4.0", ] [[package]] @@ -4324,7 +4324,7 @@ dependencies = [ "rustc_span", "rustc_target", "rustc_trait_selection", - "smallvec 1.0.0", + "smallvec 1.4.0", ] [[package]] @@ -4568,7 +4568,7 @@ name = "serialize" version = "0.0.0" dependencies = [ "indexmap", - "smallvec 1.0.0", + "smallvec 1.4.0", ] [[package]] @@ -4635,9 +4635,9 @@ checksum = "ab606a9c5e214920bb66c458cd7be8ef094f813f20fe77a54cc7dbfff220d4b7" [[package]] name = "smallvec" -version = "1.0.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ecf3b85f68e8abaa7555aa5abdb1153079387e60b718283d732f03897fcfc86" +checksum = "c7cb5678e1615754284ec264d9bb5b4c27d2018577fd90ac0ceb578591ed5ee4" [[package]] name = "socket2" @@ -5359,11 +5359,11 @@ dependencies = [ [[package]] name = "unicode-normalization" -version = "0.1.11" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b561e267b2326bb4cebfc0ef9e68355c7abe6c6f522aeac2f5bf95d56c59bdcf" +checksum = "5479532badd04e128284890390c1e876ef7a993d0570b3597ae43dfa1d59afa4" dependencies = [ - "smallvec 1.0.0", + "smallvec 1.4.0", ] [[package]] @@ -5374,10 +5374,11 @@ checksum = "5b2c5c29e805da6817f5af6a627d65adb045cebf05cccd5a3493d6109454391c" [[package]] name = "unicode-security" -version = "0.0.2" +version = "0.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c49d35967fa037b881acc34ef717c38c4b5560eba10e3685271b3f530bb19634" +checksum = "a5f9011bbed9c13372bc8df618b55a38138445199caf3b61d432c6859c36dee0" dependencies = [ + "unicode-normalization", "unicode-script", ] diff --git a/src/librustc_expand/proc_macro_server.rs b/src/librustc_expand/proc_macro_server.rs index 5f21ff503d59e..10baffb28ac2d 100644 --- a/src/librustc_expand/proc_macro_server.rs +++ b/src/librustc_expand/proc_macro_server.rs @@ -141,10 +141,10 @@ impl FromInternal<(TreeAndJoint, &'_ ParseSess, &'_ mut Vec)> SingleQuote => op!('\''), Ident(name, false) if name == kw::DollarCrate => tt!(Ident::dollar_crate()), - Ident(name, is_raw) => tt!(Ident::new(name, is_raw)), + Ident(name, is_raw) => tt!(Ident::new(sess, name, is_raw)), Lifetime(name) => { let ident = ast::Ident::new(name, span).without_first_quote(); - stack.push(tt!(Ident::new(ident.name, false))); + stack.push(tt!(Ident::new(sess, ident.name, false))); tt!(Punct::new('\'', true)) } Literal(lit) => tt!(Literal { lit }), @@ -322,7 +322,7 @@ impl Ident { false } } - fn new(sym: Symbol, is_raw: bool, span: Span) -> Ident { + fn new(sess: &ParseSess, sym: Symbol, is_raw: bool, span: Span) -> Ident { let sym = nfc_normalize(&sym.as_str()); let string = sym.as_str(); if !Self::is_valid(&string) { @@ -331,6 +331,7 @@ impl Ident { if is_raw && !sym.can_be_raw() { panic!("`{}` cannot be a raw identifier", string); } + sess.symbol_gallery.insert(sym, span); Ident { sym, is_raw, span } } fn dollar_crate(span: Span) -> Ident { @@ -495,7 +496,7 
@@ impl server::Punct for Rustc<'_> { impl server::Ident for Rustc<'_> { fn new(&mut self, string: &str, span: Self::Span, is_raw: bool) -> Self::Ident { - Ident::new(Symbol::intern(string), is_raw, span) + Ident::new(self.sess, Symbol::intern(string), is_raw, span) } fn span(&mut self, ident: Self::Ident) -> Self::Span { ident.span diff --git a/src/librustc_lint/Cargo.toml b/src/librustc_lint/Cargo.toml index b238a3156fae5..ada6f2a9381dc 100644 --- a/src/librustc_lint/Cargo.toml +++ b/src/librustc_lint/Cargo.toml @@ -10,7 +10,7 @@ path = "lib.rs" [dependencies] log = "0.4" -unicode-security = "0.0.2" +unicode-security = "0.0.3" rustc_middle = { path = "../librustc_middle" } rustc_ast_pretty = { path = "../librustc_ast_pretty" } rustc_attr = { path = "../librustc_attr" } diff --git a/src/librustc_lint/levels.rs b/src/librustc_lint/levels.rs index 7e8e4f59707c1..293dd326d3b08 100644 --- a/src/librustc_lint/levels.rs +++ b/src/librustc_lint/levels.rs @@ -388,6 +388,11 @@ impl<'s> LintLevelsBuilder<'s> { self.cur = push.prev; } + /// Find the lint level for a lint. + pub fn lint_level(&self, lint: &'static Lint) -> (Level, LintSource) { + self.sets.get_lint_level(lint, self.cur, None, self.sess) + } + /// Used to emit a lint-related diagnostic based on the current state of /// this lint context. 
pub fn struct_lint( @@ -396,7 +401,7 @@ impl<'s> LintLevelsBuilder<'s> { span: Option, decorate: impl for<'a> FnOnce(LintDiagnosticBuilder<'a>), ) { - let (level, src) = self.sets.get_lint_level(lint, self.cur, None, self.sess); + let (level, src) = self.lint_level(lint); struct_lint_level(self.sess, lint, level, src, span, decorate) } diff --git a/src/librustc_lint/non_ascii_idents.rs b/src/librustc_lint/non_ascii_idents.rs index 470fac2675bac..94fc1a228df03 100644 --- a/src/librustc_lint/non_ascii_idents.rs +++ b/src/librustc_lint/non_ascii_idents.rs @@ -1,5 +1,9 @@ use crate::{EarlyContext, EarlyLintPass, LintContext}; use rustc_ast::ast; +use rustc_data_structures::fx::FxHashMap; +use rustc_span::symbol::SymbolStr; +use std::hash::{Hash, Hasher}; +use std::ops::Deref; declare_lint! { pub NON_ASCII_IDENTS, @@ -13,9 +17,144 @@ declare_lint! { "detects uncommon Unicode codepoints in identifiers" } -declare_lint_pass!(NonAsciiIdents => [NON_ASCII_IDENTS, UNCOMMON_CODEPOINTS]); +// FIXME: Change this to warn. +declare_lint! 
{ + pub CONFUSABLE_IDENTS, + Allow, + "detects visually confusable pairs between identifiers" +} + +declare_lint_pass!(NonAsciiIdents => [NON_ASCII_IDENTS, UNCOMMON_CODEPOINTS, CONFUSABLE_IDENTS]); + +enum CowBoxSymStr { + Interned(SymbolStr), + Owned(Box), +} + +impl Deref for CowBoxSymStr { + type Target = str; + + fn deref(&self) -> &str { + match self { + CowBoxSymStr::Interned(interned) => interned, + CowBoxSymStr::Owned(ref owned) => owned, + } + } +} + +impl Hash for CowBoxSymStr { + #[inline] + fn hash(&self, state: &mut H) { + Hash::hash(&**self, state) + } +} + +impl PartialEq for CowBoxSymStr { + #[inline] + fn eq(&self, other: &CowBoxSymStr) -> bool { + PartialEq::eq(&**self, &**other) + } +} + +impl Eq for CowBoxSymStr {} + +fn calc_skeleton(symbol_str: SymbolStr, buffer: &'_ mut String) -> CowBoxSymStr { + use std::mem::swap; + use unicode_security::confusable_detection::skeleton; + buffer.clear(); + buffer.extend(skeleton(&symbol_str)); + if symbol_str == *buffer { + CowBoxSymStr::Interned(symbol_str) + } else { + let mut owned = String::new(); + swap(buffer, &mut owned); + CowBoxSymStr::Owned(owned.into_boxed_str()) + } +} + +fn is_in_ascii_confusable_closure(c: char) -> bool { + // FIXME: move this table to `unicode_security` crate. + // data here corresponds to Unicode 13. + const ASCII_CONFUSABLE_CLOSURE: &[(u64, u64)] = &[(0x00, 0x7f), (0xba, 0xba), (0x2080, 0x2080)]; + let c = c as u64; + for &(range_start, range_end) in ASCII_CONFUSABLE_CLOSURE { + if c >= range_start && c <= range_end { + return true; + } + } + false +} + +fn is_in_ascii_confusable_closure_relevant_list(c: char) -> bool { + // FIXME: move this table to `unicode_security` crate. + // data here corresponds to Unicode 13. 
+ const ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST: &[u64] = &[ + 0x22, 0x25, 0x27, 0x2f, 0x30, 0x31, 0x49, 0x4f, 0x60, 0x6c, 0x6d, 0x6e, 0x72, 0x7c, 0xba, + 0x2080, + ]; + let c = c as u64; + for &item in ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST { + if c == item { + return true; + } + } + false +} impl EarlyLintPass for NonAsciiIdents { + fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) { + use rustc_session::lint::Level; + if cx.builder.lint_level(CONFUSABLE_IDENTS).0 == Level::Allow { + return; + } + let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock(); + let mut symbol_strs_and_spans = Vec::with_capacity(symbols.len()); + let mut in_fast_path = true; + for (symbol, sp) in symbols.iter() { + // fast path + let symbol_str = symbol.as_str(); + if !symbol_str.chars().all(is_in_ascii_confusable_closure) { + // fallback to slow path. + symbol_strs_and_spans.clear(); + in_fast_path = false; + break; + } + if symbol_str.chars().any(is_in_ascii_confusable_closure_relevant_list) { + symbol_strs_and_spans.push((symbol_str, *sp)); + } + } + if !in_fast_path { + // slow path + for (symbol, sp) in symbols.iter() { + let symbol_str = symbol.as_str(); + symbol_strs_and_spans.push((symbol_str, *sp)); + } + } + drop(symbols); + symbol_strs_and_spans.sort_by_key(|x| x.0.clone()); + let mut skeleton_map = + FxHashMap::with_capacity_and_hasher(symbol_strs_and_spans.len(), Default::default()); + let mut str_buf = String::new(); + for (symbol_str, sp) in symbol_strs_and_spans { + let skeleton = calc_skeleton(symbol_str.clone(), &mut str_buf); + skeleton_map + .entry(skeleton) + .and_modify(|(existing_symbolstr, existing_span)| { + cx.struct_span_lint(CONFUSABLE_IDENTS, sp, |lint| { + lint.build(&format!( + "identifier pair considered confusable between `{}` and `{}`", + existing_symbolstr, symbol_str + )) + .span_label( + *existing_span, + "this is where the previous identifier occurred", + ) + .emit(); + }); + }) + .or_insert((symbol_str, sp)); + } + } fn 
check_ident(&mut self, cx: &EarlyContext<'_>, ident: ast::Ident) { use unicode_security::GeneralSecurityProfile; let name_str = ident.name.as_str(); diff --git a/src/librustc_parse/lexer/mod.rs b/src/librustc_parse/lexer/mod.rs index 96321ef2145e5..f676a34a1d12b 100644 --- a/src/librustc_parse/lexer/mod.rs +++ b/src/librustc_parse/lexer/mod.rs @@ -222,8 +222,9 @@ impl<'a> StringReader<'a> { ident_start = ident_start + BytePos(2); } let sym = nfc_normalize(self.str_from(ident_start)); + let span = self.mk_sp(start, self.pos); + self.sess.symbol_gallery.insert(sym, span); if is_raw_ident { - let span = self.mk_sp(start, self.pos); if !sym.can_be_raw() { self.err_span(span, &format!("`{}` cannot be a raw identifier", sym)); } diff --git a/src/librustc_session/parse.rs b/src/librustc_session/parse.rs index 387d35422c43e..69d3e99b7458e 100644 --- a/src/librustc_session/parse.rs +++ b/src/librustc_session/parse.rs @@ -60,6 +60,20 @@ impl GatedSpans { } } +#[derive(Default)] +pub struct SymbolGallery { + /// All symbols that have occurred, each with the span of its first occurrence. + pub symbols: Lock>, +} + +impl SymbolGallery { + /// Insert a symbol and its span into symbol gallery. + /// If the symbol has occurred before, ignore the new occurrence. + pub fn insert(&self, symbol: Symbol, span: Span) { + self.symbols.lock().entry(symbol).or_insert(span); + } +} + /// Construct a diagnostic for a language feature error due to the given `span`. /// The `feature`'s `Symbol` is the one you used in `active.rs` and `rustc_span::symbols`. pub fn feature_err<'a>( @@ -118,6 +132,7 @@ pub struct ParseSess { pub ambiguous_block_expr_parse: Lock>, pub injected_crate_name: Once, pub gated_spans: GatedSpans, + pub symbol_gallery: SymbolGallery, /// The parser has reached `Eof` due to an unclosed brace. Used to silence unnecessary errors. 
pub reached_eof: Lock, } @@ -143,6 +158,7 @@ impl ParseSess { ambiguous_block_expr_parse: Lock::new(FxHashMap::default()), injected_crate_name: Once::new(), gated_spans: GatedSpans::default(), + symbol_gallery: SymbolGallery::default(), reached_eof: Lock::new(false), } } diff --git a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.rs b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.rs new file mode 100644 index 0000000000000..12093837d2630 --- /dev/null +++ b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.rs @@ -0,0 +1,9 @@ +#![feature(non_ascii_idents)] +#![deny(confusable_idents)] +#![allow(uncommon_codepoints, non_upper_case_globals)] + +const s: usize = 42; //~ ERROR identifier pair considered confusable + +fn main() { + let s = "rust"; +} diff --git a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.stderr b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.stderr new file mode 100644 index 0000000000000..40ee18acb3cd4 --- /dev/null +++ b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.stderr @@ -0,0 +1,17 @@ +error: identifier pair considered confusable between `s` and `s` + --> $DIR/lint-confusable-idents.rs:5:7 + | +LL | const s: usize = 42; + | ^^ +... +LL | let s = "rust"; + | - this is where the previous identifier occurred + | +note: the lint level is defined here + --> $DIR/lint-confusable-idents.rs:2:9 + | +LL | #![deny(confusable_idents)] + | ^^^^^^^^^^^^^^^^^ + +error: aborting due to previous error +