Merge pull request #146 from birkenfeld/unicode

unicode: add lint against non-ascii chars in literals (Allow by default)
rust-lang · Aug 12, 2015 · dbd396d
2 parents: 79bf774 + 3044d3d
commit dbd396d
Show file tree

Hide file tree

Showing 3 changed files with 18 additions and 19 deletions.
diff --git a/src/lib.rs b/src/lib.rs
@@ -78,6 +78,7 @@ pub fn plugin_registrar(reg: &mut Registry) {
                                            attrs::INLINE_ALWAYS,
                                            collapsible_if::COLLAPSIBLE_IF,
                                            unicode::ZERO_WIDTH_SPACE,
+                                           unicode::NON_ASCII_LITERAL,
                                            strings::STRING_ADD_ASSIGN,
                                            returns::NEEDLESS_RETURN,
                                            misc::MODULO_ONE,

diff --git a/src/unicode.rs b/src/unicode.rs
@@ -4,13 +4,14 @@ use syntax::codemap::{BytePos, Span};
 use utils::span_lint;
 
 declare_lint!{ pub ZERO_WIDTH_SPACE, Deny, "Zero-width space is confusing" }
+declare_lint!{ pub NON_ASCII_LITERAL, Allow, "Lint literal non-ASCII chars in literals" }
 
 #[derive(Copy, Clone)]
 pub struct Unicode;
 
 impl LintPass for Unicode {
     fn get_lints(&self) -> LintArray {
-        lint_array!(ZERO_WIDTH_SPACE)
+        lint_array!(ZERO_WIDTH_SPACE, NON_ASCII_LITERAL)
     }
 
     fn check_expr(&mut self, cx: &Context, expr: &Expr) {
@@ -23,24 +24,21 @@ impl LintPass for Unicode {
 }
 
 fn check_str(cx: &Context, string: &str, span: Span) {
-    let mut start: Option<usize> = None;
     for (i, c) in string.char_indices() {
         if c == '\u{200B}' {
-            if start.is_none() { start = Some(i); }
-        } else {
-            lint_zero_width(cx, span, start);
-            start = None;
+            str_pos_lint(cx, ZERO_WIDTH_SPACE, span, i,
+                         "zero-width space detected. Consider using `\\u{200B}`.");
+        }
+        if c as u32 > 0x7F {
+            str_pos_lint(cx, NON_ASCII_LITERAL, span, i, &format!(
+                "literal non-ASCII character detected. Consider using `\\u{{{:X}}}`.", c as u32));
         }
     }
-    lint_zero_width(cx, span, start);
 }
 
-fn lint_zero_width(cx: &Context, span: Span, start: Option<usize>) {
-    start.map(|index| {
-        span_lint(cx, ZERO_WIDTH_SPACE, Span {
-            lo: span.lo + BytePos(index as u32),
-            hi: span.lo + BytePos(index as u32),
-            expn_id: span.expn_id,
-        }, "zero-width space detected. Consider using `\\u{200B}`.")
-    });
+fn str_pos_lint(cx: &Context, lint: &'static Lint, span: Span, index: usize, msg: &str) {
+    span_lint(cx, lint, Span { lo: span.lo + BytePos((1 + index) as u32),
+                               hi: span.lo + BytePos((1 + index) as u32),
+                               expn_id: span.expn_id }, msg);
+
 }
diff --git a/tests/compile-fail/unicode.rs b/tests/compile-fail/unicode.rs
@@ -4,18 +4,18 @@
 #[deny(zero_width_space)]
 fn zero() {
     print!("Here >< is a ZWS, and another");
-                            //~^ ERROR zero-width space detected. Consider using `\u{200B}`
-                              //~^^ ERROR zero-width space detected. Consider using `\u{200B}`
+               //~^ ERROR zero-width space detected. Consider using `\u{200B}`
+                            //~^^ ERROR zero-width space detected. Consider using `\u{200B}`
 }
 
 //#[deny(unicode_canon)]
 fn canon() {
     print!("̀ah?"); //not yet ~ERROR non-canonical unicode sequence detected. Consider using à
 }
 
-//#[deny(ascii_only)]
+#[deny(non_ascii_literal)]
 fn uni() {
-    println!("Üben!"); //not yet ~ERROR Unicode literal detected. Consider using \u{FC}
+    print!("Üben!"); //~ERROR literal non-ASCII character detected. Consider using `\u{DC}`
 }
 
 fn main() {