Skip to content

Commit

Permalink
Merge pull request #146 from birkenfeld/unicode
Browse files Browse the repository at this point in the history
unicode: add lint against non-ascii chars in literals (Allow by default)
  • Loading branch information
Manishearth committed Aug 12, 2015
2 parents 79bf774 + 3044d3d commit dbd396d
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 19 deletions.
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ pub fn plugin_registrar(reg: &mut Registry) {
attrs::INLINE_ALWAYS,
collapsible_if::COLLAPSIBLE_IF,
unicode::ZERO_WIDTH_SPACE,
unicode::NON_ASCII_LITERAL,
strings::STRING_ADD_ASSIGN,
returns::NEEDLESS_RETURN,
misc::MODULO_ONE,
Expand Down
28 changes: 13 additions & 15 deletions src/unicode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@ use syntax::codemap::{BytePos, Span};
use utils::span_lint;

declare_lint!{ pub ZERO_WIDTH_SPACE, Deny, "Zero-width space is confusing" }
declare_lint!{ pub NON_ASCII_LITERAL, Allow, "Lint literal non-ASCII chars in literals" }

#[derive(Copy, Clone)]
pub struct Unicode;

impl LintPass for Unicode {
fn get_lints(&self) -> LintArray {
lint_array!(ZERO_WIDTH_SPACE)
lint_array!(ZERO_WIDTH_SPACE, NON_ASCII_LITERAL)
}

fn check_expr(&mut self, cx: &Context, expr: &Expr) {
Expand All @@ -23,24 +24,21 @@ impl LintPass for Unicode {
}

fn check_str(cx: &Context, string: &str, span: Span) {
let mut start: Option<usize> = None;
for (i, c) in string.char_indices() {
if c == '\u{200B}' {
if start.is_none() { start = Some(i); }
} else {
lint_zero_width(cx, span, start);
start = None;
str_pos_lint(cx, ZERO_WIDTH_SPACE, span, i,
"zero-width space detected. Consider using `\\u{200B}`.");
}
if c as u32 > 0x7F {
str_pos_lint(cx, NON_ASCII_LITERAL, span, i, &format!(
"literal non-ASCII character detected. Consider using `\\u{{{:X}}}`.", c as u32));
}
}
lint_zero_width(cx, span, start);
}

fn lint_zero_width(cx: &Context, span: Span, start: Option<usize>) {
start.map(|index| {
span_lint(cx, ZERO_WIDTH_SPACE, Span {
lo: span.lo + BytePos(index as u32),
hi: span.lo + BytePos(index as u32),
expn_id: span.expn_id,
}, "zero-width space detected. Consider using `\\u{200B}`.")
});
/// Emits `lint` with message `msg` at a single position computed from `span` and `index`.
///
/// The reported span is empty: its `lo` and `hi` are both `span.lo + (1 + index)`,
/// and its `expn_id` is copied unchanged from `span`.
fn str_pos_lint(cx: &Context, lint: &'static Lint, span: Span, index: usize, msg: &str) {
// NOTE(review): the `1 +` presumably skips the literal's opening quote, and `index`
// is presumably a byte offset into the literal's contents (`check_str` passes
// `char_indices()` offsets) — confirm. If so, raw-string prefixes or escape
// sequences in the source text would make the position drift; verify.
// `as u32` truncates silently if `1 + index` does not fit in 32 bits.
span_lint(cx, lint, Span { lo: span.lo + BytePos((1 + index) as u32),
hi: span.lo + BytePos((1 + index) as u32),
expn_id: span.expn_id }, msg);

}
8 changes: 4 additions & 4 deletions tests/compile-fail/unicode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,18 @@
#[deny(zero_width_space)]
fn zero() {
print!("Here >​< is a ZWS, and ​another");
//~^ ERROR zero-width space detected. Consider using `\u{200B}`
//~^^ ERROR zero-width space detected. Consider using `\u{200B}`
//~^ ERROR zero-width space detected. Consider using `\u{200B}`
//~^^ ERROR zero-width space detected. Consider using `\u{200B}`
}

//#[deny(unicode_canon)]
fn canon() {
print!("̀ah?"); //not yet ~ERROR non-canonical unicode sequence detected. Consider using à
}

//#[deny(ascii_only)]
#[deny(non_ascii_literal)]
fn uni() {
println!("Üben!"); //not yet ~ERROR Unicode literal detected. Consider using \u{FC}
print!("Üben!"); //~ERROR literal non-ASCII character detected. Consider using `\u{DC}`
}

fn main() {
Expand Down

0 comments on commit dbd396d

Please sign in to comment.