From 515ca9312393bd00af0e867fceee9aff9b6e565d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20Buga?= Date: Fri, 2 Oct 2020 11:54:31 +0200 Subject: [PATCH 1/4] Look for soft hyphens as well --- clippy_lints/src/unicode.rs | 14 +++++++------- tests/ui/unicode.rs | 2 ++ tests/ui/unicode.stderr | 14 ++++++++++---- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/clippy_lints/src/unicode.rs b/clippy_lints/src/unicode.rs index d8c57f0e7ae7..d3fe60042a87 100644 --- a/clippy_lints/src/unicode.rs +++ b/clippy_lints/src/unicode.rs @@ -8,18 +8,18 @@ use rustc_span::source_map::Span; use unicode_normalization::UnicodeNormalization; declare_clippy_lint! { - /// **What it does:** Checks for the Unicode zero-width space in the code. + /// **What it does:** Checks for invisible Unicode characters in the code. /// /// **Why is this bad?** Having an invisible character in the code makes for all /// sorts of April fools, but otherwise is very much frowned upon. /// /// **Known problems:** None. /// - /// **Example:** You don't see it, but there may be a zero-width space - /// somewhere in this text. + /// **Example:** You don't see it, but there may be a zero-width space or soft hyphen + /// some­where in this text. pub ZERO_WIDTH_SPACE, correctness, - "using a zero-width space in a string literal, which is confusing" + "using an invisible character in a string literal, which is confusing" } declare_clippy_lint! { @@ -91,14 +91,14 @@ fn escape>(s: T) -> String { fn check_str(cx: &LateContext<'_>, span: Span, id: HirId) { let string = snippet(cx, span, ""); - if string.contains('\u{200B}') { + if let Some(invisible) = string.chars().find(|c| ['\u{200B}', '\u{ad}'].contains(&c)) { span_lint_and_sugg( cx, ZERO_WIDTH_SPACE, span, - "zero-width space detected", + &format!("invisible character detected: {:?}", invisible), "consider replacing the string with", - string.replace("\u{200B}", "\\u{200B}"), + string.replace("\u{200B}", "\\u{200B}").replace("\u{ad}", "\\u{AD}"), Applicability::MachineApplicable, ); } diff --git a/tests/ui/unicode.rs b/tests/ui/unicode.rs index 27db9594f3b3..f3fd1c57da63 100644 --- a/tests/ui/unicode.rs +++ b/tests/ui/unicode.rs @@ -2,6 +2,8 @@ fn zero() { print!("Here >​< is a ZWS, and ​another"); print!("This\u{200B}is\u{200B}fine"); + print!("Here >­< is a SHY, and ­another"); + print!("This\u{ad}is\u{ad}fine"); } #[warn(clippy::unicode_not_nfc)] diff --git a/tests/ui/unicode.stderr b/tests/ui/unicode.stderr index 4575a132e5b2..b0445b070fdd 100644 --- a/tests/ui/unicode.stderr +++ b/tests/ui/unicode.stderr @@ -1,4 +1,4 @@ -error: zero-width space detected +error: invisible character detected: '/u{200b}' --> $DIR/unicode.rs:3:12 | LL | print!("Here >​< is a ZWS, and ​another"); @@ -6,8 +6,14 @@ LL | print!("Here >​< is a ZWS, and ​another"); | = note: `-D clippy::zero-width-space` implied by `-D warnings` +error: invisible character detected: '/u{ad}' + --> $DIR/unicode.rs:5:12 + | +LL | print!("Here >­< is a SHY, and ­another"); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: consider replacing the string with: `"Here >/u{AD}< is a SHY, and /u{AD}another"` + error: non-NFC Unicode sequence detected - --> $DIR/unicode.rs:9:12 + --> $DIR/unicode.rs:11:12 | LL | print!("̀àh?"); | ^^^^^ help: consider replacing the string with: `"̀àh?"` @@ -15,12 +21,12 @@ LL | print!("̀àh?"); = note: `-D clippy::unicode-not-nfc` implied by `-D warnings` error: literal non-ASCII character detected - --> $DIR/unicode.rs:15:12 + --> $DIR/unicode.rs:17:12 | LL | print!("Üben!"); | ^^^^^^^ help: consider replacing the string with: `"/u{dc}ben!"` | = note: `-D clippy::non-ascii-literal` implied by `-D warnings` -error: aborting due to 3 previous errors +error: aborting due to 4 previous errors From 45f25f82fe652e073445e6f1601d25a7a292d01c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20Buga?= Date: Fri, 2 Oct 2020 12:02:54 +0200 Subject: [PATCH 2/4] Run update_lints --- src/lintlist/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lintlist/mod.rs b/src/lintlist/mod.rs index 16ceb6179654..3654dbc6124a 100644 --- a/src/lintlist/mod.rs +++ b/src/lintlist/mod.rs @@ -2813,7 +2813,7 @@ pub static ref ALL_LINTS: Vec = vec![ Lint { name: "zero_width_space", group: "correctness", - desc: "using a zero-width space in a string literal, which is confusing", + desc: "using an invisible character in a string literal, which is confusing", deprecation: None, module: "unicode", }, From 998bd3b6b4d168099346e460ae42897dc3667882 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20Buga?= Date: Sat, 3 Oct 2020 00:03:33 +0200 Subject: [PATCH 3/4] Rename lint to invisible_characters --- CHANGELOG.md | 2 +- clippy_lints/src/lib.rs | 7 ++++--- clippy_lints/src/unicode.rs | 10 +++++----- src/lintlist/mod.rs | 14 +++++++------- tests/ui/unicode.rs | 2 +- tests/ui/unicode.stderr | 6 +++--- 6 files changed, 21 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0de6f4b4235f..617bf32f4639 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1647,6 +1647,7 @@ Released 2018-09-13 [`invalid_ref`]: https://rust-lang.github.io/rust-clippy/master/index.html#invalid_ref [`invalid_regex`]: https://rust-lang.github.io/rust-clippy/master/index.html#invalid_regex [`invalid_upcast_comparisons`]: https://rust-lang.github.io/rust-clippy/master/index.html#invalid_upcast_comparisons +[`invisible_characters`]: https://rust-lang.github.io/rust-clippy/master/index.html#invisible_characters [`items_after_statements`]: https://rust-lang.github.io/rust-clippy/master/index.html#items_after_statements [`iter_cloned_collect`]: https://rust-lang.github.io/rust-clippy/master/index.html#iter_cloned_collect [`iter_next_loop`]: https://rust-lang.github.io/rust-clippy/master/index.html#iter_next_loop @@ -1922,6 +1923,5 @@ Released 2018-09-13 [`zero_divided_by_zero`]: https://rust-lang.github.io/rust-clippy/master/index.html#zero_divided_by_zero [`zero_prefixed_literal`]: https://rust-lang.github.io/rust-clippy/master/index.html#zero_prefixed_literal [`zero_ptr`]: https://rust-lang.github.io/rust-clippy/master/index.html#zero_ptr -[`zero_width_space`]: https://rust-lang.github.io/rust-clippy/master/index.html#zero_width_space [`zst_offset`]: https://rust-lang.github.io/rust-clippy/master/index.html#zst_offset diff --git a/clippy_lints/src/lib.rs b/clippy_lints/src/lib.rs index 10da59c7a7a0..91244ec2724b 100644 --- a/clippy_lints/src/lib.rs +++ b/clippy_lints/src/lib.rs @@ -854,9 +854,9 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf: &types::UNIT_CMP, &types::UNNECESSARY_CAST, &types::VEC_BOX, + &unicode::INVISIBLE_CHARACTERS, &unicode::NON_ASCII_LITERAL, &unicode::UNICODE_NOT_NFC, - &unicode::ZERO_WIDTH_SPACE, &unit_return_expecting_ord::UNIT_RETURN_EXPECTING_ORD, &unnamed_address::FN_ADDRESS_COMPARISONS, &unnamed_address::VTABLE_ADDRESS_COMPARISONS, @@ -1511,7 +1511,7 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf: LintId::of(&types::UNIT_CMP), LintId::of(&types::UNNECESSARY_CAST), LintId::of(&types::VEC_BOX), - LintId::of(&unicode::ZERO_WIDTH_SPACE), + LintId::of(&unicode::INVISIBLE_CHARACTERS), LintId::of(&unit_return_expecting_ord::UNIT_RETURN_EXPECTING_ORD), LintId::of(&unnamed_address::FN_ADDRESS_COMPARISONS), LintId::of(&unnamed_address::VTABLE_ADDRESS_COMPARISONS), @@ -1779,7 +1779,7 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf: LintId::of(&types::ABSURD_EXTREME_COMPARISONS), LintId::of(&types::CAST_REF_TO_MUT), LintId::of(&types::UNIT_CMP), - LintId::of(&unicode::ZERO_WIDTH_SPACE), + LintId::of(&unicode::INVISIBLE_CHARACTERS), LintId::of(&unit_return_expecting_ord::UNIT_RETURN_EXPECTING_ORD), LintId::of(&unnamed_address::FN_ADDRESS_COMPARISONS), LintId::of(&unnamed_address::VTABLE_ADDRESS_COMPARISONS), @@ -1910,6 +1910,7 @@ pub fn register_renamed(ls: &mut rustc_lint::LintStore) { ls.register_renamed("clippy::for_loop_over_option", "clippy::for_loops_over_fallibles"); ls.register_renamed("clippy::for_loop_over_result", "clippy::for_loops_over_fallibles"); ls.register_renamed("clippy::identity_conversion", "clippy::useless_conversion"); + ls.register_renamed("clippy::zero_width_space", "clippy::invisible_characters"); } // only exists to let the dogfood integration test works. diff --git a/clippy_lints/src/unicode.rs b/clippy_lints/src/unicode.rs index d3fe60042a87..d6c8d317dc2f 100644 --- a/clippy_lints/src/unicode.rs +++ b/clippy_lints/src/unicode.rs @@ -17,7 +17,7 @@ declare_clippy_lint! { /// /// **Example:** You don't see it, but there may be a zero-width space or soft hyphen /// some­where in this text. - pub ZERO_WIDTH_SPACE, + pub INVISIBLE_CHARACTERS, correctness, "using an invisible character in a string literal, which is confusing" } @@ -63,7 +63,7 @@ declare_clippy_lint! { "using a Unicode literal not in NFC normal form (see [Unicode tr15](http://www.unicode.org/reports/tr15/) for further information)" } -declare_lint_pass!(Unicode => [ZERO_WIDTH_SPACE, NON_ASCII_LITERAL, UNICODE_NOT_NFC]); +declare_lint_pass!(Unicode => [INVISIBLE_CHARACTERS, NON_ASCII_LITERAL, UNICODE_NOT_NFC]); impl LateLintPass<'_> for Unicode { fn check_expr(&mut self, cx: &LateContext<'_>, expr: &'_ Expr<'_>) { @@ -91,12 +91,12 @@ fn escape>(s: T) -> String { fn check_str(cx: &LateContext<'_>, span: Span, id: HirId) { let string = snippet(cx, span, ""); - if let Some(invisible) = string.chars().find(|c| ['\u{200B}', '\u{ad}'].contains(&c)) { + if string.chars().any(|c| ['\u{200B}', '\u{ad}'].contains(&c)) { span_lint_and_sugg( cx, - ZERO_WIDTH_SPACE, + INVISIBLE_CHARACTERS, span, - &format!("invisible character detected: {:?}", invisible), + "invisible character detected", "consider replacing the string with", string.replace("\u{200B}", "\\u{200B}").replace("\u{ad}", "\\u{AD}"), Applicability::MachineApplicable, diff --git a/src/lintlist/mod.rs b/src/lintlist/mod.rs index 3654dbc6124a..e7df733d3a27 100644 --- a/src/lintlist/mod.rs +++ b/src/lintlist/mod.rs @@ -969,6 +969,13 @@ pub static ref ALL_LINTS: Vec = vec![ deprecation: None, module: "types", }, + Lint { + name: "invisible_characters", + group: "correctness", + desc: "using an invisible character in a string literal, which is confusing", + deprecation: None, + module: "unicode", + }, Lint { name: "items_after_statements", group: "pedantic", @@ -2810,13 +2817,6 @@ pub static ref ALL_LINTS: Vec = vec![ deprecation: None, module: "misc", }, - Lint { - name: "zero_width_space", - group: "correctness", - desc: "using an invisible character in a string literal, which is confusing", - deprecation: None, - module: "unicode", - }, Lint { name: "zst_offset", group: "correctness", diff --git a/tests/ui/unicode.rs b/tests/ui/unicode.rs index f3fd1c57da63..b6944e048593 100644 --- a/tests/ui/unicode.rs +++ b/tests/ui/unicode.rs @@ -1,4 +1,4 @@ -#[warn(clippy::zero_width_space)] +#[warn(clippy::invisible_characters)] fn zero() { print!("Here >​< is a ZWS, and ​another"); print!("This\u{200B}is\u{200B}fine"); diff --git a/tests/ui/unicode.stderr b/tests/ui/unicode.stderr index b0445b070fdd..595d80ea2792 100644 --- a/tests/ui/unicode.stderr +++ b/tests/ui/unicode.stderr @@ -1,12 +1,12 @@ -error: invisible character detected: '/u{200b}' +error: invisible character detected --> $DIR/unicode.rs:3:12 | LL | print!("Here >​< is a ZWS, and ​another"); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: consider replacing the string with: `"Here >/u{200B}< is a ZWS, and /u{200B}another"` | - = note: `-D clippy::zero-width-space` implied by `-D warnings` + = note: `-D clippy::invisible-characters` implied by `-D warnings` -error: invisible character detected: '/u{ad}' +error: invisible character detected --> $DIR/unicode.rs:5:12 | LL | print!("Here >­< is a SHY, and ­another"); From 572e4c4837e5f955cdc3751b9ad63f0bfb86beac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20Buga?= Date: Sat, 3 Oct 2020 00:07:56 +0200 Subject: [PATCH 4/4] Add WJ --- clippy_lints/src/unicode.rs | 7 +++++-- tests/ui/unicode.rs | 2 ++ tests/ui/unicode.stderr | 12 +++++++++--- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/clippy_lints/src/unicode.rs b/clippy_lints/src/unicode.rs index d6c8d317dc2f..93d59cc7fcd1 100644 --- a/clippy_lints/src/unicode.rs +++ b/clippy_lints/src/unicode.rs @@ -91,14 +91,17 @@ fn escape>(s: T) -> String { fn check_str(cx: &LateContext<'_>, span: Span, id: HirId) { let string = snippet(cx, span, ""); - if string.chars().any(|c| ['\u{200B}', '\u{ad}'].contains(&c)) { + if string.chars().any(|c| ['\u{200B}', '\u{ad}', '\u{2060}'].contains(&c)) { span_lint_and_sugg( cx, INVISIBLE_CHARACTERS, span, "invisible character detected", "consider replacing the string with", - string.replace("\u{200B}", "\\u{200B}").replace("\u{ad}", "\\u{AD}"), + string + .replace("\u{200B}", "\\u{200B}") + .replace("\u{ad}", "\\u{AD}") + .replace("\u{2060}", "\\u{2060}"), Applicability::MachineApplicable, ); } diff --git a/tests/ui/unicode.rs b/tests/ui/unicode.rs index b6944e048593..1f596c312fe3 100644 --- a/tests/ui/unicode.rs +++ b/tests/ui/unicode.rs @@ -4,6 +4,8 @@ fn zero() { print!("This\u{200B}is\u{200B}fine"); print!("Here >­< is a SHY, and ­another"); print!("This\u{ad}is\u{ad}fine"); + print!("Here >⁠< is a WJ, and ⁠another"); + print!("This\u{2060}is\u{2060}fine"); } #[warn(clippy::unicode_not_nfc)] diff --git a/tests/ui/unicode.stderr b/tests/ui/unicode.stderr index 595d80ea2792..3fca463c620b 100644 --- a/tests/ui/unicode.stderr +++ b/tests/ui/unicode.stderr @@ -12,8 +12,14 @@ error: invisible character detected LL | print!("Here >­< is a SHY, and ­another"); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: consider replacing the string with: `"Here >/u{AD}< is a SHY, and /u{AD}another"` +error: invisible character detected + --> $DIR/unicode.rs:7:12 + | +LL | print!("Here >⁠< is a WJ, and ⁠another"); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: consider replacing the string with: `"Here >/u{2060}< is a WJ, and /u{2060}another"` + error: non-NFC Unicode sequence detected - --> $DIR/unicode.rs:11:12 + --> $DIR/unicode.rs:13:12 | LL | print!("̀àh?"); | ^^^^^ help: consider replacing the string with: `"̀àh?"` @@ -21,12 +27,12 @@ LL | print!("̀àh?"); = note: `-D clippy::unicode-not-nfc` implied by `-D warnings` error: literal non-ASCII character detected - --> $DIR/unicode.rs:17:12 + --> $DIR/unicode.rs:19:12 | LL | print!("Üben!"); | ^^^^^^^ help: consider replacing the string with: `"/u{dc}ben!"` | = note: `-D clippy::non-ascii-literal` implied by `-D warnings` -error: aborting due to 4 previous errors +error: aborting due to 5 previous errors