diff --git a/clippy_lints/src/tabs_in_doc_comments.rs b/clippy_lints/src/tabs_in_doc_comments.rs index a0492a88f912a..e2c144709f5b7 100644 --- a/clippy_lints/src/tabs_in_doc_comments.rs +++ b/clippy_lints/src/tabs_in_doc_comments.rs @@ -104,30 +104,32 @@ fn get_chunks_of_tabs(the_str: &str) -> Vec<(u32, u32)> { // tracker to decide if the last group of tabs is not closed by a non-tab character let mut is_active = false; - let chars_array: Vec<_> = the_str.chars().collect(); + // Note that we specifically need the char _byte_ indices here, not the positional indexes + // within the char array to deal with multi-byte characters properly. `char_indices` does + // exactly that. It provides an iterator over tuples of the form `(byte position, char)`. + let char_indices: Vec<_> = the_str.char_indices().collect(); - if chars_array == vec!['\t'] { + if let [(_, '\t')] = char_indices.as_slice() { return vec![(0, 1)]; } - for (index, arr) in chars_array.windows(2).enumerate() { - let index = u32::try_from(index).expect(line_length_way_to_long); - match arr { - ['\t', '\t'] => { + for entry in char_indices.windows(2) { + match entry { + [(_, '\t'), (_, '\t')] => { // either string starts with double tab, then we have to set it active, // otherwise is_active is true anyway is_active = true; }, - [_, '\t'] => { + [(_, _), (index_b, '\t')] => { // as ['\t', '\t'] is excluded, this has to be a start of a tab group, // set indices accordingly is_active = true; - current_start = index + 1; + current_start = u32::try_from(*index_b).unwrap(); }, - ['\t', _] => { + [(_, '\t'), (index_b, _)] => { // this now has to be an end of the group, hence we have to push a new tuple is_active = false; - spans.push((current_start, index + 1)); + spans.push((current_start, u32::try_from(*index_b).unwrap())); }, _ => {}, } @@ -137,7 +139,7 @@ fn get_chunks_of_tabs(the_str: &str) -> Vec<(u32, u32)> { if is_active { spans.push(( current_start, - u32::try_from(the_str.chars().count()).expect(line_length_way_to_long), + u32::try_from(char_indices.last().unwrap().0 + 1).expect(line_length_way_to_long), )); } @@ -148,6 +150,13 @@ fn get_chunks_of_tabs(the_str: &str) -> Vec<(u32, u32)> { mod tests_for_get_chunks_of_tabs { use super::get_chunks_of_tabs; + #[test] + fn test_unicode_han_string() { + let res = get_chunks_of_tabs(" \u{4f4d}\t"); + + assert_eq!(res, vec![(4, 5)]); + } + #[test] fn test_empty_string() { let res = get_chunks_of_tabs(""); diff --git a/tests/ui/crashes/ice-5835.rs b/tests/ui/crashes/ice-5835.rs new file mode 100644 index 0000000000000..5e99cb432b6e2 --- /dev/null +++ b/tests/ui/crashes/ice-5835.rs @@ -0,0 +1,9 @@ +#[rustfmt::skip] +pub struct Foo { + /// 位 + /// ^ Do not remove this tab character. + /// It was required to trigger the ICE. + pub bar: u8, +} + +fn main() {} diff --git a/tests/ui/crashes/ice-5835.stderr b/tests/ui/crashes/ice-5835.stderr new file mode 100644 index 0000000000000..c972bcb60a0cd --- /dev/null +++ b/tests/ui/crashes/ice-5835.stderr @@ -0,0 +1,10 @@ +error: using tabs in doc comments is not recommended + --> $DIR/ice-5835.rs:3:10 + | +LL | /// 位 + | ^^^^ help: consider using four spaces per tab + | + = note: `-D clippy::tabs-in-doc-comments` implied by `-D warnings` + +error: aborting due to previous error +