diff --git a/src/libsyntax/parse/comments.rs b/src/libsyntax/parse/comments.rs index 3fa0fa3b0f06c..8aeaf85b09f01 100644 --- a/src/libsyntax/parse/comments.rs +++ b/src/libsyntax/parse/comments.rs @@ -192,26 +192,35 @@ fn read_line_comments(rdr: @mut StringReader, code_to_the_left: bool, } } -// FIXME #3961: This is not the right way to convert string byte -// offsets to characters. -fn all_whitespace(s: &str, begin: uint, end: uint) -> bool { - let mut i: uint = begin; - while i != end { - if !is_whitespace(s[i] as char) { return false; } i += 1u; +// Returns None if the first col chars of s contain a non-whitespace char. +// Otherwise returns Some(k) where k is first char offset after that leading +// whitespace. Note k may be outside bounds of s. +fn all_whitespace(s: &str, col: CharPos) -> Option<uint> { + let len = s.len(); + let mut col = col.to_uint(); + let mut cursor: uint = 0; + while col > 0 && cursor < len { + let r: str::CharRange = str::char_range_at(s, cursor); + if !r.ch.is_whitespace() { + return None; + } + cursor = r.next; + col -= 1; } - return true; + return Some(cursor); } fn trim_whitespace_prefix_and_push_line(lines: &mut ~[~str], s: ~str, col: CharPos) { let len = s.len(); - // FIXME #3961: Doing bytewise comparison and slicing with CharPos - let col = col.to_uint(); - let s1 = if all_whitespace(s, 0, uint::min(len, col)) { - if col < len { - str::slice(s, col, len).to_owned() - } else { ~"" } - } else { s }; + let s1 = match all_whitespace(s, col) { + None => s, + Some(col) => { + if col < len { + str::slice(s, col, len).to_owned() + } else { ~"" } + } + }; debug!("pushing line: %s", s1); lines.push(s1); } diff --git a/src/test/pretty/block-comment-wchar.pp b/src/test/pretty/block-comment-wchar.pp new file mode 100644 index 0000000000000..9c80057ccef02 --- /dev/null +++ b/src/test/pretty/block-comment-wchar.pp @@ -0,0 +1,116 @@ +// Copyright 2013 The Rust Project Developers. 
See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// This is meant as a test case for Issue 3961. +// +// Test via: rustc --pretty normal src/test/pretty/block-comment-wchar.rs +// +// pp-exact:block-comment-wchar.pp +fn f() { + fn nested() { + /* + Spaced2 + */ + /* + Spaced10 + */ + /* + Tabbed8+2 + */ + /* + CR8+2 + */ + } + /* + Spaced2: (prefixed so start of space aligns with comment) + */ + /* + Tabbed2: (more indented b/c *start* of space will align with comment) + */ + /* + Spaced6: (Alignment removed and realigning spaces inserted) + */ + /* + Tabbed4+2: (Alignment removed and realigning spaces inserted) + */ + + /* + VT4+2: (should align) + */ + /* + FF4+2: (should align) + */ + /* + CR4+2: (should align) + */ + /* + // (NEL deliberately omitted) + */ + /* + Ogham Space Mark 4+2: (should align) + */ + /* + Mongolian Vowel Separator 4+2: (should align) + */ + /* + Four-per-em space 4+2: (should align) + */ + + /* + Mongolian Vowel Sep count 1: (should align) + Mongolian Vowel Sep count 2: (should align) + Mongolian Vowel Sep count 3: (should align) + Mongolian Vowel Sep count 4: (should align) + Mongolian Vowel Sep count 5: (should align) + Mongolian Vowel Sep count 6: (should align) + Mongolian Vowel Sep count 7: (should align) + Mongolian Vowel Sep count 8: (should align) + Mongolian Vowel Sep count 9: (should align) + Mongolian Vowel Sep count A: (should align) + Mongolian Vowel Sep count B: (should align) + Mongolian Vowel Sep count C: (should align) + Mongolian Vowel Sep count D: (should align) + Mongolian Vowel Sep count E: (should align) + Mongolian Vowel Sep count F: (should align) + */ + + + + /* */ + + /* + Hello from offset 6 + Space 6+2: compare A + Mongolian Vowel Separator 6+2: compare 
B + */ + + /*᠎*/ + + /* + Hello from another offset 6 with wchars establishing column offset + Space 6+2: compare C + Mongolian Vowel Separator 6+2: compare D + */ +} + +fn main() { + // Taken from http://en.wikipedia.org/wiki/Whitespace_character + let chars = + ['\x0A', '\x0B', '\x0C', '\x0D', '\x20', + // '\x85', // for some reason Rust thinks NEL isn't whitespace + '\xA0', '\u1680', '\u180E', '\u2000', '\u2001', '\u2002', '\u2003', + '\u2004', '\u2005', '\u2006', '\u2007', '\u2008', '\u2009', '\u200A', + '\u2028', '\u2029', '\u202F', '\u205F', '\u3000']; + // <= bugs in pretty-printer? + for vec::each(chars) |c| { + let ws = c.is_whitespace(); + io::println(fmt!("%? %?" , c , ws)); + } +} diff --git a/src/test/pretty/block-comment-wchar.rs b/src/test/pretty/block-comment-wchar.rs new file mode 100644 index 0000000000000..a56fa91f39f09 --- /dev/null +++ b/src/test/pretty/block-comment-wchar.rs @@ -0,0 +1,109 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// This is meant as a test case for Issue 3961. 
+// +// Test via: rustc --pretty normal src/test/pretty/block-comment-wchar.rs +// +// pp-exact:block-comment-wchar.pp +fn f() { + fn nested() { + /* + Spaced2 + */ + /* + Spaced10 + */ + /* + Tabbed8+2 + */ + /* + CR8+2 + */ + } + /* + Spaced2: (prefixed so start of space aligns with comment) + */ + /* + Tabbed2: (more indented b/c *start* of space will align with comment) + */ + /* + Spaced6: (Alignment removed and realigning spaces inserted) + */ + /* + Tabbed4+2: (Alignment removed and realigning spaces inserted) + */ + + /* + VT4+2: (should align) + */ + /* + FF4+2: (should align) + */ + /* + CR4+2: (should align) + */ + /* + // (NEL deliberately omitted) + */ + /* +     Ogham Space Mark 4+2: (should align) + */ + /* +᠎᠎᠎᠎ Mongolian Vowel Separator 4+2: (should align) + */ + /* +     Four-per-em space 4+2: (should align) + */ + + /* + ᠎ Mongolian Vowel Sep count 1: (should align) + ᠎ Mongolian Vowel Sep count 2: (should align) + ᠎᠎ Mongolian Vowel Sep count 3: (should align) + ᠎ Mongolian Vowel Sep count 4: (should align) + ᠎ ᠎ Mongolian Vowel Sep count 5: (should align) + ᠎᠎ Mongolian Vowel Sep count 6: (should align) + ᠎᠎᠎ Mongolian Vowel Sep count 7: (should align) +᠎ Mongolian Vowel Sep count 8: (should align) +᠎ ᠎ Mongolian Vowel Sep count 9: (should align) +᠎ ᠎ Mongolian Vowel Sep count A: (should align) +᠎ ᠎᠎ Mongolian Vowel Sep count B: (should align) +᠎᠎ Mongolian Vowel Sep count C: (should align) +᠎᠎ ᠎ Mongolian Vowel Sep count D: (should align) +᠎᠎᠎ Mongolian Vowel Sep count E: (should align) +᠎᠎᠎᠎ Mongolian Vowel Sep count F: (should align) + */ + + +/* */ /* + Hello from offset 6 + Space 6+2: compare A +᠎᠎᠎᠎᠎᠎ Mongolian Vowel Separator 6+2: compare B + */ +/*᠎*/ /* + Hello from another offset 6 with wchars establishing column offset + Space 6+2: compare C +᠎᠎᠎᠎᠎᠎ Mongolian Vowel Separator 6+2: compare D + */ +} + +fn main() { + // Taken from http://en.wikipedia.org/wiki/Whitespace_character + let chars = + ['\x0A', '\x0B', '\x0C', '\x0D', '\x20', 
+ // '\x85', // for some reason Rust thinks NEL isn't whitespace + '\xA0', '\u1680', '\u180E', '\u2000', '\u2001', '\u2002', '\u2003', + '\u2004', '\u2005', '\u2006', '\u2007', '\u2008', '\u2009', '\u200A', + '\u2028', '\u2029', '\u202F', '\u205F', '\u3000']; + for vec::each(chars) |c| { + let ws = c.is_whitespace(); + io::println(fmt!("%? %?", c , ws)); // <= bugs in pretty-printer? + } +}