Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

std: Change escape_unicode to use new escapes #19885

Merged
merged 1 commit into from
Dec 17, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 23 additions & 13 deletions src/libcollections/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1597,17 +1597,24 @@ mod tests {

#[test]
fn test_escape_unicode() {
assert_eq!("abc".escape_unicode(), String::from_str("\\x61\\x62\\x63"));
assert_eq!("a c".escape_unicode(), String::from_str("\\x61\\x20\\x63"));
assert_eq!("\r\n\t".escape_unicode(), String::from_str("\\x0d\\x0a\\x09"));
assert_eq!("'\"\\".escape_unicode(), String::from_str("\\x27\\x22\\x5c"));
assert_eq!("abc".escape_unicode(),
String::from_str("\\u{61}\\u{62}\\u{63}"));
assert_eq!("a c".escape_unicode(),
String::from_str("\\u{61}\\u{20}\\u{63}"));
assert_eq!("\r\n\t".escape_unicode(),
String::from_str("\\u{d}\\u{a}\\u{9}"));
assert_eq!("'\"\\".escape_unicode(),
String::from_str("\\u{27}\\u{22}\\u{5c}"));
assert_eq!("\x00\x01\u{fe}\u{ff}".escape_unicode(),
String::from_str("\\x00\\x01\\u00fe\\u00ff"));
assert_eq!("\u{100}\u{ffff}".escape_unicode(), String::from_str("\\u0100\\uffff"));
String::from_str("\\u{0}\\u{1}\\u{fe}\\u{ff}"));
assert_eq!("\u{100}\u{ffff}".escape_unicode(),
String::from_str("\\u{100}\\u{ffff}"));
assert_eq!("\u{10000}\u{10ffff}".escape_unicode(),
String::from_str("\\U00010000\\U0010ffff"));
assert_eq!("ab\u{fb00}".escape_unicode(), String::from_str("\\x61\\x62\\ufb00"));
assert_eq!("\u{1d4ea}\r".escape_unicode(), String::from_str("\\U0001d4ea\\x0d"));
String::from_str("\\u{10000}\\u{10ffff}"));
assert_eq!("ab\u{fb00}".escape_unicode(),
String::from_str("\\u{61}\\u{62}\\u{fb00}"));
assert_eq!("\u{1d4ea}\r".escape_unicode(),
String::from_str("\\u{1d4ea}\\u{d}"));
}

#[test]
Expand All @@ -1616,11 +1623,14 @@ mod tests {
assert_eq!("a c".escape_default(), String::from_str("a c"));
assert_eq!("\r\n\t".escape_default(), String::from_str("\\r\\n\\t"));
assert_eq!("'\"\\".escape_default(), String::from_str("\\'\\\"\\\\"));
assert_eq!("\u{100}\u{ffff}".escape_default(), String::from_str("\\u0100\\uffff"));
assert_eq!("\u{100}\u{ffff}".escape_default(),
String::from_str("\\u{100}\\u{ffff}"));
assert_eq!("\u{10000}\u{10ffff}".escape_default(),
String::from_str("\\U00010000\\U0010ffff"));
assert_eq!("ab\u{fb00}".escape_default(), String::from_str("ab\\ufb00"));
assert_eq!("\u{1d4ea}\r".escape_default(), String::from_str("\\U0001d4ea\\r"));
String::from_str("\\u{10000}\\u{10ffff}"));
assert_eq!("ab\u{fb00}".escape_default(),
String::from_str("ab\\u{fb00}"));
assert_eq!("\u{1d4ea}\r".escape_default(),
String::from_str("\\u{1d4ea}\\r"));
}

#[test]
Expand Down
143 changes: 68 additions & 75 deletions src/libcore/char.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@
#![allow(non_snake_case)]
#![doc(primitive = "char")]

use iter::Iterator;
use mem::transmute;
use ops::FnMut;
use option::Option;
use option::Option::{None, Some};
use iter::{range_step, Iterator, RangeStep};
use option::Option;
use slice::SliceExt;

// UTF-8 ranges and tags for encoding characters
Expand Down Expand Up @@ -156,34 +156,15 @@ pub fn from_digit(num: uint, radix: uint) -> Option<char> {
}
}

///
/// Returns the hexadecimal Unicode escape of a `char`
///
/// The rules are as follows:
///
/// - chars in [0,0xff] get 2-digit escapes: `\\xNN`
/// - chars in [0x100,0xffff] get 4-digit escapes: `\\u{NNNN}`
/// - chars above 0x10000 get 8-digit escapes: `\\u{{NNN}NNNNN}`
///
/// Deprecated, call the escape_unicode method instead.
#[deprecated = "use the Char::escape_unicode method"]
pub fn escape_unicode<F>(c: char, mut f: F) where F: FnMut(char) {
for char in c.escape_unicode() {
f(char);
}
}

///
/// Returns a 'default' ASCII and C++11-like literal escape of a `char`
///
/// The default is chosen with a bias toward producing literals that are
/// legal in a variety of languages, including C++11 and similar C-family
/// languages. The exact rules are:
///
/// - Tab, CR and LF are escaped as '\t', '\r' and '\n' respectively.
/// - Single-quote, double-quote and backslash chars are backslash-escaped.
/// - Any other chars in the range [0x20,0x7e] are not escaped.
/// - Any other chars are given hex Unicode escapes; see `escape_unicode`.
///
/// Deprecated, call the escape_default method instead.
#[deprecated = "use the Char::escape_default method"]
pub fn escape_default<F>(c: char, mut f: F) where F: FnMut(char) {
for c in c.escape_default() {
Expand Down Expand Up @@ -267,13 +248,11 @@ pub trait Char {
/// Returns an iterator that yields the hexadecimal Unicode escape
/// of a character, as `char`s.
///
/// The rules are as follows:
///
/// * Characters in [0,0xff] get 2-digit escapes: `\\xNN`
/// * Characters in [0x100,0xffff] get 4-digit escapes: `\\u{NNNN}`.
/// * Characters above 0x10000 get 8-digit escapes: `\\u{{NNN}NNNNN}`.
/// All characters are escaped with Rust syntax of the form `\\u{NNNN}`
/// where `NNNN` is the shortest hexadecimal representation of the code
/// point.
#[unstable = "pending error conventions, trait organization"]
fn escape_unicode(self) -> UnicodeEscapedChars;
fn escape_unicode(self) -> EscapeUnicode;

/// Returns an iterator that yields the 'default' ASCII and
/// C++11-like literal escape of a character, as `char`s.
Expand All @@ -288,7 +267,7 @@ pub trait Char {
/// * Any other chars in the range [0x20,0x7e] are not escaped.
/// * Any other chars are given hex Unicode escapes; see `escape_unicode`.
#[unstable = "pending error conventions, trait organization"]
fn escape_default(self) -> DefaultEscapedChars;
fn escape_default(self) -> EscapeDefault;

/// Returns the amount of bytes this character would need if encoded in
/// UTF-8.
Expand Down Expand Up @@ -358,23 +337,23 @@ impl Char for char {
fn from_u32(i: u32) -> Option<char> { from_u32(i) }

#[unstable = "pending error conventions, trait organization"]
fn escape_unicode(self) -> UnicodeEscapedChars {
UnicodeEscapedChars { c: self, state: UnicodeEscapedCharsState::Backslash }
fn escape_unicode(self) -> EscapeUnicode {
EscapeUnicode { c: self, state: EscapeUnicodeState::Backslash }
}

#[unstable = "pending error conventions, trait organization"]
fn escape_default(self) -> DefaultEscapedChars {
fn escape_default(self) -> EscapeDefault {
let init_state = match self {
'\t' => DefaultEscapedCharsState::Backslash('t'),
'\r' => DefaultEscapedCharsState::Backslash('r'),
'\n' => DefaultEscapedCharsState::Backslash('n'),
'\\' => DefaultEscapedCharsState::Backslash('\\'),
'\'' => DefaultEscapedCharsState::Backslash('\''),
'"' => DefaultEscapedCharsState::Backslash('"'),
'\x20' ... '\x7e' => DefaultEscapedCharsState::Char(self),
_ => DefaultEscapedCharsState::Unicode(self.escape_unicode())
'\t' => EscapeDefaultState::Backslash('t'),
'\r' => EscapeDefaultState::Backslash('r'),
'\n' => EscapeDefaultState::Backslash('n'),
'\\' => EscapeDefaultState::Backslash('\\'),
'\'' => EscapeDefaultState::Backslash('\''),
'"' => EscapeDefaultState::Backslash('"'),
'\x20' ... '\x7e' => EscapeDefaultState::Char(self),
_ => EscapeDefaultState::Unicode(self.escape_unicode())
};
DefaultEscapedChars { state: init_state }
EscapeDefault { state: init_state }
}

#[inline]
Expand Down Expand Up @@ -451,72 +430,86 @@ impl Char for char {

/// An iterator over the characters that represent a `char`, as escaped by
/// Rust's unicode escaping rules.
pub struct UnicodeEscapedChars {
pub struct EscapeUnicode {
c: char,
state: UnicodeEscapedCharsState
state: EscapeUnicodeState
}

enum UnicodeEscapedCharsState {
enum EscapeUnicodeState {
Backslash,
Type,
Value(RangeStep<i32>),
LeftBrace,
Value(uint),
RightBrace,
Done,
}

impl Iterator<char> for UnicodeEscapedChars {
impl Iterator<char> for EscapeUnicode {
fn next(&mut self) -> Option<char> {
match self.state {
UnicodeEscapedCharsState::Backslash => {
self.state = UnicodeEscapedCharsState::Type;
EscapeUnicodeState::Backslash => {
self.state = EscapeUnicodeState::Type;
Some('\\')
}
UnicodeEscapedCharsState::Type => {
let (typechar, pad) = if self.c <= '\x7f' { ('x', 2) }
else if self.c <= '\u{ffff}' { ('u', 4) }
else { ('U', 8) };
self.state = UnicodeEscapedCharsState::Value(range_step(4 * (pad - 1), -1, -4i32));
Some(typechar)
EscapeUnicodeState::Type => {
self.state = EscapeUnicodeState::LeftBrace;
Some('u')
}
UnicodeEscapedCharsState::Value(ref mut range_step) => match range_step.next() {
Some(offset) => {
let offset = offset as uint;
let v = match ((self.c as i32) >> offset) & 0xf {
i @ 0 ... 9 => '0' as i32 + i,
i => 'a' as i32 + (i - 10)
};
Some(unsafe { transmute(v) })
EscapeUnicodeState::LeftBrace => {
let mut n = 0u;
while (self.c as u32) >> (4 * (n + 1)) != 0 {
n += 1;
}
None => None
self.state = EscapeUnicodeState::Value(n);
Some('{')
}
EscapeUnicodeState::Value(offset) => {
let v = match ((self.c as i32) >> (offset * 4)) & 0xf {
i @ 0 ... 9 => '0' as i32 + i,
i => 'a' as i32 + (i - 10)
};
if offset == 0 {
self.state = EscapeUnicodeState::RightBrace;
} else {
self.state = EscapeUnicodeState::Value(offset - 1);
}
Some(unsafe { transmute(v) })
}
EscapeUnicodeState::RightBrace => {
self.state = EscapeUnicodeState::Done;
Some('}')
}
EscapeUnicodeState::Done => None,
}
}
}

/// An iterator over the characters that represent a `char`, escaped
/// for maximum portability.
pub struct DefaultEscapedChars {
state: DefaultEscapedCharsState
pub struct EscapeDefault {
state: EscapeDefaultState
}

enum DefaultEscapedCharsState {
enum EscapeDefaultState {
Backslash(char),
Char(char),
Done,
Unicode(UnicodeEscapedChars),
Unicode(EscapeUnicode),
}

impl Iterator<char> for DefaultEscapedChars {
impl Iterator<char> for EscapeDefault {
fn next(&mut self) -> Option<char> {
match self.state {
DefaultEscapedCharsState::Backslash(c) => {
self.state = DefaultEscapedCharsState::Char(c);
EscapeDefaultState::Backslash(c) => {
self.state = EscapeDefaultState::Char(c);
Some('\\')
}
DefaultEscapedCharsState::Char(c) => {
self.state = DefaultEscapedCharsState::Done;
EscapeDefaultState::Char(c) => {
self.state = EscapeDefaultState::Done;
Some(c)
}
DefaultEscapedCharsState::Done => None,
DefaultEscapedCharsState::Unicode(ref mut iter) => iter.next()
EscapeDefaultState::Done => None,
EscapeDefaultState::Unicode(ref mut iter) => iter.next()
}
}
}
Expand Down
41 changes: 19 additions & 22 deletions src/libcoretest/char.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,38 +135,35 @@ fn test_escape_default() {
let s = string('~');
assert_eq!(s, "~");
let s = string('\x00');
assert_eq!(s, "\\x00");
assert_eq!(s, "\\u{0}");
let s = string('\x1f');
assert_eq!(s, "\\x1f");
assert_eq!(s, "\\u{1f}");
let s = string('\x7f');
assert_eq!(s, "\\x7f");
let s = string('\u00ff');
assert_eq!(s, "\\u00ff");
let s = string('\u011b');
assert_eq!(s, "\\u011b");
let s = string('\U0001d4b6');
assert_eq!(s, "\\U0001d4b6");
assert_eq!(s, "\\u{7f}");
let s = string('\u{ff}');
assert_eq!(s, "\\u{ff}");
let s = string('\u{11b}');
assert_eq!(s, "\\u{11b}");
let s = string('\u{1d4b6}');
assert_eq!(s, "\\u{1d4b6}");
}

#[test]
fn test_escape_unicode() {
fn string(c: char) -> String {
let mut result = String::new();
escape_unicode(c, |c| { result.push(c); });
return result;
}
fn string(c: char) -> String { c.escape_unicode().collect() }

let s = string('\x00');
assert_eq!(s, "\\x00");
assert_eq!(s, "\\u{0}");
let s = string('\n');
assert_eq!(s, "\\x0a");
assert_eq!(s, "\\u{a}");
let s = string(' ');
assert_eq!(s, "\\x20");
assert_eq!(s, "\\u{20}");
let s = string('a');
assert_eq!(s, "\\x61");
let s = string('\u011b');
assert_eq!(s, "\\u011b");
let s = string('\U0001d4b6');
assert_eq!(s, "\\U0001d4b6");
assert_eq!(s, "\\u{61}");
let s = string('\u{11b}');
assert_eq!(s, "\\u{11b}");
let s = string('\u{1d4b6}');
assert_eq!(s, "\\u{1d4b6}");
}

#[test]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
// aux-build:macro_crate_test.rs
// ignore-stage1
// ignore-android
// error-pattern: unknown start of token: \x00
// error-pattern: unknown start of token: \u{0}

// Issue #15750 and #15962 : this test is checking that the standard
// parser rejects embedded idents. pnkfelix did not want to attempt
Expand Down