From 41d0a89e3ad99a9fdf700ea7d15750fe1cbfab14 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Wed, 17 Aug 2016 17:41:33 +0200 Subject: [PATCH 1/2] Implement From<char> for u32, and From<u8> for char These fit with other From implementations between integer types. This helps the coding style of avoiding the 'as' operator that sometimes silently truncates, and signals that these specific conversions are lossless and infallible. --- src/libcore/char.rs | 34 ++++++++++++++++++++++++++++++++++ src/libcoretest/char.rs | 8 ++++++++ 2 files changed, 42 insertions(+) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 4677f0b523f42..47a8678d608ae 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -175,6 +175,40 @@ pub unsafe fn from_u32_unchecked(i: u32) -> char { transmute(i) } +#[stable(feature = "char_convert", since = "1.13.0")] +impl From<char> for u32 { + #[inline] + fn from(c: char) -> Self { + c as u32 + } +} + +/// Maps a byte in 0x00...0xFF to a `char` whose code point has the same value, in U+0000 to U+00FF. +/// +/// Unicode is designed such that this effectively decodes bytes +/// with the character encoding that IANA calls ISO-8859-1. +/// This encoding is compatible with ASCII. +/// +/// Note that this is different from ISO/IEC 8859-1 a.k.a. ISO 8859-1 (with one less hyphen), +/// which leaves some "blanks", byte values that are not assigned to any character. +/// ISO-8859-1 (the IANA one) assigns them to the C0 and C1 control codes. +/// +/// Note that this is *also* different from Windows-1252 a.k.a. code page 1252, +/// which is a superset of ISO/IEC 8859-1 that assigns some (not all!) blanks +/// to punctuation and various Latin characters. +/// +/// To confuse things further, [on the Web](https://encoding.spec.whatwg.org/) +/// `ascii`, `iso-8859-1`, and `windows-1252` are all aliases +/// for a superset of Windows-1252 that fills the remaining blanks with corresponding +/// C0 and C1 control codes.
+#[stable(feature = "char_convert", since = "1.13.0")] +impl From<u8> for char { + #[inline] + fn from(i: u8) -> Self { + i as char + } +} + /// Converts a digit in the given radix to a `char`. /// /// A 'radix' here is sometimes also called a 'base'. A radix of two diff --git a/src/libcoretest/char.rs b/src/libcoretest/char.rs index 333503d738943..92a2b23d242b9 100644 --- a/src/libcoretest/char.rs +++ b/src/libcoretest/char.rs @@ -10,6 +10,14 @@ use std::char; +#[test] +fn test_convert() { + assert_eq!(u32::from('a'), 0x61); + assert_eq!(char::from(b'\0'), '\0'); + assert_eq!(char::from(b'a'), 'a'); + assert_eq!(char::from(b'\xFF'), '\u{FF}'); +} + #[test] fn test_is_lowercase() { assert!('a'.is_lowercase()); From f040208d533e1d6d9ee0e0408ee74e26e14d1284 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Wed, 17 Aug 2016 18:01:41 +0200 Subject: [PATCH 2/2] Implement TryFrom<u32> for char For symmetry with From<char> for u32. --- src/libcore/char.rs | 35 +++++++++++++++++++++++++++++------ src/libcoretest/char.rs | 10 ++++++++++ src/librustc_unicode/char.rs | 2 ++ src/librustc_unicode/lib.rs | 1 + src/libstd/error.rs | 7 +++++++ 5 files changed, 49 insertions(+), 6 deletions(-) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 47a8678d608ae..ad492c81bd38a 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -16,6 +16,8 @@ #![stable(feature = "core_char", since = "1.2.0")] use char_private::is_printable; +use convert::TryFrom; +use fmt; use iter::FusedIterator; use mem::transmute; @@ -122,12 +124,7 @@ pub const MAX: char = '\u{10ffff}'; #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn from_u32(i: u32) -> Option<char> { - // catch out-of-bounds and surrogates - if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) { - None - } else { - Some(unsafe { from_u32_unchecked(i) }) - } + char::try_from(i).ok() } /// Converts a `u32` to a `char`, ignoring validity.
@@ -209,6 +206,32 @@ impl From<u8> for char { } } +#[unstable(feature = "try_from", issue = "33417")] +impl TryFrom<u32> for char { + type Err = CharTryFromError; + + #[inline] + fn try_from(i: u32) -> Result<Self, Self::Err> { + if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) { + Err(CharTryFromError(())) + } else { + Ok(unsafe { from_u32_unchecked(i) }) + } + } +} + +/// The error type returned when a conversion from u32 to char fails. +#[unstable(feature = "try_from", issue = "33417")] +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct CharTryFromError(()); + +#[unstable(feature = "try_from", issue = "33417")] +impl fmt::Display for CharTryFromError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + "converted integer out of range for `char`".fmt(f) + } +} + /// Converts a digit in the given radix to a `char`. /// /// A 'radix' here is sometimes also called a 'base'. A radix of two diff --git a/src/libcoretest/char.rs b/src/libcoretest/char.rs index 92a2b23d242b9..199437a431eee 100644 --- a/src/libcoretest/char.rs +++ b/src/libcoretest/char.rs @@ -9,6 +9,7 @@ // except according to those terms.
use std::char; +use std::convert::TryFrom; #[test] fn test_convert() { @@ -16,6 +17,15 @@ fn test_convert() { assert_eq!(char::from(b'\0'), '\0'); assert_eq!(char::from(b'a'), 'a'); assert_eq!(char::from(b'\xFF'), '\u{FF}'); + assert_eq!(char::try_from(0_u32), Ok('\0')); + assert_eq!(char::try_from(0x61_u32), Ok('a')); + assert_eq!(char::try_from(0xD7FF_u32), Ok('\u{D7FF}')); + assert!(char::try_from(0xD800_u32).is_err()); + assert!(char::try_from(0xDFFF_u32).is_err()); + assert_eq!(char::try_from(0xE000_u32), Ok('\u{E000}')); + assert_eq!(char::try_from(0x10FFFF_u32), Ok('\u{10FFFF}')); + assert!(char::try_from(0x110000_u32).is_err()); + assert!(char::try_from(0xFFFF_FFFF_u32).is_err()); } #[test] diff --git a/src/librustc_unicode/char.rs b/src/librustc_unicode/char.rs index c2b7d7045ddd8..5a0c27d9c609f 100644 --- a/src/librustc_unicode/char.rs +++ b/src/librustc_unicode/char.rs @@ -40,6 +40,8 @@ pub use core::char::{MAX, from_digit, from_u32, from_u32_unchecked}; pub use core::char::{EncodeUtf16, EncodeUtf8, EscapeDebug, EscapeDefault, EscapeUnicode}; // unstable reexports +#[unstable(feature = "try_from", issue = "33417")] +pub use core::char::CharTryFromError; #[unstable(feature = "decode_utf8", issue = "33906")] pub use core::char::{DecodeUtf8, decode_utf8}; #[unstable(feature = "unicode", issue = "27783")] diff --git a/src/librustc_unicode/lib.rs b/src/librustc_unicode/lib.rs index b812c262ac197..65bd717e01a82 100644 --- a/src/librustc_unicode/lib.rs +++ b/src/librustc_unicode/lib.rs @@ -38,6 +38,7 @@ #![feature(fused)] #![feature(lang_items)] #![feature(staged_api)] +#![feature(try_from)] #![feature(unicode)] mod tables; diff --git a/src/libstd/error.rs b/src/libstd/error.rs index ab537f39bf96a..1629062001003 100644 --- a/src/libstd/error.rs +++ b/src/libstd/error.rs @@ -302,6 +302,13 @@ impl<'a, T: ?Sized + Reflect> Error for cell::BorrowMutError<'a, T> { } } +#[unstable(feature = "try_from", issue = "33417")] +impl Error for char::CharTryFromError { + fn 
description(&self) -> &str { + "converted integer out of range for `char`" + } +} + // copied from any.rs impl Error + 'static { /// Returns true if the boxed type is the same as `T`