From 96f387920c21a83fa620b1838066fd4c73c4b486 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Wed, 20 May 2020 23:46:57 -0400 Subject: [PATCH 1/5] impl Step for char Enables Range to be iterable Note: https://rust.godbolt.org/z/fdveKo An iteration over all char ('\0'..=char::MAX) includes unreachable panic code currently. Updating RangeInclusive::next to call Step::forward_unchecked (which is safe to do but not done yet becuase it wasn't necessary) successfully removes the panic from this iteration. --- src/libcore/iter/range.rs | 64 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/src/libcore/iter/range.rs b/src/libcore/iter/range.rs index d74df82bddd9d..6837aab7af1a2 100644 --- a/src/libcore/iter/range.rs +++ b/src/libcore/iter/range.rs @@ -1,3 +1,4 @@ +use crate::char; use crate::convert::TryFrom; use crate::mem; use crate::ops::{self, Add, Sub, Try}; @@ -400,6 +401,69 @@ step_integer_impls! { wider than usize: [u32 i32], [u64 i64], [u128 i128]; } +#[unstable(feature = "step_trait", reason = "recently redesigned", issue = "42168")] +unsafe impl Step for char { + #[inline] + fn steps_between(&start: &char, &end: &char) -> Option { + let start = start as u32; + let end = end as u32; + if start <= end { + let count = end - start + 1; + if start < 0xD800 && 0xE000 <= end { + usize::try_from(count - 0x800).ok() + } else { + usize::try_from(count).ok() + } + } else { + None + } + } + + #[inline] + fn forward_checked(start: char, count: usize) -> Option { + let start = start as u32; + let mut res = Step::forward_checked(start, count)?; + if start < 0xD800 && 0xD800 <= res { + res = Step::forward_checked(res, 0x800)?; + } + if res <= char::MAX as u32 { + Some(unsafe { char::from_u32_unchecked(res) }) + } else { + None + } + } + + #[inline] + fn backward_checked(start: char, count: usize) -> Option { + let start = start as u32; + let mut res = Step::backward_checked(start, count)?; + if start >= 0xE000 && 0xE000 > res { + res = Step::backward_checked(res, 0x800)?; + } + Some(unsafe { char::from_u32_unchecked(res) }) + } + + #[inline] + unsafe fn forward_unchecked(start: char, count: usize) -> char { + let start = start as u32; + let mut res = Step::forward_unchecked(start, count); + if start < 0xD800 && 0xD800 <= res { + res = Step::forward_unchecked(res, 0x800); + } + char::from_u32_unchecked(res) + } + + #[inline] + unsafe fn backward_unchecked(start: char, count: usize) -> char { + let start = start as u32; + let mut res = Step::backward_unchecked(start, count); + if start >= 0xE000 && 0xE000 > res { + res = Step::backward_unchecked(res, 0x800); + } + char::from_u32_unchecked(res) + } +} + macro_rules! range_exact_iter_impl { ($($t:ty)*) => ($( #[stable(feature = "rust1", since = "1.0.0")] From c25b82f5bb9770374db31906dedb497034a151fd Mon Sep 17 00:00:00 2001 From: CAD97 Date: Wed, 20 May 2020 23:50:28 -0400 Subject: [PATCH 2/5] Use Step::forward_unchecked in RangeInclusive::next --- src/libcore/iter/range.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/libcore/iter/range.rs b/src/libcore/iter/range.rs index 6837aab7af1a2..3fa4cf2ca0da6 100644 --- a/src/libcore/iter/range.rs +++ b/src/libcore/iter/range.rs @@ -646,7 +646,11 @@ impl Iterator for ops::RangeInclusive { } let is_iterating = self.start < self.end; Some(if is_iterating { - let n = Step::forward(self.start.clone(), 1); + // SAFETY: just checked precondition + // We use the unchecked version here, because + // otherwise `for _ in '\0'..=char::MAX` + // does not successfully remove panicking code. + let n = unsafe { Step::forward_unchecked(self.start.clone(), 1) }; mem::replace(&mut self.start, n) } else { self.exhausted = true; From 27d1cd857e8998c423f329efce7c995a26dcdb6c Mon Sep 17 00:00:00 2001 From: CAD97 Date: Thu, 21 May 2020 13:21:55 -0400 Subject: [PATCH 3/5] Add safety annotations in iter::range --- src/libcore/iter/range.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/libcore/iter/range.rs b/src/libcore/iter/range.rs index 3fa4cf2ca0da6..5db790e491c2d 100644 --- a/src/libcore/iter/range.rs +++ b/src/libcore/iter/range.rs @@ -427,6 +427,8 @@ unsafe impl Step for char { res = Step::forward_checked(res, 0x800)?; } if res <= char::MAX as u32 { + // SAFETY: res is a valid unicode scalar + // (below 0x110000 and not in 0xD800..0xE000) Some(unsafe { char::from_u32_unchecked(res) }) } else { None @@ -440,6 +442,8 @@ unsafe impl Step for char { if start >= 0xE000 && 0xE000 > res { res = Step::backward_checked(res, 0x800)?; } + // SAFETY: res is a valid unicode scalar + // (below 0x110000 and not in 0xD800..0xE000) Some(unsafe { char::from_u32_unchecked(res) }) } From b1d1f256ef1902312803d3d1bd0235bfdf864aec Mon Sep 17 00:00:00 2001 From: CAD97 Date: Thu, 21 May 2020 13:43:01 -0400 Subject: [PATCH 4/5] Add a test for char ranges --- src/libcore/tests/iter.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/libcore/tests/iter.rs b/src/libcore/tests/iter.rs index 52cf068f0a567..6605249f18004 100644 --- a/src/libcore/tests/iter.rs +++ b/src/libcore/tests/iter.rs @@ -1932,6 +1932,16 @@ fn test_range() { ); } +#[test] +fn test_char_range() { + use std::char; + assert!(('\0'..=char::MAX).eq((0..=char::MAX as u32).filter_map(char::from_u32))); + assert!(('\0'..=char::MAX).rev().eq((0..=char::MAX as u32).filter_map(char::from_u32).rev())); + + assert_eq!(('\u{D7FF}'..='\u{E000}').count(), 2); + assert_eq!(('\u{D7FF}'..'\u{E000}').count(), 1); +} + #[test] fn test_range_exhaustion() { let mut r = 10..10; From cd6a8cae2aa07bd456a1816196e3c9aa2fcb72d6 Mon Sep 17 00:00:00 2001 From: CAD97 Date: Thu, 28 May 2020 01:11:46 -0400 Subject: [PATCH 5/5] FIx off-by-one in char::steps_between --- src/libcore/iter/range.rs | 2 +- src/libcore/tests/iter.rs | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/libcore/iter/range.rs b/src/libcore/iter/range.rs index 5db790e491c2d..fc4454ede3588 100644 --- a/src/libcore/iter/range.rs +++ b/src/libcore/iter/range.rs @@ -408,7 +408,7 @@ unsafe impl Step for char { let start = start as u32; let end = end as u32; if start <= end { - let count = end - start + 1; + let count = end - start; if start < 0xD800 && 0xE000 <= end { usize::try_from(count - 0x800).ok() } else { diff --git a/src/libcore/tests/iter.rs b/src/libcore/tests/iter.rs index 6605249f18004..c5d636ac8da55 100644 --- a/src/libcore/tests/iter.rs +++ b/src/libcore/tests/iter.rs @@ -1939,7 +1939,9 @@ fn test_char_range() { assert!(('\0'..=char::MAX).rev().eq((0..=char::MAX as u32).filter_map(char::from_u32).rev())); assert_eq!(('\u{D7FF}'..='\u{E000}').count(), 2); + assert_eq!(('\u{D7FF}'..='\u{E000}').size_hint(), (2, Some(2))); assert_eq!(('\u{D7FF}'..'\u{E000}').count(), 1); + assert_eq!(('\u{D7FF}'..'\u{E000}').size_hint(), (1, Some(1))); } #[test]