From 0f14bea448dfdafccbecbb6302a55191a763562a Mon Sep 17 00:00:00 2001 From: Mario Carneiro Date: Thu, 17 Feb 2022 20:27:53 -0800 Subject: [PATCH 1/2] Optimize char_try_from_u32 The optimization was proposed by @falk-hueffner in https://rust-lang.zulipchat.com/#narrow/stream/219381-t-libs/topic/Micro-optimizing.20char.3A.3Afrom_u32/near/272146171, and I simplified it a bit and added an explanation of why the optimization is correct. --- library/core/src/char/convert.rs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/library/core/src/char/convert.rs b/library/core/src/char/convert.rs index 1774ddd7cbb2c..56dc2a594e176 100644 --- a/library/core/src/char/convert.rs +++ b/library/core/src/char/convert.rs @@ -271,7 +271,20 @@ impl FromStr for char { #[inline] const fn char_try_from_u32(i: u32) -> Result { - if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) { + // This is an optimized version of the check + // (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF), + // which can also be written as + // i >= 0x110000 || (i >= 0xD800 && i < 0xE000). + // + // The XOR with 0xD800 permutes the ranges such that 0xD800..0xE000 is + // mapped to 0x0000..0x0800, while keeping all the high bits outside 0xFFFF the same. + // In particular, numbers >= 0x110000 stay in this range. + // + // Subtracting 0x800 causes 0x0000..0x0800 to wrap, meaning that a single + // unsigned comparison against 0x110000 - 0x800 will detect both the wrapped + // surrogate range and the numbers originally >= 0x110000. 
+ // + if (i ^ 0xD800).wrapping_sub(0x800) >= 0x110000 - 0x800 { Err(CharTryFromError(())) } else { // SAFETY: checked that it's a legal unicode value From 7c3ebec0caf23a11773c8291005649dd488ca2ee Mon Sep 17 00:00:00 2001 From: Mario Carneiro Date: Thu, 17 Feb 2022 22:14:54 -0800 Subject: [PATCH 2/2] fix --- library/core/src/char/convert.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/library/core/src/char/convert.rs b/library/core/src/char/convert.rs index 56dc2a594e176..139841368d6a1 100644 --- a/library/core/src/char/convert.rs +++ b/library/core/src/char/convert.rs @@ -6,8 +6,6 @@ use crate::fmt; use crate::mem::transmute; use crate::str::FromStr; -use super::MAX; - /// Converts a `u32` to a `char`. /// /// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with