From 0cac9152116b5b4e4c5f84a05e0ceb20dc325a0c Mon Sep 17 00:00:00 2001 From: "Chai T. Rex" Date: Mon, 26 Aug 2024 01:34:25 -0400 Subject: [PATCH 1/2] Improve `isqrt` tests and add benchmarks * Choose test inputs more thoroughly and systematically. * Check that `isqrt` and `checked_isqrt` have equivalent results for signed types, either equivalent numerically or equivalent as a panic and a `None`. * Check that `isqrt` has numerically-equivalent results for unsigned types and their `NonZero` counterparts. * Reuse `ilog10` benchmarks, plus benchmarks that use a uniform distribution. --- library/core/benches/lib.rs | 1 + library/core/benches/num/int_sqrt/mod.rs | 62 ++++++ library/core/benches/num/mod.rs | 1 + library/core/tests/num/int_macros.rs | 32 --- library/core/tests/num/int_sqrt.rs | 248 +++++++++++++++++++++++ library/core/tests/num/mod.rs | 1 + 6 files changed, 313 insertions(+), 32 deletions(-) create mode 100644 library/core/benches/num/int_sqrt/mod.rs create mode 100644 library/core/tests/num/int_sqrt.rs diff --git a/library/core/benches/lib.rs b/library/core/benches/lib.rs index 32d15c386cb1b..3f1c58bbd7204 100644 --- a/library/core/benches/lib.rs +++ b/library/core/benches/lib.rs @@ -8,6 +8,7 @@ #![feature(iter_array_chunks)] #![feature(iter_next_chunk)] #![feature(iter_advance_by)] +#![feature(isqrt)] extern crate test; diff --git a/library/core/benches/num/int_sqrt/mod.rs b/library/core/benches/num/int_sqrt/mod.rs new file mode 100644 index 0000000000000..3c9d173e456a1 --- /dev/null +++ b/library/core/benches/num/int_sqrt/mod.rs @@ -0,0 +1,62 @@ +use rand::Rng; +use test::{black_box, Bencher}; + +macro_rules! int_sqrt_bench { + ($t:ty, $predictable:ident, $random:ident, $random_small:ident, $random_uniform:ident) => { + #[bench] + fn $predictable(bench: &mut Bencher) { + bench.iter(|| { + for n in 0..(<$t>::BITS / 8) { + for i in 1..=(100 as $t) { + let x = black_box(i << (n * 8)); + black_box(x.isqrt()); + } + } + }); + } + + #[bench] + fn $random(bench: &mut Bencher) { + let mut rng = crate::bench_rng(); + /* Exponentially distributed random numbers from the whole range of the type. */ + let numbers: Vec<$t> = + (0..256).map(|_| rng.gen::<$t>() >> rng.gen_range(0..<$t>::BITS)).collect(); + bench.iter(|| { + for x in &numbers { + black_box(black_box(x).isqrt()); + } + }); + } + + #[bench] + fn $random_small(bench: &mut Bencher) { + let mut rng = crate::bench_rng(); + /* Exponentially distributed random numbers from the range 0..256. */ + let numbers: Vec<$t> = + (0..256).map(|_| (rng.gen::() >> rng.gen_range(0..u8::BITS)) as $t).collect(); + bench.iter(|| { + for x in &numbers { + black_box(black_box(x).isqrt()); + } + }); + } + + #[bench] + fn $random_uniform(bench: &mut Bencher) { + let mut rng = crate::bench_rng(); + /* Exponentially distributed random numbers from the whole range of the type. */ + let numbers: Vec<$t> = (0..256).map(|_| rng.gen::<$t>()).collect(); + bench.iter(|| { + for x in &numbers { + black_box(black_box(x).isqrt()); + } + }); + } + }; +} + +int_sqrt_bench! {u8, u8_sqrt_predictable, u8_sqrt_random, u8_sqrt_random_small, u8_sqrt_uniform} +int_sqrt_bench! {u16, u16_sqrt_predictable, u16_sqrt_random, u16_sqrt_random_small, u16_sqrt_uniform} +int_sqrt_bench! {u32, u32_sqrt_predictable, u32_sqrt_random, u32_sqrt_random_small, u32_sqrt_uniform} +int_sqrt_bench! {u64, u64_sqrt_predictable, u64_sqrt_random, u64_sqrt_random_small, u64_sqrt_uniform} +int_sqrt_bench! {u128, u128_sqrt_predictable, u128_sqrt_random, u128_sqrt_random_small, u128_sqrt_uniform} diff --git a/library/core/benches/num/mod.rs b/library/core/benches/num/mod.rs index c1dc3a3062256..7ff7443cfa7fe 100644 --- a/library/core/benches/num/mod.rs +++ b/library/core/benches/num/mod.rs @@ -2,6 +2,7 @@ mod dec2flt; mod flt2dec; mod int_log; mod int_pow; +mod int_sqrt; use std::str::FromStr; diff --git a/library/core/tests/num/int_macros.rs b/library/core/tests/num/int_macros.rs index 7cd3b54e3f39a..830a96204ca03 100644 --- a/library/core/tests/num/int_macros.rs +++ b/library/core/tests/num/int_macros.rs @@ -288,38 +288,6 @@ macro_rules! int_module { assert_eq!(r.saturating_pow(0), 1 as $T); } - #[test] - fn test_isqrt() { - assert_eq!($T::MIN.checked_isqrt(), None); - assert_eq!((-1 as $T).checked_isqrt(), None); - assert_eq!((0 as $T).isqrt(), 0 as $T); - assert_eq!((1 as $T).isqrt(), 1 as $T); - assert_eq!((2 as $T).isqrt(), 1 as $T); - assert_eq!((99 as $T).isqrt(), 9 as $T); - assert_eq!((100 as $T).isqrt(), 10 as $T); - } - - #[cfg(not(miri))] // Miri is too slow - #[test] - fn test_lots_of_isqrt() { - let n_max: $T = (1024 * 1024).min($T::MAX as u128) as $T; - for n in 0..=n_max { - let isqrt: $T = n.isqrt(); - - assert!(isqrt.pow(2) <= n); - let (square, overflow) = (isqrt + 1).overflowing_pow(2); - assert!(overflow || square > n); - } - - for n in ($T::MAX - 127)..=$T::MAX { - let isqrt: $T = n.isqrt(); - - assert!(isqrt.pow(2) <= n); - let (square, overflow) = (isqrt + 1).overflowing_pow(2); - assert!(overflow || square > n); - } - } - #[test] fn test_div_floor() { let a: $T = 8; diff --git a/library/core/tests/num/int_sqrt.rs b/library/core/tests/num/int_sqrt.rs new file mode 100644 index 0000000000000..d68db0787d22c --- /dev/null +++ b/library/core/tests/num/int_sqrt.rs @@ -0,0 +1,248 @@ +macro_rules! tests { + ($isqrt_consistency_check_fn_macro:ident : $($T:ident)+) => { + $( + mod $T { + $isqrt_consistency_check_fn_macro!($T); + + // Check that the following produce the correct values from + // `isqrt`: + // + // * the first and last 128 nonnegative values + // * powers of two, minus one + // * powers of two + // + // For signed types, check that `checked_isqrt` and `isqrt` + // either produce the same numeric value or respectively + // produce `None` and a panic. Make sure to do a consistency + // check for `<$T>::MIN` as well, as no nonnegative values + // negate to it. + // + // For unsigned types check that `isqrt` produces the same + // numeric value for `$T` and `NonZero<$T>`. + #[test] + fn isqrt() { + isqrt_consistency_check(<$T>::MIN); + + for n in (0..=127) + .chain(<$T>::MAX - 127..=<$T>::MAX) + .chain((0..<$T>::MAX.count_ones()).map(|exponent| (1 << exponent) - 1)) + .chain((0..<$T>::MAX.count_ones()).map(|exponent| 1 << exponent)) + { + isqrt_consistency_check(n); + + let isqrt_n = n.isqrt(); + assert!( + isqrt_n + .checked_mul(isqrt_n) + .map(|isqrt_n_squared| isqrt_n_squared <= n) + .unwrap_or(false), + "`{n}.isqrt()` should be lower than {isqrt_n}." + ); + assert!( + (isqrt_n + 1) + .checked_mul(isqrt_n + 1) + .map(|isqrt_n_plus_1_squared| n < isqrt_n_plus_1_squared) + .unwrap_or(true), + "`{n}.isqrt()` should be higher than {isqrt_n})." + ); + } + } + + // Check the square roots of: + // + // * the first 1,024 perfect squares + // * halfway between each of the first 1,024 perfect squares + // and the next perfect square + // * the next perfect square after the each of the first 1,024 + // perfect squares, minus one + // * the last 1,024 perfect squares + // * the last 1,024 perfect squares, minus one + // * halfway between each of the last 1,024 perfect squares + // and the previous perfect square + #[test] + // Skip this test on Miri, as it takes too long to run. + #[cfg(not(miri))] + fn isqrt_extended() { + // The correct value is worked out by using the fact that + // the nth nonzero perfect square is the sum of the first n + // odd numbers: + // + // 1 = 1 + // 4 = 1 + 3 + // 9 = 1 + 3 + 5 + // 16 = 1 + 3 + 5 + 7 + // + // Note also that the last odd number added in is two times + // the square root of the previous perfect square, plus + // one: + // + // 1 = 2*0 + 1 + // 3 = 2*1 + 1 + // 5 = 2*2 + 1 + // 7 = 2*3 + 1 + // + // That means we can add the square root of this perfect + // square once to get about halfway to the next perfect + // square, then we can add the square root of this perfect + // square again to get to the next perfect square, minus + // one, then we can add one to get to the next perfect + // square. + // + // This allows us to, for each of the first 1,024 perfect + // squares, test that the square roots of the following are + // all correct and equal to each other: + // + // * the current perfect square + // * about halfway to the next perfect square + // * the next perfect square, minus one + let mut n: $T = 0; + for sqrt_n in 0..1_024.min((1_u128 << (<$T>::MAX.count_ones()/2)) - 1) as $T { + isqrt_consistency_check(n); + assert_eq!( + n.isqrt(), + sqrt_n, + "`{sqrt_n}.pow(2).isqrt()` should be {sqrt_n}." + ); + + n += sqrt_n; + isqrt_consistency_check(n); + assert_eq!( + n.isqrt(), + sqrt_n, + "{n} is about halfway between `{sqrt_n}.pow(2)` and `{}.pow(2)`, so `{n}.isqrt()` should be {sqrt_n}.", + sqrt_n + 1 + ); + + n += sqrt_n; + isqrt_consistency_check(n); + assert_eq!( + n.isqrt(), + sqrt_n, + "`({}.pow(2) - 1).isqrt()` should be {sqrt_n}.", + sqrt_n + 1 + ); + + n += 1; + } + + // Similarly, for each of the last 1,024 perfect squares, + // check: + // + // * the current perfect square + // * the current perfect square, minus one + // * about halfway to the previous perfect square + // + // `MAX`'s `isqrt` return value is verified in the `isqrt` + // test function above. + let maximum_sqrt = <$T>::MAX.isqrt(); + let mut n = maximum_sqrt * maximum_sqrt; + + for sqrt_n in (maximum_sqrt - 1_024.min((1_u128 << (<$T>::MAX.count_ones()/2)) - 1) as $T..maximum_sqrt).rev() { + isqrt_consistency_check(n); + assert_eq!( + n.isqrt(), + sqrt_n + 1, + "`{0}.pow(2).isqrt()` should be {0}.", + sqrt_n + 1 + ); + + n -= 1; + isqrt_consistency_check(n); + assert_eq!( + n.isqrt(), + sqrt_n, + "`({}.pow(2) - 1).isqrt()` should be {sqrt_n}.", + sqrt_n + 1 + ); + + n -= sqrt_n; + isqrt_consistency_check(n); + assert_eq!( + n.isqrt(), + sqrt_n, + "{n} is about halfway between `{sqrt_n}.pow(2)` and `{}.pow(2)`, so `{n}.isqrt()` should be {sqrt_n}.", + sqrt_n + 1 + ); + + n -= sqrt_n; + } + } + } + )* + }; +} + +macro_rules! signed_check { + ($T:ident) => { + /// This takes an input and, if it's nonnegative or + #[doc = concat!("`", stringify!($T), "::MIN`,")] + /// checks that `isqrt` and `checked_isqrt` produce equivalent results + /// for that input and for the negative of that input. + /// + /// # Note + /// + /// This cannot check that negative inputs to `isqrt` cause panics if + /// panics abort instead of unwind. + fn isqrt_consistency_check(n: $T) { + // `<$T>::MIN` will be negative, so ignore it in this nonnegative + // section. + if n >= 0 { + assert_eq!( + Some(n.isqrt()), + n.checked_isqrt(), + "`{n}.checked_isqrt()` should match `Some({n}.isqrt())`.", + ); + } + + // `wrapping_neg` so that `<$T>::MIN` will negate to itself rather + // than panicking. + let negative_n = n.wrapping_neg(); + + // Zero negated will still be nonnegative, so ignore it in this + // negative section. + if negative_n < 0 { + assert_eq!( + negative_n.checked_isqrt(), + None, + "`({negative_n}).checked_isqrt()` should be `None`, as {negative_n} is negative.", + ); + + // `catch_unwind` only works when panics unwind rather than abort. + #[cfg(panic = "unwind")] + { + std::panic::catch_unwind(core::panic::AssertUnwindSafe(|| (-n).isqrt())).expect_err( + &format!("`({negative_n}).isqrt()` should have panicked, as {negative_n} is negative.") + ); + } + } + } + }; +} + +macro_rules! unsigned_check { + ($T:ident) => { + /// This takes an input and, if it's nonzero, checks that `isqrt` + /// produces the same numeric value for both + #[doc = concat!("`", stringify!($T), "` and ")] + #[doc = concat!("`NonZero<", stringify!($T), ">`.")] + fn isqrt_consistency_check(n: $T) { + // Zero cannot be turned into a `NonZero` value, so ignore it in + // this nonzero section. + if n > 0 { + assert_eq!( + n.isqrt(), + core::num::NonZero::<$T>::new(n) + .expect( + "Was not able to create a new `NonZero` value from a nonzero number." + ) + .isqrt() + .get(), + "`{n}.isqrt` should match `NonZero`'s `{n}.isqrt().get()`.", + ); + } + } + }; +} + +tests!(signed_check: i8 i16 i32 i64 i128); +tests!(unsigned_check: u8 u16 u32 u64 u128); diff --git a/library/core/tests/num/mod.rs b/library/core/tests/num/mod.rs index dad46ad88fe19..b14fe0b22c311 100644 --- a/library/core/tests/num/mod.rs +++ b/library/core/tests/num/mod.rs @@ -27,6 +27,7 @@ mod const_from; mod dec2flt; mod flt2dec; mod int_log; +mod int_sqrt; mod ops; mod wrapping; From 7af8e218da9d7d0b4b1c244ecd192bac8c847b1b Mon Sep 17 00:00:00 2001 From: "Chai T. Rex" Date: Mon, 26 Aug 2024 01:36:25 -0400 Subject: [PATCH 2/2] Speed up `checked_isqrt` and `isqrt` methods * Use a lookup table for 8-bit integers and the Karatsuba square root algorithm for larger integers. * Include optimization hints that give the compiler the exact numeric range of results. --- library/core/src/num/int_macros.rs | 36 +++- library/core/src/num/int_sqrt.rs | 316 ++++++++++++++++++++++++++++ library/core/src/num/mod.rs | 1 + library/core/src/num/nonzero.rs | 33 +-- library/core/src/num/uint_macros.rs | 20 +- 5 files changed, 371 insertions(+), 35 deletions(-) create mode 100644 library/core/src/num/int_sqrt.rs diff --git a/library/core/src/num/int_macros.rs b/library/core/src/num/int_macros.rs index 229cd89bea3f0..f3d38b25ab4fe 100644 --- a/library/core/src/num/int_macros.rs +++ b/library/core/src/num/int_macros.rs @@ -1641,7 +1641,33 @@ macro_rules! int_impl { if self < 0 { None } else { - Some((self as $UnsignedT).isqrt() as Self) + // SAFETY: Input is nonnegative in this `else` branch. + let result = unsafe { + crate::num::int_sqrt::$ActualT(self as $ActualT) as $SelfT + }; + + // Inform the optimizer what the range of outputs is. If + // testing `core` crashes with no panic message and a + // `num::int_sqrt::i*` test failed, it's because your edits + // caused these assertions to become false. + // + // SAFETY: Integer square root is a monotonically nondecreasing + // function, which means that increasing the input will never + // cause the output to decrease. Thus, since the input for + // nonnegative signed integers is bounded by + // `[0, <$ActualT>::MAX]`, sqrt(n) will be bounded by + // `[sqrt(0), sqrt(<$ActualT>::MAX)]`. + unsafe { + // SAFETY: `<$ActualT>::MAX` is nonnegative. + const MAX_RESULT: $SelfT = unsafe { + crate::num::int_sqrt::$ActualT(<$ActualT>::MAX) as $SelfT + }; + + crate::hint::assert_unchecked(result >= 0); + crate::hint::assert_unchecked(result <= MAX_RESULT); + } + + Some(result) } } @@ -2862,15 +2888,11 @@ macro_rules! int_impl { #[must_use = "this returns the result of the operation, \ without modifying the original"] #[inline] + #[track_caller] pub const fn isqrt(self) -> Self { - // I would like to implement it as - // ``` - // self.checked_isqrt().expect("argument of integer square root must be non-negative") - // ``` - // but `expect` is not yet stable as a `const fn`. match self.checked_isqrt() { Some(sqrt) => sqrt, - None => panic!("argument of integer square root must be non-negative"), + None => crate::num::int_sqrt::panic_for_negative_argument(), } } diff --git a/library/core/src/num/int_sqrt.rs b/library/core/src/num/int_sqrt.rs new file mode 100644 index 0000000000000..601e81f69930f --- /dev/null +++ b/library/core/src/num/int_sqrt.rs @@ -0,0 +1,316 @@ +//! These functions use the [Karatsuba square root algorithm][1] to compute the +//! [integer square root](https://en.wikipedia.org/wiki/Integer_square_root) +//! for the primitive integer types. +//! +//! The signed integer functions can only handle **nonnegative** inputs, so +//! that must be checked before calling those. +//! +//! [1]: +//! "Paul Zimmermann. Karatsuba Square Root. \[Research Report\] RR-3805, +//! INRIA. 1999, pp.8. (inria-00072854)" + +/// This array stores the [integer square roots]( +/// https://en.wikipedia.org/wiki/Integer_square_root) and remainders of each +/// [`u8`](prim@u8) value. For example, `U8_ISQRT_WITH_REMAINDER[17]` will be +/// `(4, 1)` because the integer square root of 17 is 4 and because 17 is 1 +/// higher than 4 squared. +const U8_ISQRT_WITH_REMAINDER: [(u8, u8); 256] = { + let mut result = [(0, 0); 256]; + + let mut n: usize = 0; + let mut isqrt_n: usize = 0; + while n < result.len() { + result[n] = (isqrt_n as u8, (n - isqrt_n.pow(2)) as u8); + + n += 1; + if n == (isqrt_n + 1).pow(2) { + isqrt_n += 1; + } + } + + result +}; + +/// Returns the [integer square root]( +/// https://en.wikipedia.org/wiki/Integer_square_root) of any [`u8`](prim@u8) +/// input. +#[must_use = "this returns the result of the operation, \ + without modifying the original"] +#[inline] +pub const fn u8(n: u8) -> u8 { + U8_ISQRT_WITH_REMAINDER[n as usize].0 +} + +/// Generates an `i*` function that returns the [integer square root]( +/// https://en.wikipedia.org/wiki/Integer_square_root) of any **nonnegative** +/// input of a specific signed integer type. +macro_rules! signed_fn { + ($SignedT:ident, $UnsignedT:ident) => { + /// Returns the [integer square root]( + /// https://en.wikipedia.org/wiki/Integer_square_root) of any + /// **nonnegative** + #[doc = concat!("[`", stringify!($SignedT), "`](prim@", stringify!($SignedT), ")")] + /// input. + /// + /// # Safety + /// + /// This results in undefined behavior when the input is negative. + #[must_use = "this returns the result of the operation, \ + without modifying the original"] + #[inline] + pub const unsafe fn $SignedT(n: $SignedT) -> $SignedT { + debug_assert!(n >= 0, "Negative input inside `isqrt`."); + $UnsignedT(n as $UnsignedT) as $SignedT + } + }; +} + +signed_fn!(i8, u8); +signed_fn!(i16, u16); +signed_fn!(i32, u32); +signed_fn!(i64, u64); +signed_fn!(i128, u128); + +/// Generates a `u*` function that returns the [integer square root]( +/// https://en.wikipedia.org/wiki/Integer_square_root) of any input of +/// a specific unsigned integer type. +macro_rules! unsigned_fn { + ($UnsignedT:ident, $HalfBitsT:ident, $stages:ident) => { + /// Returns the [integer square root]( + /// https://en.wikipedia.org/wiki/Integer_square_root) of any + #[doc = concat!("[`", stringify!($UnsignedT), "`](prim@", stringify!($UnsignedT), ")")] + /// input. + #[must_use = "this returns the result of the operation, \ + without modifying the original"] + #[inline] + pub const fn $UnsignedT(mut n: $UnsignedT) -> $UnsignedT { + if n <= <$HalfBitsT>::MAX as $UnsignedT { + $HalfBitsT(n as $HalfBitsT) as $UnsignedT + } else { + // The normalization shift satisfies the Karatsuba square root + // algorithm precondition "a₃ ≥ b/4" where a₃ is the most + // significant quarter of `n`'s bits and b is the number of + // values that can be represented by that quarter of the bits. + // + // b/4 would then be all 0s except the second most significant + // bit (010...0) in binary. Since a₃ must be at least b/4, a₃'s + // most significant bit or its neighbor must be a 1. Since a₃'s + // most significant bits are `n`'s most significant bits, the + // same applies to `n`. + // + // The reason to shift by an even number of bits is because an + // even number of bits produces the square root shifted to the + // left by half of the normalization shift: + // + // sqrt(n << (2 * p)) + // sqrt(2.pow(2 * p) * n) + // sqrt(2.pow(2 * p)) * sqrt(n) + // 2.pow(p) * sqrt(n) + // sqrt(n) << p + // + // Shifting by an odd number of bits leaves an ugly sqrt(2) + // multiplied in: + // + // sqrt(n << (2 * p + 1)) + // sqrt(2.pow(2 * p + 1) * n) + // sqrt(2 * 2.pow(2 * p) * n) + // sqrt(2) * sqrt(2.pow(2 * p)) * sqrt(n) + // sqrt(2) * 2.pow(p) * sqrt(n) + // sqrt(2) * (sqrt(n) << p) + const EVEN_MAKING_BITMASK: u32 = !1; + let normalization_shift = n.leading_zeros() & EVEN_MAKING_BITMASK; + n <<= normalization_shift; + + let s = $stages(n); + + let denormalization_shift = normalization_shift >> 1; + s >> denormalization_shift + } + } + }; +} + +/// Generates the first stage of the computation after normalization. +/// +/// # Safety +/// +/// `$n` must be nonzero. +macro_rules! first_stage { + ($original_bits:literal, $n:ident) => {{ + debug_assert!($n != 0, "`$n` is zero in `first_stage!`."); + + const N_SHIFT: u32 = $original_bits - 8; + let n = $n >> N_SHIFT; + + let (s, r) = U8_ISQRT_WITH_REMAINDER[n as usize]; + + // Inform the optimizer that `s` is nonzero. This will allow it to + // avoid generating code to handle division-by-zero panics in the next + // stage. + // + // SAFETY: If the original `$n` is zero, the top of the `unsigned_fn` + // macro recurses instead of continuing to this point, so the original + // `$n` wasn't a 0 if we've reached here. + // + // Then the `unsigned_fn` macro normalizes `$n` so that at least one of + // its two most-significant bits is a 1. + // + // Then this stage puts the eight most-significant bits of `$n` into + // `n`. This means that `n` here has at least one 1 bit in its two + // most-significant bits, making `n` nonzero. + // + // `U8_ISQRT_WITH_REMAINDER[n as usize]` will give a nonzero `s` when + // given a nonzero `n`. + unsafe { crate::hint::assert_unchecked(s != 0) }; + (s, r) + }}; +} + +/// Generates a middle stage of the computation. +/// +/// # Safety +/// +/// `$s` must be nonzero. +macro_rules! middle_stage { + ($original_bits:literal, $ty:ty, $n:ident, $s:ident, $r:ident) => {{ + debug_assert!($s != 0, "`$s` is zero in `middle_stage!`."); + + const N_SHIFT: u32 = $original_bits - <$ty>::BITS; + let n = ($n >> N_SHIFT) as $ty; + + const HALF_BITS: u32 = <$ty>::BITS >> 1; + const QUARTER_BITS: u32 = <$ty>::BITS >> 2; + const LOWER_HALF_1_BITS: $ty = (1 << HALF_BITS) - 1; + const LOWEST_QUARTER_1_BITS: $ty = (1 << QUARTER_BITS) - 1; + + let lo = n & LOWER_HALF_1_BITS; + let numerator = (($r as $ty) << QUARTER_BITS) | (lo >> QUARTER_BITS); + let denominator = ($s as $ty) << 1; + let q = numerator / denominator; + let u = numerator % denominator; + + let mut s = ($s << QUARTER_BITS) as $ty + q; + let (mut r, overflow) = + ((u << QUARTER_BITS) | (lo & LOWEST_QUARTER_1_BITS)).overflowing_sub(q * q); + if overflow { + r = r.wrapping_add(2 * s - 1); + s -= 1; + } + + // Inform the optimizer that `s` is nonzero. This will allow it to + // avoid generating code to handle division-by-zero panics in the next + // stage. + // + // SAFETY: If the original `$n` is zero, the top of the `unsigned_fn` + // macro recurses instead of continuing to this point, so the original + // `$n` wasn't a 0 if we've reached here. + // + // Then the `unsigned_fn` macro normalizes `$n` so that at least one of + // its two most-significant bits is a 1. + // + // Then these stages take as many of the most-significant bits of `$n` + // as will fit in this stage's type. For example, the stage that + // handles `u32` deals with the 32 most-significant bits of `$n`. This + // means that each stage has at least one 1 bit in `n`'s two + // most-significant bits, making `n` nonzero. + // + // Then this stage will produce the correct integer square root for + // that `n` value. Since `n` is nonzero, `s` will also be nonzero. + unsafe { crate::hint::assert_unchecked(s != 0) }; + (s, r) + }}; +} + +/// Generates the last stage of the computation before denormalization. +/// +/// # Safety +/// +/// `$s` must be nonzero. +macro_rules! last_stage { + ($ty:ty, $n:ident, $s:ident, $r:ident) => {{ + debug_assert!($s != 0, "`$s` is zero in `last_stage!`."); + + const HALF_BITS: u32 = <$ty>::BITS >> 1; + const QUARTER_BITS: u32 = <$ty>::BITS >> 2; + const LOWER_HALF_1_BITS: $ty = (1 << HALF_BITS) - 1; + + let lo = $n & LOWER_HALF_1_BITS; + let numerator = (($r as $ty) << QUARTER_BITS) | (lo >> QUARTER_BITS); + let denominator = ($s as $ty) << 1; + + let q = numerator / denominator; + let mut s = ($s << QUARTER_BITS) as $ty + q; + let (s_squared, overflow) = s.overflowing_mul(s); + if overflow || s_squared > $n { + s -= 1; + } + s + }}; +} + +/// Takes the normalized [`u16`](prim@u16) input and gets its normalized +/// [integer square root](https://en.wikipedia.org/wiki/Integer_square_root). +/// +/// # Safety +/// +/// `n` must be nonzero. +#[inline] +const fn u16_stages(n: u16) -> u16 { + let (s, r) = first_stage!(16, n); + last_stage!(u16, n, s, r) +} + +/// Takes the normalized [`u32`](prim@u32) input and gets its normalized +/// [integer square root](https://en.wikipedia.org/wiki/Integer_square_root). +/// +/// # Safety +/// +/// `n` must be nonzero. +#[inline] +const fn u32_stages(n: u32) -> u32 { + let (s, r) = first_stage!(32, n); + let (s, r) = middle_stage!(32, u16, n, s, r); + last_stage!(u32, n, s, r) +} + +/// Takes the normalized [`u64`](prim@u64) input and gets its normalized +/// [integer square root](https://en.wikipedia.org/wiki/Integer_square_root). +/// +/// # Safety +/// +/// `n` must be nonzero. +#[inline] +const fn u64_stages(n: u64) -> u64 { + let (s, r) = first_stage!(64, n); + let (s, r) = middle_stage!(64, u16, n, s, r); + let (s, r) = middle_stage!(64, u32, n, s, r); + last_stage!(u64, n, s, r) +} + +/// Takes the normalized [`u128`](prim@u128) input and gets its normalized +/// [integer square root](https://en.wikipedia.org/wiki/Integer_square_root). +/// +/// # Safety +/// +/// `n` must be nonzero. +#[inline] +const fn u128_stages(n: u128) -> u128 { + let (s, r) = first_stage!(128, n); + let (s, r) = middle_stage!(128, u16, n, s, r); + let (s, r) = middle_stage!(128, u32, n, s, r); + let (s, r) = middle_stage!(128, u64, n, s, r); + last_stage!(u128, n, s, r) +} + +unsigned_fn!(u16, u8, u16_stages); +unsigned_fn!(u32, u16, u32_stages); +unsigned_fn!(u64, u32, u64_stages); +unsigned_fn!(u128, u64, u128_stages); + +/// Instantiate this panic logic once, rather than for all the isqrt methods +/// on every single primitive type. +#[cold] +#[track_caller] +pub const fn panic_for_negative_argument() -> ! { + panic!("argument of integer square root cannot be negative") +} diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs index 309e1ba958aee..e9e5324666ada 100644 --- a/library/core/src/num/mod.rs +++ b/library/core/src/num/mod.rs @@ -41,6 +41,7 @@ mod uint_macros; // import uint_impl! mod error; mod int_log10; +mod int_sqrt; mod nonzero; mod overflow_panic; mod saturating; diff --git a/library/core/src/num/nonzero.rs b/library/core/src/num/nonzero.rs index c6e9c249048a7..8b888f12da0b1 100644 --- a/library/core/src/num/nonzero.rs +++ b/library/core/src/num/nonzero.rs @@ -7,7 +7,7 @@ use crate::marker::{Freeze, StructuralPartialEq}; use crate::ops::{BitOr, BitOrAssign, Div, DivAssign, Neg, Rem, RemAssign}; use crate::panic::{RefUnwindSafe, UnwindSafe}; use crate::str::FromStr; -use crate::{fmt, hint, intrinsics, ptr, ub_checks}; +use crate::{fmt, intrinsics, ptr, ub_checks}; /// A marker trait for primitive types which can be zero. /// @@ -1545,31 +1545,14 @@ macro_rules! nonzero_integer_signedness_dependent_methods { without modifying the original"] #[inline] pub const fn isqrt(self) -> Self { - // The algorithm is based on the one presented in - // - // which cites as source the following C code: - // . - - let mut op = self.get(); - let mut res = 0; - let mut one = 1 << (self.ilog2() & !1); - - while one != 0 { - if op >= res + one { - op -= res + one; - res = (res >> 1) + one; - } else { - res >>= 1; - } - one >>= 2; - } + let result = self.get().isqrt(); - // SAFETY: The result fits in an integer with half as many bits. - // Inform the optimizer about it. - unsafe { hint::assert_unchecked(res < 1 << (Self::BITS / 2)) }; - - // SAFETY: The square root of an integer >= 1 is always >= 1. - unsafe { Self::new_unchecked(res) } + // SAFETY: Integer square root is a monotonically nondecreasing + // function, which means that increasing the input will never cause + // the output to decrease. Thus, since the input for nonzero + // unsigned integers has a lower bound of 1, the lower bound of the + // results will be sqrt(1), which is 1, so a result can't be zero. + unsafe { Self::new_unchecked(result) } } }; diff --git a/library/core/src/num/uint_macros.rs b/library/core/src/num/uint_macros.rs index 0d0bbc5256f78..d9036abecc592 100644 --- a/library/core/src/num/uint_macros.rs +++ b/library/core/src/num/uint_macros.rs @@ -2762,10 +2762,24 @@ macro_rules! uint_impl { without modifying the original"] #[inline] pub const fn isqrt(self) -> Self { - match NonZero::new(self) { - Some(x) => x.isqrt().get(), - None => 0, + let result = crate::num::int_sqrt::$ActualT(self as $ActualT) as $SelfT; + + // Inform the optimizer what the range of outputs is. If testing + // `core` crashes with no panic message and a `num::int_sqrt::u*` + // test failed, it's because your edits caused these assertions or + // the assertions in `fn isqrt` of `nonzero.rs` to become false. + // + // SAFETY: Integer square root is a monotonically nondecreasing + // function, which means that increasing the input will never + // cause the output to decrease. Thus, since the input for unsigned + // integers is bounded by `[0, <$ActualT>::MAX]`, sqrt(n) will be + // bounded by `[sqrt(0), sqrt(<$ActualT>::MAX)]`. + unsafe { + const MAX_RESULT: $SelfT = crate::num::int_sqrt::$ActualT(<$ActualT>::MAX) as $SelfT; + crate::hint::assert_unchecked(result <= MAX_RESULT); } + + result } /// Performs Euclidean division.