From f792e480ab3720d5b156910716c0aceb360c0b45 Mon Sep 17 00:00:00 2001
From: Paul Dicker
Date: Sat, 7 Apr 2018 17:08:47 +0200
Subject: [PATCH 1/6] Add simd_support feature

---
 Cargo.toml | 3 ++-
 src/lib.rs | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index 0985f27ae90..440ac546fd4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -19,10 +19,11 @@ appveyor = { repository = "alexcrichton/rand" }
 
 [features]
 default = ["std" ] # without "std" rand uses libcore
-nightly = ["i128_support"] # enables all features requiring nightly rust
+nightly = ["i128_support", "simd_support"] # enables all features requiring nightly rust
 std = ["rand_core/std", "alloc", "libc", "winapi", "cloudabi", "fuchsia-zircon"]
 alloc = ["rand_core/alloc"] # enables Vec and Box support (without std)
 i128_support = [] # enables i128 and u128 support
+simd_support = [] # enables SIMD support
 serde1 = ["serde", "serde_derive", "rand_core/serde1"] # enables serialization for PRNGs
 
 [workspace]
diff --git a/src/lib.rs b/src/lib.rs
index e75b72992bd..77ecf38f537 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -233,6 +233,7 @@
 #![cfg_attr(all(feature="alloc", not(feature="std")), feature(alloc))]
 #![cfg_attr(all(feature="i128_support", feature="nightly"), allow(stable_features))] // stable since 2018-03-27
 #![cfg_attr(all(feature="i128_support", feature="nightly"), feature(i128_type, i128))]
+#![cfg_attr(all(feature="simd_support", feature="nightly"), feature(stdsimd))]
 #![cfg_attr(feature = "stdweb", recursion_limit="128")]
 
 #[cfg(feature="std")] extern crate std as core;

From 38b9a29516dbba0e6891aa9b3c223c924dc7697f Mon Sep 17 00:00:00 2001
From: Paul Dicker
Date: Fri, 6 Apr 2018 21:52:01 +0200
Subject: [PATCH 2/6] Implement Standard for SIMD integer types

---
 src/distributions/integer.rs | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/src/distributions/integer.rs b/src/distributions/integer.rs
index a23ddd51aba..82efd9ba795 100644
--- a/src/distributions/integer.rs
+++ b/src/distributions/integer.rs
@@ -12,6 +12,8 @@
 
 use {Rng};
 use distributions::{Distribution, Standard};
+#[cfg(feature="simd_support")]
+use core::simd::*;
 
 impl Distribution<u8> for Standard {
     #[inline]
@@ -84,6 +86,39 @@ impl_int_from_uint! { i64, u64 }
 #[cfg(feature = "i128_support")] impl_int_from_uint! { i128, u128 }
 impl_int_from_uint! { isize, usize }
 
+#[cfg(feature="simd_support")]
+macro_rules! simd_impl {
+    ($bits:expr,) => {};
+    ($bits:expr, $ty:ty, $($ty_more:ty,)*) => {
+        simd_impl!($bits, $($ty_more,)*);
+
+        impl Distribution<$ty> for Standard {
+            #[inline]
+            fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> $ty {
+                let mut vec = Default::default();
+                unsafe {
+                    let ptr = &mut vec;
+                    let b_ptr = &mut *(ptr as *mut $ty as *mut [u8; $bits/8]);
+                    rng.fill_bytes(b_ptr);
+                }
+                vec
+            }
+        }
+    }
+}
+
+#[cfg(feature="simd_support")]
+simd_impl!(16, u8x2, i8x2,);
+#[cfg(feature="simd_support")]
+simd_impl!(32, u8x4, i8x4, u16x2, i16x2,);
+#[cfg(feature="simd_support")]
+simd_impl!(64, u8x8, i8x8, u16x4, i16x4, u32x2, i32x2,);
+#[cfg(feature="simd_support")]
+simd_impl!(128, u8x16, i8x16, u16x8, i16x8, u32x4, i32x4, u64x2, i64x2,);
+#[cfg(feature="simd_support")]
+simd_impl!(256, u8x32, i8x32, u16x16, i16x16, u32x8, i32x8, u64x4, i64x4,);
+#[cfg(feature="simd_support")]
+simd_impl!(512, u8x64, i8x64, u16x32, i16x32, u32x16, i32x16, u64x8, i64x8,);
 
 #[cfg(test)]
 mod tests {

From d80efb2bf17a1e410b582c2e67f310d7968e6f7e Mon Sep 17 00:00:00 2001
From: Paul Dicker
Date: Sat, 7 Apr 2018 17:02:36 +0200
Subject: [PATCH 3/6] Implement Standard for SIMD float types

---
 src/distributions/float.rs | 198 +++++++++++++++++++++++++++++--------
 1 file changed, 155 insertions(+), 43 deletions(-)

diff --git a/src/distributions/float.rs b/src/distributions/float.rs
index 0058122443c..9cdd8a9e145 100644
--- a/src/distributions/float.rs
+++ b/src/distributions/float.rs
@@ -13,6 +13,8 @@ use core::mem;
 use Rng;
 use distributions::{Distribution, Standard};
+#[cfg(feature="simd_support")]
+use core::simd::*;
 
 /// A distribution to sample floating point numbers uniformly in the half-open
 /// interval `(0, 1]`, i.e. including 1 but not 0.
@@ -144,63 +146,173 @@ float_impls! { f32, u32, 23, 127 }
 float_impls! { f64, u64, 52, 1023 }
 
+
+#[cfg(feature="simd_support")]
+macro_rules! simd_float_impls {
+    ($ty:ident, $uty:ident, $f_scalar:ty, $u_scalar:ty,
+     $fraction_bits:expr, $exponent_bias:expr) => {
+        impl IntoFloat for $uty {
+            type F = $ty;
+            #[inline(always)]
+            fn into_float_with_exponent(self, exponent: i32) -> $ty {
+                // The exponent is encoded using an offset-binary representation
+                let exponent_bits: $u_scalar =
+                    (($exponent_bias + exponent) as $u_scalar) << $fraction_bits;
+                unsafe { mem::transmute(self | $uty::splat(exponent_bits)) }
+            }
+        }
+
+        impl Distribution<$ty> for Standard {
+            fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> $ty {
+                // Multiply-based method; 24/53 random bits; [0, 1) interval.
+                // We use the most significant bits because for simple RNGs
+                // those are usually more random.
+                let float_size = mem::size_of::<$f_scalar>() * 8;
+                let precision = $fraction_bits + 1;
+                let scale = $ty::splat(1.0 / ((1 as $u_scalar << precision) as $f_scalar));
+
+                let value: $uty = rng.gen();
+                let value = $ty::from(value >> (float_size - precision));
+                scale * value
+            }
+        }
+
+        impl Distribution<$ty> for OpenClosed01 {
+            fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> $ty {
+                // Multiply-based method; 24/53 random bits; (0, 1] interval.
+                // We use the most significant bits because for simple RNGs
+                // those are usually more random.
+                let float_size = mem::size_of::<$f_scalar>() * 8;
+                let precision = $fraction_bits + 1;
+                let scale = $ty::splat(1.0 / ((1 as $u_scalar << precision) as $f_scalar));
+
+                let value: $uty = rng.gen();
+                // Add 1 to shift up; will not overflow because of right-shift:
+                let value = $ty::from((value >> (float_size - precision)) + 1);
+                scale * value
+            }
+        }
+
+        impl Distribution<$ty> for Open01 {
+            fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> $ty {
+                // Transmute-based method; 23/52 random bits; (0, 1) interval.
+                // We use the most significant bits because for simple RNGs
+                // those are usually more random.
+                const EPSILON: $f_scalar = 1.0 / (1u64 << $fraction_bits) as $f_scalar;
+                let float_size = mem::size_of::<$f_scalar>() * 8;
+
+                let value: $uty = rng.gen();
+                let fraction = value >> (float_size - $fraction_bits);
+                fraction.into_float_with_exponent(0) - $ty::splat(1.0 - EPSILON / 2.0)
+            }
+        }
+    }
+}
+
+#[cfg(feature="simd_support")]
+simd_float_impls! { f32x2, u32x2, f32, u32, 23, 127 }
+#[cfg(feature="simd_support")]
+simd_float_impls! { f32x4, u32x4, f32, u32, 23, 127 }
+#[cfg(feature="simd_support")]
+simd_float_impls! { f32x8, u32x8, f32, u32, 23, 127 }
+#[cfg(feature="simd_support")]
+simd_float_impls! { f32x16, u32x16, f32, u32, 23, 127 }
+
+#[cfg(feature="simd_support")]
+simd_float_impls! { f64x2, u64x2, f64, u64, 52, 1023 }
+#[cfg(feature="simd_support")]
+simd_float_impls! { f64x4, u64x4, f64, u64, 52, 1023 }
+#[cfg(feature="simd_support")]
+simd_float_impls! { f64x8, u64x8, f64, u64, 52, 1023 }
+
 
 #[cfg(test)]
 mod tests {
     use Rng;
     use distributions::{Open01, OpenClosed01};
     use rngs::mock::StepRng;
+    #[cfg(feature="simd_support")]
+    use core::simd::*;
 
     const EPSILON32: f32 = ::core::f32::EPSILON;
     const EPSILON64: f64 = ::core::f64::EPSILON;
 
-    #[test]
-    fn standard_fp_edge_cases() {
-        let mut zeros = StepRng::new(0, 0);
-        assert_eq!(zeros.gen::<f32>(), 0.0);
-        assert_eq!(zeros.gen::<f64>(), 0.0);
-
-        let mut one32 = StepRng::new(1 << 8, 0);
-        assert_eq!(one32.gen::<f32>(), EPSILON32 / 2.0);
-
-        let mut one64 = StepRng::new(1 << 11, 0);
-        assert_eq!(one64.gen::<f64>(), EPSILON64 / 2.0);
-
-        let mut max = StepRng::new(!0, 0);
-        assert_eq!(max.gen::<f32>(), 1.0 - EPSILON32 / 2.0);
-        assert_eq!(max.gen::<f64>(), 1.0 - EPSILON64 / 2.0);
-    }
-
-    #[test]
-    fn openclosed01_edge_cases() {
-        let mut zeros = StepRng::new(0, 0);
-        assert_eq!(zeros.sample::<f32, _>(OpenClosed01), 0.0 + EPSILON32 / 2.0);
-        assert_eq!(zeros.sample::<f64, _>(OpenClosed01), 0.0 + EPSILON64 / 2.0);
-
-        let mut one32 = StepRng::new(1 << 8, 0);
-        assert_eq!(one32.sample::<f32, _>(OpenClosed01), EPSILON32);
+    macro_rules! test_f32 {
+        ($fnn:ident, $ty:ident, $ZERO:expr, $EPSILON:expr) => {
+            #[test]
+            fn $fnn() {
+                // Standard
+                let mut zeros = StepRng::new(0, 0);
+                assert_eq!(zeros.gen::<$ty>(), $ZERO);
+                let mut one = StepRng::new(1 << 8 | 1 << (8 + 32), 0);
+                assert_eq!(one.gen::<$ty>(), $EPSILON / 2.0);
+                let mut max = StepRng::new(!0, 0);
+                assert_eq!(max.gen::<$ty>(), 1.0 - $EPSILON / 2.0);
-
-        let mut one64 = StepRng::new(1 << 11, 0);
-        assert_eq!(one64.sample::<f64, _>(OpenClosed01), EPSILON64);
+
+                // OpenClosed01
+                let mut zeros = StepRng::new(0, 0);
+                assert_eq!(zeros.sample::<$ty, _>(OpenClosed01),
+                           0.0 + $EPSILON / 2.0);
+                let mut one = StepRng::new(1 << 8 | 1 << (8 + 32), 0);
+                assert_eq!(one.sample::<$ty, _>(OpenClosed01), $EPSILON);
+                let mut max = StepRng::new(!0, 0);
+                assert_eq!(max.sample::<$ty, _>(OpenClosed01), $ZERO + 1.0);
-
-        let mut max = StepRng::new(!0, 0);
-        assert_eq!(max.sample::<f32, _>(OpenClosed01), 1.0);
-        assert_eq!(max.sample::<f64, _>(OpenClosed01), 1.0);
+
+                // Open01
+                let mut zeros = StepRng::new(0, 0);
+                assert_eq!(zeros.sample::<$ty, _>(Open01), 0.0 + $EPSILON / 2.0);
+                let mut one = StepRng::new(1 << 9 | 1 << (9 + 32), 0);
+                assert_eq!(one.sample::<$ty, _>(Open01), $EPSILON / 2.0 * 3.0);
+                let mut max = StepRng::new(!0, 0);
+                assert_eq!(max.sample::<$ty, _>(Open01), 1.0 - $EPSILON / 2.0);
+            }
+        }
+    }
+    test_f32! { f32_edge_cases, f32, 0.0, ::core::f32::EPSILON }
+    #[cfg(feature="simd_support")]
+    test_f32! { f32x2_edge_cases, f32x2, f32x2::splat(0.0), f32x2::splat(EPSILON32) }
+    #[cfg(feature="simd_support")]
+    test_f32! { f32x4_edge_cases, f32x4, f32x4::splat(0.0), f32x4::splat(EPSILON32) }
+    #[cfg(feature="simd_support")]
+    test_f32! { f32x8_edge_cases, f32x8, f32x8::splat(0.0), f32x8::splat(EPSILON32) }
+    #[cfg(feature="simd_support")]
+    test_f32! { f32x16_edge_cases, f32x16, f32x16::splat(0.0), f32x16::splat(EPSILON32) }
-    }
-
-    #[test]
-    fn open01_edge_cases() {
-        let mut zeros = StepRng::new(0, 0);
-        assert_eq!(zeros.sample::<f32, _>(Open01), 0.0 + EPSILON32 / 2.0);
-        assert_eq!(zeros.sample::<f64, _>(Open01), 0.0 + EPSILON64 / 2.0);
+
+    macro_rules! test_f64 {
+        ($fnn:ident, $ty:ident, $ZERO:expr, $EPSILON:expr) => {
+            #[test]
+            fn $fnn() {
+                // Standard
+                let mut zeros = StepRng::new(0, 0);
+                assert_eq!(zeros.gen::<$ty>(), $ZERO);
+                let mut one = StepRng::new(1 << 11, 0);
+                assert_eq!(one.gen::<$ty>(), $EPSILON / 2.0);
+                let mut max = StepRng::new(!0, 0);
+                assert_eq!(max.gen::<$ty>(), 1.0 - $EPSILON / 2.0);
-
-        let mut one32 = StepRng::new(1 << 9, 0);
-        assert_eq!(one32.sample::<f32, _>(Open01), EPSILON32 / 2.0 * 3.0);
+
+                // OpenClosed01
+                let mut zeros = StepRng::new(0, 0);
+                assert_eq!(zeros.sample::<$ty, _>(OpenClosed01),
+                           0.0 + $EPSILON / 2.0);
+                let mut one = StepRng::new(1 << 11, 0);
+                assert_eq!(one.sample::<$ty, _>(OpenClosed01), $EPSILON);
+                let mut max = StepRng::new(!0, 0);
+                assert_eq!(max.sample::<$ty, _>(OpenClosed01), $ZERO + 1.0);
-
-        let mut one64 = StepRng::new(1 << 12, 0);
-        assert_eq!(one64.sample::<f64, _>(Open01), EPSILON64 / 2.0 * 3.0);
-
-        let mut max = StepRng::new(!0, 0);
-        assert_eq!(max.sample::<f32, _>(Open01), 1.0 - EPSILON32 / 2.0);
-        assert_eq!(max.sample::<f64, _>(Open01), 1.0 - EPSILON64 / 2.0);
+
+                // Open01
+                let mut zeros = StepRng::new(0, 0);
+                assert_eq!(zeros.sample::<$ty, _>(Open01), 0.0 + $EPSILON / 2.0);
+                let mut one = StepRng::new(1 << 12, 0);
+                assert_eq!(one.sample::<$ty, _>(Open01), $EPSILON / 2.0 * 3.0);
+                let mut max = StepRng::new(!0, 0);
+                assert_eq!(max.sample::<$ty, _>(Open01), 1.0 - $EPSILON / 2.0);
+            }
+        }
+    }
+    test_f64! { f64_edge_cases, f64, 0.0, EPSILON64 }
+    #[cfg(feature="simd_support")]
+    test_f64! { f64x2_edge_cases, f64x2, f64x2::splat(0.0), f64x2::splat(EPSILON64) }
+    #[cfg(feature="simd_support")]
+    test_f64! { f64x4_edge_cases, f64x4, f64x4::splat(0.0), f64x4::splat(EPSILON64) }
+    #[cfg(feature="simd_support")]
+    test_f64! { f64x8_edge_cases, f64x8, f64x8::splat(0.0), f64x8::splat(EPSILON64) }
 }

From 209836f7c31aa04be3113c7fde5b82c9be8e6e84 Mon Sep 17 00:00:00 2001
From: Paul Dicker
Date: Sat, 7 Apr 2018 19:44:50 +0200
Subject: [PATCH 4/6] Implement Uniform for SIMD float types

---
 src/distributions/uniform.rs | 33 ++++++++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/src/distributions/uniform.rs b/src/distributions/uniform.rs
index 98bca748d01..1a2e660a7d2 100644
--- a/src/distributions/uniform.rs
+++ b/src/distributions/uniform.rs
@@ -117,6 +117,9 @@
 use Rng;
 use distributions::Distribution;
 use distributions::float::IntoFloat;
+#[cfg(feature="simd_support")]
+use core::simd::*;
+
 /// Sample values uniformly between two bounds.
 ///
 /// [`Uniform::new`] and [`Uniform::new_inclusive`] construct a uniform
@@ -580,7 +583,7 @@ pub struct UniformFloat<X> {
 }
 
 macro_rules! uniform_float_impl {
-    ($ty:ty, $bits_to_discard:expr, $next_u:ident) => {
+    ($ty:ty, $uty:ident, $bits_to_discard:expr) => {
         impl SampleUniform for $ty {
             type Sampler = UniformFloat<$ty>;
         }
@@ -621,8 +624,8 @@
             fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Self::X {
                 // Generate a value in the range [1, 2)
-                let value1_2 = (rng.$next_u() >> $bits_to_discard)
-                    .into_float_with_exponent(0);
+                let value: $uty = rng.gen::<$uty>() >> $bits_to_discard;
+                let value1_2 = value.into_float_with_exponent(0);
                 // We don't use `f64::mul_add`, because it is not available with
                 // `no_std`. Furthermore, it is slower for some targets (but
                 // faster for others). However, the order of multiplication and
@@ -643,8 +646,8 @@
                 let scale = high - low;
                 let offset = low - scale;
                 // Generate a value in the range [1, 2)
-                let value1_2 = (rng.$next_u() >> $bits_to_discard)
-                    .into_float_with_exponent(0);
+                let value: $uty = rng.gen::<$uty>() >> $bits_to_discard;
+                let value1_2 = value.into_float_with_exponent(0);
                 // Doing multiply before addition allows some architectures to
                 // use a single instruction.
                 value1_2 * scale + offset
             }
         }
     }
 }
 
-uniform_float_impl! { f32, 32 - 23, next_u32 }
-uniform_float_impl! { f64, 64 - 52, next_u64 }
+uniform_float_impl! { f32, u32, 32 - 23 }
+uniform_float_impl! { f64, u64, 64 - 52 }
+
+#[cfg(feature="simd_support")]
+uniform_float_impl! { f32x2, u32x2, 32 - 23 }
+#[cfg(feature="simd_support")]
+uniform_float_impl! { f32x4, u32x4, 32 - 23 }
+#[cfg(feature="simd_support")]
+uniform_float_impl! { f32x8, u32x8, 32 - 23 }
+#[cfg(feature="simd_support")]
+uniform_float_impl! { f32x16, u32x16, 32 - 23 }
+
+#[cfg(feature="simd_support")]
+uniform_float_impl! { f64x2, u64x2, 64 - 52 }
+#[cfg(feature="simd_support")]
+uniform_float_impl! { f64x4, u64x4, 64 - 52 }
+#[cfg(feature="simd_support")]
+uniform_float_impl! { f64x8, u64x8, 64 - 52 }

From 4dab1e3da732d3f7837e9a6f8c06ece7f6715980 Mon Sep 17 00:00:00 2001
From: Paul Dicker
Date: Fri, 22 Jun 2018 07:28:01 +0200
Subject: [PATCH 5/6] Add math_helpers module and various fixes

---
 src/distributions/float.rs        | 103 ++++---------------
 src/distributions/math_helpers.rs | 161 ++++++++++++++++++++++++++++++
 src/distributions/mod.rs          |   1 +
 src/distributions/uniform.rs      |  89 +----------------
 4 files changed, 189 insertions(+), 165 deletions(-)
 create mode 100644 src/distributions/math_helpers.rs

diff --git a/src/distributions/float.rs b/src/distributions/float.rs
index 9cdd8a9e145..facd753eba2 100644
--- a/src/distributions/float.rs
+++ b/src/distributions/float.rs
@@ -13,6 +13,7 @@ use core::mem;
 use Rng;
 use distributions::{Distribution, Standard};
+use distributions::math_helpers::CastFromInt;
 #[cfg(feature="simd_support")]
 use core::simd::*;
 
@@ -85,70 +86,7 @@ pub(crate) trait IntoFloat {
 }
 
 macro_rules! float_impls {
-    ($ty:ty, $uty:ty, $fraction_bits:expr, $exponent_bias:expr) => {
-        impl IntoFloat for $uty {
-            type F = $ty;
-            #[inline(always)]
-            fn into_float_with_exponent(self, exponent: i32) -> $ty {
-                // The exponent is encoded using an offset-binary representation
-                let exponent_bits =
-                    (($exponent_bias + exponent) as $uty) << $fraction_bits;
-                unsafe { mem::transmute(self | exponent_bits) }
-            }
-        }
-
-        impl Distribution<$ty> for Standard {
-            fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> $ty {
-                // Multiply-based method; 24/53 random bits; [0, 1) interval.
-                // We use the most significant bits because for simple RNGs
-                // those are usually more random.
-                let float_size = mem::size_of::<$ty>() * 8;
-                let precision = $fraction_bits + 1;
-                let scale = 1.0 / ((1 as $uty << precision) as $ty);
-
-                let value: $uty = rng.gen();
-                scale * (value >> (float_size - precision)) as $ty
-            }
-        }
-
-        impl Distribution<$ty> for OpenClosed01 {
-            fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> $ty {
-                // Multiply-based method; 24/53 random bits; (0, 1] interval.
-                // We use the most significant bits because for simple RNGs
-                // those are usually more random.
-                let float_size = mem::size_of::<$ty>() * 8;
-                let precision = $fraction_bits + 1;
-                let scale = 1.0 / ((1 as $uty << precision) as $ty);
-
-                let value: $uty = rng.gen();
-                let value = value >> (float_size - precision);
-                // Add 1 to shift up; will not overflow because of right-shift:
-                scale * (value + 1) as $ty
-            }
-        }
-
-        impl Distribution<$ty> for Open01 {
-            fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> $ty {
-                // Transmute-based method; 23/52 random bits; (0, 1) interval.
-                // We use the most significant bits because for simple RNGs
-                // those are usually more random.
-                const EPSILON: $ty = 1.0 / (1u64 << $fraction_bits) as $ty;
-                let float_size = mem::size_of::<$ty>() * 8;
-
-                let value: $uty = rng.gen();
-                let fraction = value >> (float_size - $fraction_bits);
-                fraction.into_float_with_exponent(0) - (1.0 - EPSILON / 2.0)
-            }
-        }
-    }
-}
-float_impls! { f32, u32, 23, 127 }
-float_impls! { f64, u64, 52, 1023 }
-
-
-#[cfg(feature="simd_support")]
-macro_rules! simd_float_impls {
-    ($ty:ident, $uty:ident, $f_scalar:ty, $u_scalar:ty,
+    ($ty:ident, $uty:ident, $f_scalar:ident, $u_scalar:ty,
      $fraction_bits:expr, $exponent_bias:expr) => {
         impl IntoFloat for $uty {
             type F = $ty;
@@ -157,7 +95,7 @@ macro_rules! simd_float_impls {
                 // The exponent is encoded using an offset-binary representation
                 let exponent_bits: $u_scalar =
                     (($exponent_bias + exponent) as $u_scalar) << $fraction_bits;
-                unsafe { mem::transmute(self | $uty::splat(exponent_bits)) }
+                $ty::from_bits(self | exponent_bits)
             }
         }
 
@@ -168,11 +106,11 @@ macro_rules! simd_float_impls {
                 // those are usually more random.
                 let float_size = mem::size_of::<$f_scalar>() * 8;
                 let precision = $fraction_bits + 1;
-                let scale = $ty::splat(1.0 / ((1 as $u_scalar << precision) as $f_scalar));
+                let scale = 1.0 / ((1 as $u_scalar << precision) as $f_scalar);
 
                 let value: $uty = rng.gen();
-                let value = $ty::from(value >> (float_size - precision));
-                scale * value
+                let value = value >> (float_size - precision);
+                scale * $ty::cast_from_int(value)
             }
         }
 
@@ -183,12 +121,12 @@ macro_rules! simd_float_impls {
                 // those are usually more random.
                 let float_size = mem::size_of::<$f_scalar>() * 8;
                 let precision = $fraction_bits + 1;
-                let scale = $ty::splat(1.0 / ((1 as $u_scalar << precision) as $f_scalar));
+                let scale = 1.0 / ((1 as $u_scalar << precision) as $f_scalar);
 
                 let value: $uty = rng.gen();
+                let value = value >> (float_size - precision);
                 // Add 1 to shift up; will not overflow because of right-shift:
-                let value = $ty::from((value >> (float_size - precision)) + 1);
-                scale * value
+                scale * $ty::cast_from_int(value + 1)
             }
         }
 
@@ -197,32 +135,35 @@ macro_rules! simd_float_impls {
                 // Transmute-based method; 23/52 random bits; (0, 1) interval.
                 // We use the most significant bits because for simple RNGs
                 // those are usually more random.
-                const EPSILON: $f_scalar = 1.0 / (1u64 << $fraction_bits) as $f_scalar;
+                use core::$f_scalar::EPSILON;
                 let float_size = mem::size_of::<$f_scalar>() * 8;
 
                 let value: $uty = rng.gen();
                 let fraction = value >> (float_size - $fraction_bits);
-                fraction.into_float_with_exponent(0) - $ty::splat(1.0 - EPSILON / 2.0)
+                fraction.into_float_with_exponent(0) - (1.0 - EPSILON / 2.0)
             }
         }
     }
 }
+float_impls! { f32, u32, f32, u32, 23, 127 }
+float_impls! { f64, u64, f64, u64, 52, 1023 }
+
 #[cfg(feature="simd_support")]
-simd_float_impls! { f32x2, u32x2, f32, u32, 23, 127 }
+float_impls! { f32x2, u32x2, f32, u32, 23, 127 }
 #[cfg(feature="simd_support")]
-simd_float_impls! { f32x4, u32x4, f32, u32, 23, 127 }
+float_impls! { f32x4, u32x4, f32, u32, 23, 127 }
 #[cfg(feature="simd_support")]
-simd_float_impls! { f32x8, u32x8, f32, u32, 23, 127 }
+float_impls! { f32x8, u32x8, f32, u32, 23, 127 }
 #[cfg(feature="simd_support")]
-simd_float_impls! { f32x16, u32x16, f32, u32, 23, 127 }
 
 #[cfg(feature="simd_support")]
-simd_float_impls! { f64x2, u64x2, f64, u64, 52, 1023 }
+float_impls! { f32x16, u32x16, f32, u32, 23, 127 }
+
+#[cfg(feature="simd_support")]
+float_impls! { f64x2, u64x2, f64, u64, 52, 1023 }
 #[cfg(feature="simd_support")]
-simd_float_impls! { f64x4, u64x4, f64, u64, 52, 1023 }
+float_impls! { f64x4, u64x4, f64, u64, 52, 1023 }
 #[cfg(feature="simd_support")]
-simd_float_impls! { f64x8, u64x8, f64, u64, 52, 1023 }
+float_impls! { f64x8, u64x8, f64, u64, 52, 1023 }
 
 
 #[cfg(test)]
@@ -267,7 +208,7 @@ mod tests {
             }
         }
     }
-    test_f32! { f32_edge_cases, f32, 0.0, ::core::f32::EPSILON }
+    test_f32! { f32_edge_cases, f32, 0.0, EPSILON32 }
     #[cfg(feature="simd_support")]
     test_f32! { f32x2_edge_cases, f32x2, f32x2::splat(0.0), f32x2::splat(EPSILON32) }
     #[cfg(feature="simd_support")]
diff --git a/src/distributions/math_helpers.rs b/src/distributions/math_helpers.rs
new file mode 100644
index 00000000000..08f2e719227
--- /dev/null
+++ b/src/distributions/math_helpers.rs
@@ -0,0 +1,161 @@
+// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// https://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Math helper functions
+
+#[cfg(feature="simd_support")]
+use core::simd::*;
+
+
+pub trait WideningMultiply<RHS = Self> {
+    type Output;
+
+    fn wmul(self, x: RHS) -> Self::Output;
+}
+
+macro_rules! wmul_impl {
+    ($ty:ty, $wide:ty, $shift:expr) => {
+        impl WideningMultiply for $ty {
+            type Output = ($ty, $ty);
+
+            #[inline(always)]
+            fn wmul(self, x: $ty) -> Self::Output {
+                let tmp = (self as $wide) * (x as $wide);
+                ((tmp >> $shift) as $ty, tmp as $ty)
+            }
+        }
+    }
+}
+wmul_impl! { u8, u16, 8 }
+wmul_impl! { u16, u32, 16 }
+wmul_impl! { u32, u64, 32 }
+#[cfg(feature = "i128_support")]
+wmul_impl! { u64, u128, 64 }
+
+// This code is a translation of the __mulddi3 function in LLVM's
+// compiler-rt. It is an optimised variant of the common method
+// `(a + b) * (c + d) = ac + ad + bc + bd`.
+//
+// For some reason LLVM can optimise the C version very well, but
+// keeps shuffling registers in this Rust translation.
+macro_rules! wmul_impl_large {
+    ($ty:ty, $half:expr) => {
+        impl WideningMultiply for $ty {
+            type Output = ($ty, $ty);
+
+            #[inline(always)]
+            fn wmul(self, b: $ty) -> Self::Output {
+                const LOWER_MASK: $ty = !0 >> $half;
+                let mut low = (self & LOWER_MASK).wrapping_mul(b & LOWER_MASK);
+                let mut t = low >> $half;
+                low &= LOWER_MASK;
+                t += (self >> $half).wrapping_mul(b & LOWER_MASK);
+                low += (t & LOWER_MASK) << $half;
+                let mut high = t >> $half;
+                t = low >> $half;
+                low &= LOWER_MASK;
+                t += (b >> $half).wrapping_mul(self & LOWER_MASK);
+                low += (t & LOWER_MASK) << $half;
+                high += t >> $half;
+                high += (self >> $half).wrapping_mul(b >> $half);
+
+                (high, low)
+            }
+        }
+    }
+}
+#[cfg(not(feature = "i128_support"))]
+wmul_impl_large! { u64, 32 }
+#[cfg(feature = "i128_support")]
+wmul_impl_large! { u128, 64 }
+
+macro_rules! wmul_impl_usize {
+    ($ty:ty) => {
+        impl WideningMultiply for usize {
+            type Output = (usize, usize);
+
+            #[inline(always)]
+            fn wmul(self, x: usize) -> Self::Output {
+                let (high, low) = (self as $ty).wmul(x as $ty);
+                (high as usize, low as usize)
+            }
+        }
+    }
+}
+#[cfg(target_pointer_width = "32")]
+wmul_impl_usize! { u32 }
+#[cfg(target_pointer_width = "64")]
+wmul_impl_usize! { u64 }
+
+
+pub trait CastFromInt<T> {
+    fn cast_from_int(i: T) -> Self;
+}
+
+impl CastFromInt<u32> for f32 {
+    fn cast_from_int(i: u32) -> Self { i as f32 }
+}
+
+impl CastFromInt<u64> for f64 {
+    fn cast_from_int(i: u64) -> Self { i as f64 }
+}
+
+#[cfg(feature="simd_support")]
+macro_rules! simd_float_from_int {
+    ($ty:ident, $uty:ident) => {
+        impl CastFromInt<$uty> for $ty {
+            fn cast_from_int(i: $uty) -> Self { $ty::from(i) }
+        }
+    }
+}
+
+#[cfg(feature="simd_support")] simd_float_from_int! { f32x2, u32x2 }
+#[cfg(feature="simd_support")] simd_float_from_int! { f32x4, u32x4 }
+#[cfg(feature="simd_support")] simd_float_from_int! { f32x8, u32x8 }
+#[cfg(feature="simd_support")] simd_float_from_int! { f32x16, u32x16 }
+#[cfg(feature="simd_support")] simd_float_from_int! { f64x2, u64x2 }
+#[cfg(feature="simd_support")] simd_float_from_int! { f64x4, u64x4 }
+#[cfg(feature="simd_support")] simd_float_from_int! { f64x8, u64x8 }
+
+
+/// `PartialOrd` for vectors compares lexicographically. We want natural order.
+/// Only the comparison functions we need are implemented.
+pub trait NaturalCompare {
+    fn cmp_lt(self, other: Self) -> bool;
+    fn cmp_le(self, other: Self) -> bool;
+}
+
+impl NaturalCompare for f32 {
+    fn cmp_lt(self, other: Self) -> bool { self < other }
+    fn cmp_le(self, other: Self) -> bool { self <= other }
+}
+
+impl NaturalCompare for f64 {
+    fn cmp_lt(self, other: Self) -> bool { self < other }
+    fn cmp_le(self, other: Self) -> bool { self <= other }
+}
+
+#[cfg(feature="simd_support")]
+macro_rules! simd_less_then {
+    ($ty:ident) => {
+        impl NaturalCompare for $ty {
+            fn cmp_lt(self, other: Self) -> bool { self.lt(other).all() }
+            fn cmp_le(self, other: Self) -> bool { self.le(other).all() }
+        }
+    }
+}
+
+#[cfg(feature="simd_support")] simd_less_then! { f32x2 }
+#[cfg(feature="simd_support")] simd_less_then! { f32x4 }
+#[cfg(feature="simd_support")] simd_less_then! { f32x8 }
+#[cfg(feature="simd_support")] simd_less_then! { f32x16 }
+#[cfg(feature="simd_support")] simd_less_then! { f64x2 }
+#[cfg(feature="simd_support")] simd_less_then! { f64x4 }
+#[cfg(feature="simd_support")] simd_less_then! { f64x8 }
diff --git a/src/distributions/mod.rs b/src/distributions/mod.rs
index 9d093f1d509..0f0420c9d89 100644
--- a/src/distributions/mod.rs
+++ b/src/distributions/mod.rs
@@ -214,6 +214,7 @@
 mod float;
 mod integer;
 #[cfg(feature="std")]
 mod log_gamma;
+mod math_helpers;
 mod other;
 #[cfg(feature="std")]
 mod ziggurat_tables;
 #[cfg(feature="std")]
diff --git a/src/distributions/uniform.rs b/src/distributions/uniform.rs
index 1a2e660a7d2..d908074a560 100644
--- a/src/distributions/uniform.rs
+++ b/src/distributions/uniform.rs
@@ -116,6 +116,7 @@
 use std::time::Duration;
 use Rng;
 use distributions::Distribution;
 use distributions::float::IntoFloat;
+use distributions::math_helpers::{WideningMultiply, NaturalCompare};
 #[cfg(feature="simd_support")]
 use core::simd::*;
 
@@ -469,87 +470,6 @@ uniform_int_impl! { usize, isize, usize, isize, usize }
 uniform_int_impl! { u128, u128, u128, i128, u128 }
 
 
-trait WideningMultiply<RHS = Self> {
-    type Output;
-
-    fn wmul(self, x: RHS) -> Self::Output;
-}
-
-macro_rules! wmul_impl {
-    ($ty:ty, $wide:ty, $shift:expr) => {
-        impl WideningMultiply for $ty {
-            type Output = ($ty, $ty);
-
-            #[inline(always)]
-            fn wmul(self, x: $ty) -> Self::Output {
-                let tmp = (self as $wide) * (x as $wide);
-                ((tmp >> $shift) as $ty, tmp as $ty)
-            }
-        }
-    }
-}
-wmul_impl! { u8, u16, 8 }
-wmul_impl! { u16, u32, 16 }
-wmul_impl! { u32, u64, 32 }
-#[cfg(feature = "i128_support")]
-wmul_impl! { u64, u128, 64 }
-
-// This code is a translation of the __mulddi3 function in LLVM's
-// compiler-rt. It is an optimised variant of the common method
-// `(a + b) * (c + d) = ac + ad + bc + bd`.
-//
-// For some reason LLVM can optimise the C version very well, but
-// keeps shuffeling registers in this Rust translation.
-macro_rules! wmul_impl_large {
-    ($ty:ty, $half:expr) => {
-        impl WideningMultiply for $ty {
-            type Output = ($ty, $ty);
-
-            #[inline(always)]
-            fn wmul(self, b: $ty) -> Self::Output {
-                const LOWER_MASK: $ty = !0 >> $half;
-                let mut low = (self & LOWER_MASK).wrapping_mul(b & LOWER_MASK);
-                let mut t = low >> $half;
-                low &= LOWER_MASK;
-                t += (self >> $half).wrapping_mul(b & LOWER_MASK);
-                low += (t & LOWER_MASK) << $half;
-                let mut high = t >> $half;
-                t = low >> $half;
-                low &= LOWER_MASK;
-                t += (b >> $half).wrapping_mul(self & LOWER_MASK);
-                low += (t & LOWER_MASK) << $half;
-                high += t >> $half;
-                high += (self >> $half).wrapping_mul(b >> $half);
-
-                (high, low)
-            }
-        }
-    }
-}
-#[cfg(not(feature = "i128_support"))]
-wmul_impl_large! { u64, 32 }
-#[cfg(feature = "i128_support")]
-wmul_impl_large! { u128, 64 }
-
-macro_rules! wmul_impl_usize {
-    ($ty:ty) => {
-        impl WideningMultiply for usize {
-            type Output = (usize, usize);
-
-            #[inline(always)]
-            fn wmul(self, x: usize) -> Self::Output {
-                let (high, low) = (self as $ty).wmul(x as $ty);
-                (high as usize, low as usize)
-            }
-        }
-    }
-}
-#[cfg(target_pointer_width = "32")]
-wmul_impl_usize! { u32 }
-#[cfg(target_pointer_width = "64")]
-wmul_impl_usize! { u64 }
-
-
 /// The back-end implementing [`UniformSampler`] for floating-point types.
 ///
@@ -597,7 +517,7 @@ macro_rules! uniform_float_impl {
             {
                 let low = *low_b.borrow();
                 let high = *high_b.borrow();
-                assert!(low < high, "Uniform::new called with `low >= high`");
+                assert!(low.cmp_lt(high),
+                        "Uniform::new called with `low >= high`");
                 let scale = high - low;
                 let offset = low - scale;
                 UniformFloat {
@@ -612,7 +533,7 @@ macro_rules! uniform_float_impl {
             {
                 let low = *low_b.borrow();
                 let high = *high_b.borrow();
-                assert!(low <= high,
+                assert!(low.cmp_le(high),
                         "Uniform::new_inclusive called with `low > high`");
                 let scale = high - low;
                 let offset = low - scale;
@@ -641,7 +562,7 @@ macro_rules! uniform_float_impl {
             {
                 let low = *low_b.borrow();
                 let high = *high_b.borrow();
-                assert!(low < high,
+                assert!(low.cmp_lt(high),
                         "Uniform::sample_single called with low >= high");
                 let scale = high - low;
                 let offset = low - scale;

From 5c948fe05a565d620099f87650b5965aa232c980 Mon Sep 17 00:00:00 2001
From: Paul Dicker
Date: Tue, 26 Jun 2018 14:45:12 +0200
Subject: [PATCH 6/6] Rename NaturalCompare and math_helpers

---
 src/distributions/float.rs        |  2 +-
 src/distributions/mod.rs          |  2 +-
 src/distributions/uniform.rs      |  8 ++---
 .../{math_helpers.rs => utils.rs} | 29 ++++++++++---------
 4 files changed, 22 insertions(+), 19 deletions(-)
 rename src/distributions/{math_helpers.rs => utils.rs} (85%)

diff --git a/src/distributions/float.rs b/src/distributions/float.rs
index facd753eba2..9bf4a69ab0b 100644
--- a/src/distributions/float.rs
+++ b/src/distributions/float.rs
@@ -13,7 +13,7 @@ use core::mem;
 use Rng;
 use distributions::{Distribution, Standard};
-use distributions::math_helpers::CastFromInt;
+use distributions::utils::CastFromInt;
 #[cfg(feature="simd_support")]
 use core::simd::*;
 
diff --git a/src/distributions/mod.rs b/src/distributions/mod.rs
index 0f0420c9d89..06704664b29 100644
--- a/src/distributions/mod.rs
+++ b/src/distributions/mod.rs
@@ -214,8 +214,8 @@
 mod float;
 mod integer;
 #[cfg(feature="std")]
 mod log_gamma;
-mod math_helpers;
 mod other;
+mod utils;
 #[cfg(feature="std")]
 mod ziggurat_tables;
 #[cfg(feature="std")]
diff --git a/src/distributions/uniform.rs b/src/distributions/uniform.rs
index d908074a560..9db0f59600e 100644
--- a/src/distributions/uniform.rs
+++ b/src/distributions/uniform.rs
@@ -116,7 +116,7 @@
 use std::time::Duration;
 use Rng;
 use distributions::Distribution;
 use distributions::float::IntoFloat;
-use distributions::math_helpers::{WideningMultiply, NaturalCompare};
+use distributions::utils::{WideningMultiply, CompareAll};
 #[cfg(feature="simd_support")]
 use core::simd::*;
 
@@ -517,7 +517,7 @@ macro_rules! uniform_float_impl {
             {
                 let low = *low_b.borrow();
                 let high = *high_b.borrow();
-                assert!(low.cmp_lt(high),
+                assert!(low.all_lt(high),
                         "Uniform::new called with `low >= high`");
                 let scale = high - low;
                 let offset = low - scale;
@@ -533,7 +533,7 @@ macro_rules! uniform_float_impl {
             {
                 let low = *low_b.borrow();
                 let high = *high_b.borrow();
-                assert!(low.cmp_le(high),
+                assert!(low.all_le(high),
                         "Uniform::new_inclusive called with `low > high`");
                 let scale = high - low;
                 let offset = low - scale;
@@ -562,7 +562,7 @@ macro_rules! uniform_float_impl {
             {
                 let low = *low_b.borrow();
                 let high = *high_b.borrow();
-                assert!(low.cmp_lt(high),
+                assert!(low.all_lt(high),
                         "Uniform::sample_single called with low >= high");
                 let scale = high - low;
                 let offset = low - scale;
diff --git a/src/distributions/math_helpers.rs b/src/distributions/utils.rs
similarity index 85%
rename from src/distributions/math_helpers.rs
rename to src/distributions/utils.rs
index 08f2e719227..f5fef26eb9a 100644
--- a/src/distributions/math_helpers.rs
+++ b/src/distributions/utils.rs
@@ -125,29 +125,32 @@ macro_rules! simd_float_from_int {
 #[cfg(feature="simd_support")] simd_float_from_int! { f64x8, u64x8 }
 
 
-/// `PartialOrd` for vectors compares lexicographically. We want natural order.
+/// `PartialOrd` for vectors compares lexicographically. We want to compare all
+/// the individual SIMD lanes instead, and get the combined result over all
+/// lanes. This is possible using something like `a.lt(b).all()`, but we
+/// implement it as a trait so we can write the same code for `f32` and `f64`.
 /// Only the comparison functions we need are implemented.
-pub trait NaturalCompare {
-    fn cmp_lt(self, other: Self) -> bool;
-    fn cmp_le(self, other: Self) -> bool;
+pub trait CompareAll {
+    fn all_lt(self, other: Self) -> bool;
+    fn all_le(self, other: Self) -> bool;
 }
 
-impl NaturalCompare for f32 {
-    fn cmp_lt(self, other: Self) -> bool { self < other }
-    fn cmp_le(self, other: Self) -> bool { self <= other }
+impl CompareAll for f32 {
+    fn all_lt(self, other: Self) -> bool { self < other }
+    fn all_le(self, other: Self) -> bool { self <= other }
 }
 
-impl NaturalCompare for f64 {
-    fn cmp_lt(self, other: Self) -> bool { self < other }
-    fn cmp_le(self, other: Self) -> bool { self <= other }
+impl CompareAll for f64 {
+    fn all_lt(self, other: Self) -> bool { self < other }
+    fn all_le(self, other: Self) -> bool { self <= other }
 }
 
 #[cfg(feature="simd_support")]
 macro_rules! simd_less_then {
     ($ty:ident) => {
-        impl NaturalCompare for $ty {
-            fn cmp_lt(self, other: Self) -> bool { self.lt(other).all() }
-            fn cmp_le(self, other: Self) -> bool { self.le(other).all() }
+        impl CompareAll for $ty {
+            fn all_lt(self, other: Self) -> bool { self.lt(other).all() }
+            fn all_le(self, other: Self) -> bool { self.le(other).all() }
         }
     }
 }
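
Usage sketch (not part of the patch series): the snippet below illustrates how the SIMD support introduced by these patches could be exercised from a crate depending on rand. It assumes a nightly toolchain, rand built with the `nightly` feature (which enables `simd_support`), and the `core::simd` vector types targeted above; the function and variable names are illustrative only.

    #![feature(stdsimd)]

    extern crate rand;

    use core::simd::{f32x4, u32x4};
    use rand::{thread_rng, Rng};
    use rand::distributions::{Standard, Uniform};

    fn main() {
        let mut rng = thread_rng();

        // Patch 2: Standard fills every integer lane with an independent sample.
        let ints: u32x4 = rng.gen();

        // Patch 3: Standard floats put each lane in the half-open interval [0, 1).
        let floats: f32x4 = rng.sample(Standard);

        // Patch 4: Uniform takes per-lane bounds, built here with splat.
        let range = Uniform::new(f32x4::splat(-1.0), f32x4::splat(1.0));
        let v: f32x4 = rng.sample(range);

        println!("{:?} {:?} {:?}", ints, floats, v);
    }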