rust-random · dhardy · Jun 29, 2018 · Apr 7, 2018 · Apr 6, 2018 · Apr 7, 2018
diff --git a/Cargo.toml b/Cargo.toml
@@ -19,10 +19,11 @@ appveyor = { repository = "alexcrichton/rand" }
 
 [features]
 default = ["std" ] # without "std" rand uses libcore
-nightly = ["i128_support"] # enables all features requiring nightly rust
+nightly = ["i128_support", "simd_support"] # enables all features requiring nightly rust
 std = ["rand_core/std", "alloc", "libc", "winapi", "cloudabi", "fuchsia-zircon"]
 alloc = ["rand_core/alloc"]  # enables Vec and Box support (without std)
 i128_support = [] # enables i128 and u128 support
+simd_support = [] # enables SIMD support (requires nightly rust)
 serde1 = ["serde", "serde_derive", "rand_core/serde1"] # enables serialization for PRNGs
 
 [workspace]

diff --git a/src/distributions/float.rs b/src/distributions/float.rs
@@ -13,6 +13,8 @@
 use core::mem;
 use Rng;
 use distributions::{Distribution, Standard};
+#[cfg(feature="simd_support")]
+use core::simd::*;
 
 /// A distribution to sample floating point numbers uniformly in the half-open
 /// interval `(0, 1]`, i.e. including 1 but not 0.
@@ -144,63 +146,173 @@ float_impls! { f32, u32, 23, 127 }
 float_impls! { f64, u64, 52, 1023 }
 
 
+#[cfg(feature="simd_support")]
+macro_rules! simd_float_impls { // IntoFloat plus Standard/OpenClosed01/Open01 sampling for one SIMD float type
+    ($ty:ident, $uty:ident, $f_scalar:ty, $u_scalar:ty, // float vector, same-width unsigned vector, and their lane types
+     $fraction_bits:expr, $exponent_bias:expr) => { // fraction width and exponent bias of the lane float format
+        impl IntoFloat for $uty {
+            type F = $ty;
+            #[inline(always)]
+            fn into_float_with_exponent(self, exponent: i32) -> $ty {
+                // The exponent is encoded using an offset-binary representation; the same bits are OR-ed into every lane
+                let exponent_bits: $u_scalar =
+                    (($exponent_bias + exponent) as $u_scalar) << $fraction_bits;
+                unsafe { mem::transmute(self | $uty::splat(exponent_bits)) } // reinterprets the integer lanes as float lanes; transmute only compiles when both types have equal size
+            }
+        }
+
+        impl Distribution<$ty> for Standard {
+            fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> $ty {
+                // Multiply-based method; 24/53 random bits; [0, 1) interval.
+                // We use the most significant bits because for simple RNGs
+                // those are usually more random.
+                let float_size = mem::size_of::<$f_scalar>() * 8; // lane width in bits
+                let precision = $fraction_bits + 1; // 24 for f32 lanes, 53 for f64 lanes
+                let scale = $ty::splat(1.0 / ((1 as $u_scalar << precision) as $f_scalar)); // 2^-precision in every lane
+
+                let value: $uty = rng.gen();
+                let value = $ty::from(value >> (float_size - precision)); // keep only the top `precision` bits of each lane
+                scale * value
+            }
+        }
+
+        impl Distribution<$ty> for OpenClosed01 {
+            fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> $ty {
+                // Multiply-based method; 24/53 random bits; (0, 1] interval.
+                // We use the most significant bits because for simple RNGs
+                // those are usually more random.
+                let float_size = mem::size_of::<$f_scalar>() * 8; // lane width in bits
+                let precision = $fraction_bits + 1; // 24 for f32 lanes, 53 for f64 lanes
+                let scale = $ty::splat(1.0 / ((1 as $u_scalar << precision) as $f_scalar)); // 2^-precision in every lane
+
+                let value: $uty = rng.gen();
+                // Add 1 to shift up; will not overflow because of right-shift:
+                let value = $ty::from((value >> (float_size - precision)) + 1);
+                scale * value
+            }
+        }
+
+        impl Distribution<$ty> for Open01 {
+            fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> $ty {
+                // Transmute-based method; 23/52 random bits; (0, 1) interval.
+                // We use the most significant bits because for simple RNGs
+                // those are usually more random.
+                const EPSILON: $f_scalar = 1.0 / (1u64 << $fraction_bits) as $f_scalar; // 2^-fraction_bits
+                let float_size = mem::size_of::<$f_scalar>() * 8; // lane width in bits
+
+                let value: $uty = rng.gen();
+                let fraction = value >> (float_size - $fraction_bits); // keep only the top `$fraction_bits` bits of each lane
+                fraction.into_float_with_exponent(0) - $ty::splat(1.0 - EPSILON / 2.0) // [1, 2) lanes shifted down to [EPSILON/2, 1 - EPSILON/2]
+            }
+        }
+    }
+}
+
+#[cfg(feature="simd_support")]
+simd_float_impls! { f32x2, u32x2, f32, u32, 23, 127 }
+#[cfg(feature="simd_support")]
+simd_float_impls! { f32x4, u32x4, f32, u32, 23, 127 }
+#[cfg(feature="simd_support")]
+simd_float_impls! { f32x8, u32x8, f32, u32, 23, 127 }
+#[cfg(feature="simd_support")]
+simd_float_impls! { f32x16, u32x16, f32, u32, 23, 127 }
+
+#[cfg(feature="simd_support")]
+simd_float_impls! { f64x2, u64x2, f64, u64, 52, 1023 }
+#[cfg(feature="simd_support")]
+simd_float_impls! { f64x4, u64x4, f64, u64, 52, 1023 }
+#[cfg(feature="simd_support")]
+simd_float_impls! { f64x8, u64x8, f64, u64, 52, 1023 }
+
+
 #[cfg(test)]
 mod tests {
     use Rng;
     use distributions::{Open01, OpenClosed01};
     use rngs::mock::StepRng;
+    #[cfg(feature="simd_support")]
+    use core::simd::*;
 
     const EPSILON32: f32 = ::core::f32::EPSILON;
     const EPSILON64: f64 = ::core::f64::EPSILON;
 
-    #[test]
-    fn standard_fp_edge_cases() {
-        let mut zeros = StepRng::new(0, 0);
-        assert_eq!(zeros.gen::<f32>(), 0.0);
-        assert_eq!(zeros.gen::<f64>(), 0.0);
-
-        let mut one32 = StepRng::new(1 << 8, 0);
-        assert_eq!(one32.gen::<f32>(), EPSILON32 / 2.0);
-
-        let mut one64 = StepRng::new(1 << 11, 0);
-        assert_eq!(one64.gen::<f64>(), EPSILON64 / 2.0);
-
-        let mut max = StepRng::new(!0, 0);
-        assert_eq!(max.gen::<f32>(), 1.0 - EPSILON32 / 2.0);
-        assert_eq!(max.gen::<f64>(), 1.0 - EPSILON64 / 2.0);
-    }
-
-    #[test]
-    fn openclosed01_edge_cases() {
-        let mut zeros = StepRng::new(0, 0);
-        assert_eq!(zeros.sample::<f32, _>(OpenClosed01), 0.0 + EPSILON32 / 2.0);
-        assert_eq!(zeros.sample::<f64, _>(OpenClosed01), 0.0 + EPSILON64 / 2.0);
-
-        let mut one32 = StepRng::new(1 << 8, 0);
-        assert_eq!(one32.sample::<f32, _>(OpenClosed01), EPSILON32);
+    macro_rules! test_f32 { // generates one #[test] fn checking edge-case outputs for an f32-based type
+        ($fnn:ident, $ty:ident, $ZERO:expr, $EPSILON:expr) => { // test fn name, type under test, and zero / epsilon expressed in that type
+            #[test]
+            fn $fnn() {
+                // Standard: all-zero, single-low-bit and all-one generator outputs
+                let mut zeros = StepRng::new(0, 0);
+                assert_eq!(zeros.gen::<$ty>(), $ZERO);
+                let mut one = StepRng::new(1 << 8 | 1 << (8 + 32), 0); // bit 8 set in each 32-bit half; presumably so every u32 lane sees the same value — confirm against StepRng
+                assert_eq!(one.gen::<$ty>(), $EPSILON / 2.0);
+                let mut max = StepRng::new(!0, 0);
+                assert_eq!(max.gen::<$ty>(), 1.0 - $EPSILON / 2.0);
 
-        let mut one64 = StepRng::new(1 << 11, 0);
-        assert_eq!(one64.sample::<f64, _>(OpenClosed01), EPSILON64);
+                // OpenClosed01: same three inputs, each result one step (EPSILON / 2) above Standard's
+                let mut zeros = StepRng::new(0, 0);
+                assert_eq!(zeros.sample::<$ty, _>(OpenClosed01),
+                           0.0 + $EPSILON / 2.0);
+                let mut one = StepRng::new(1 << 8 | 1 << (8 + 32), 0);
+                assert_eq!(one.sample::<$ty, _>(OpenClosed01), $EPSILON);
+                let mut max = StepRng::new(!0, 0);
+                assert_eq!(max.sample::<$ty, _>(OpenClosed01), $ZERO + 1.0);
 
-        let mut max = StepRng::new(!0, 0);
-        assert_eq!(max.sample::<f32, _>(OpenClosed01), 1.0);
-        assert_eq!(max.sample::<f64, _>(OpenClosed01), 1.0);
+                // Open01: results stay strictly between 0 and 1
+                let mut zeros = StepRng::new(0, 0);
+                assert_eq!(zeros.sample::<$ty, _>(Open01), 0.0 + $EPSILON / 2.0);
+                let mut one = StepRng::new(1 << 9 | 1 << (9 + 32), 0); // bit 9 set in each 32-bit half
+                assert_eq!(one.sample::<$ty, _>(Open01), $EPSILON / 2.0 * 3.0);
+                let mut max = StepRng::new(!0, 0);
+                assert_eq!(max.sample::<$ty, _>(Open01), 1.0 - $EPSILON / 2.0);
+            }
+        }
     }
+    test_f32! { f32_edge_cases, f32, 0.0, EPSILON32 } // shared constant, as in the f64 scalar test; keeps EPSILON32 used when simd_support is off
+    #[cfg(feature="simd_support")]
+    test_f32! { f32x2_edge_cases, f32x2, f32x2::splat(0.0), f32x2::splat(EPSILON32) }
+    #[cfg(feature="simd_support")]
+    test_f32! { f32x4_edge_cases, f32x4, f32x4::splat(0.0), f32x4::splat(EPSILON32) }
+    #[cfg(feature="simd_support")]
+    test_f32! { f32x8_edge_cases, f32x8, f32x8::splat(0.0), f32x8::splat(EPSILON32) }
+    #[cfg(feature="simd_support")]
+    test_f32! { f32x16_edge_cases, f32x16, f32x16::splat(0.0), f32x16::splat(EPSILON32) }
 
-    #[test]
-    fn open01_edge_cases() {
-        let mut zeros = StepRng::new(0, 0);
-        assert_eq!(zeros.sample::<f32, _>(Open01), 0.0 + EPSILON32 / 2.0);
-        assert_eq!(zeros.sample::<f64, _>(Open01), 0.0 + EPSILON64 / 2.0);
+    macro_rules! test_f64 { // generates one #[test] fn checking edge-case outputs for an f64-based type
+        ($fnn:ident, $ty:ident, $ZERO:expr, $EPSILON:expr) => { // test fn name, type under test, and zero / epsilon expressed in that type
+            #[test]
+            fn $fnn() {
+                // Standard: all-zero, single-low-bit and all-one generator outputs
+                let mut zeros = StepRng::new(0, 0);
+                assert_eq!(zeros.gen::<$ty>(), $ZERO);
+                let mut one = StepRng::new(1 << 11, 0); // bit 11 is the only bit set in the u64 step value
+                assert_eq!(one.gen::<$ty>(), $EPSILON / 2.0);
+                let mut max = StepRng::new(!0, 0);
+                assert_eq!(max.gen::<$ty>(), 1.0 - $EPSILON / 2.0);
 
-        let mut one32 = StepRng::new(1 << 9, 0);
-        assert_eq!(one32.sample::<f32, _>(Open01), EPSILON32 / 2.0 * 3.0);
+                // OpenClosed01: same three inputs, each result one step (EPSILON / 2) above Standard's
+                let mut zeros = StepRng::new(0, 0);
+                assert_eq!(zeros.sample::<$ty, _>(OpenClosed01),
+                           0.0 + $EPSILON / 2.0);
+                let mut one = StepRng::new(1 << 11, 0);
+                assert_eq!(one.sample::<$ty, _>(OpenClosed01), $EPSILON);
+                let mut max = StepRng::new(!0, 0);
+                assert_eq!(max.sample::<$ty, _>(OpenClosed01), $ZERO + 1.0);
 
-        let mut one64 = StepRng::new(1 << 12, 0);
-        assert_eq!(one64.sample::<f64, _>(Open01), EPSILON64 / 2.0 * 3.0);
-
-        let mut max = StepRng::new(!0, 0);
-        assert_eq!(max.sample::<f32, _>(Open01), 1.0 - EPSILON32 / 2.0);
-        assert_eq!(max.sample::<f64, _>(Open01), 1.0 - EPSILON64 / 2.0);
+                // Open01: results stay strictly between 0 and 1
+                let mut zeros = StepRng::new(0, 0);
+                assert_eq!(zeros.sample::<$ty, _>(Open01), 0.0 + $EPSILON / 2.0);
+                let mut one = StepRng::new(1 << 12, 0); // bit 12 is the only bit set in the u64 step value
+                assert_eq!(one.sample::<$ty, _>(Open01), $EPSILON / 2.0 * 3.0);
+                let mut max = StepRng::new(!0, 0);
+                assert_eq!(max.sample::<$ty, _>(Open01), 1.0 - $EPSILON / 2.0);
+            }
+        }
     }
+    test_f64! { f64_edge_cases, f64, 0.0, EPSILON64 }
+    #[cfg(feature="simd_support")]
+    test_f64! { f64x2_edge_cases, f64x2, f64x2::splat(0.0), f64x2::splat(EPSILON64) }
+    #[cfg(feature="simd_support")]
+    test_f64! { f64x4_edge_cases, f64x4, f64x4::splat(0.0), f64x4::splat(EPSILON64) }
+    #[cfg(feature="simd_support")]
+    test_f64! { f64x8_edge_cases, f64x8, f64x8::splat(0.0), f64x8::splat(EPSILON64) }
 }
diff --git a/src/distributions/integer.rs b/src/distributions/integer.rs
@@ -12,6 +12,8 @@
 
 use {Rng};
 use distributions::{Distribution, Standard};
+#[cfg(feature="simd_support")]
+use core::simd::*;
 
 impl Distribution<u8> for Standard {
     #[inline]
@@ -84,6 +86,39 @@ impl_int_from_uint! { i64, u64 }
 #[cfg(feature = "i128_support")] impl_int_from_uint! { i128, u128 }
 impl_int_from_uint! { isize, usize }
 
+#[cfg(feature="simd_support")]
+macro_rules! simd_impl { // implements Distribution<$ty> for Standard for each listed SIMD integer type by filling it with random bytes
+    ($bits:expr,) => {}; // base case: type list exhausted
+    ($bits:expr, $ty:ty, $($ty_more:ty,)*) => { // $bits: total bit width used to size the byte view of each listed type
+        simd_impl!($bits, $($ty_more,)*); // recurse on the remaining types
+
+        impl Distribution<$ty> for Standard {
+            #[inline]
+            fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> $ty {
+                let mut vec = Default::default(); // inferred as $ty from the return position
+                unsafe {
+                    let ptr = &mut vec;
+                    let b_ptr = &mut *(ptr as *mut $ty as *mut [u8; $bits/8]); // NOTE(review): sound only if $bits equals size_of::<$ty>() * 8; the value is caller-supplied and not checked
+                    rng.fill_bytes(b_ptr); // NOTE(review): multi-byte lane values presumably depend on target endianness — confirm
+                }
+                vec
+            }
+        }
+    }
+}
+
+#[cfg(feature="simd_support")]
+simd_impl!(16, u8x2, i8x2,);
+#[cfg(feature="simd_support")]
+simd_impl!(32, u8x4, i8x4, u16x2, i16x2,);
+#[cfg(feature="simd_support")]
+simd_impl!(64, u8x8, i8x8, u16x4, i16x4, u32x2, i32x2,);
+#[cfg(feature="simd_support")]
+simd_impl!(128, u8x16, i8x16, u16x8, i16x8, u32x4, i32x4, u64x2, i64x2,);
+#[cfg(feature="simd_support")]
+simd_impl!(256, u8x32, i8x32, u16x16, i16x16, u32x8, i32x8, u64x4, i64x4,);
+#[cfg(feature="simd_support")]
+simd_impl!(512, u8x64, i8x64, u16x32, i16x32, u32x16, i32x16, u64x8, i64x8,);
 
 #[cfg(test)]
 mod tests {

diff --git a/src/distributions/uniform.rs b/src/distributions/uniform.rs
@@ -117,6 +117,9 @@ use Rng;
 use distributions::Distribution;
 use distributions::float::IntoFloat;
 
+#[cfg(feature="simd_support")]
+use core::simd::*;
+
 /// Sample values uniformly between two bounds.
 ///
 /// [`Uniform::new`] and [`Uniform::new_inclusive`] construct a uniform
@@ -580,7 +583,7 @@ pub struct UniformFloat<X> {
 }
 
 macro_rules! uniform_float_impl {
-    ($ty:ty, $bits_to_discard:expr, $next_u:ident) => {
+    ($ty:ty, $uty:ident, $bits_to_discard:expr) => {
         impl SampleUniform for $ty {
             type Sampler = UniformFloat<$ty>;
         }
@@ -621,8 +624,8 @@ macro_rules! uniform_float_impl {
 
             fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Self::X {
                 // Generate a value in the range [1, 2)
-                let value1_2 = (rng.$next_u() >> $bits_to_discard)
-                               .into_float_with_exponent(0);
+                let value: $uty = rng.gen::<$uty>() >> $bits_to_discard;
+                let value1_2 = value.into_float_with_exponent(0);
                 // We don't use `f64::mul_add`, because it is not available with
                 // `no_std`. Furthermore, it is slower for some targets (but
                 // faster for others). However, the order of multiplication and
@@ -643,8 +646,8 @@ macro_rules! uniform_float_impl {
                 let scale = high - low;
                 let offset = low - scale;
                 // Generate a value in the range [1, 2)
-                let value1_2 = (rng.$next_u() >> $bits_to_discard)
-                               .into_float_with_exponent(0);
+                let value: $uty = rng.gen::<$uty>() >> $bits_to_discard;
+                let value1_2 = value.into_float_with_exponent(0);
                 // Doing multiply before addition allows some architectures to
                 // use a single instruction.
                 value1_2 * scale + offset
@@ -653,8 +656,24 @@ macro_rules! uniform_float_impl {
     }
 }
 
-uniform_float_impl! { f32, 32 - 23, next_u32 }
-uniform_float_impl! { f64, 64 - 52, next_u64 }
+uniform_float_impl! { f32, u32, 32 - 23 } // the shift leaves the top 23 bits of each u32 for into_float_with_exponent
+uniform_float_impl! { f64, u64, 64 - 52 } // the shift leaves the top 52 bits of each u64 for into_float_with_exponent
+
+#[cfg(feature="simd_support")]
+uniform_float_impl! { f32x2, u32x2, 32 - 23 }
+#[cfg(feature="simd_support")]
+uniform_float_impl! { f32x4, u32x4, 32 - 23 }
+#[cfg(feature="simd_support")]
+uniform_float_impl! { f32x8, u32x8, 32 - 23 }
+#[cfg(feature="simd_support")]
+uniform_float_impl! { f32x16, u32x16, 32 - 23 }
+
+#[cfg(feature="simd_support")]
+uniform_float_impl! { f64x2, u64x2, 64 - 52 }
+#[cfg(feature="simd_support")]
+uniform_float_impl! { f64x4, u64x4, 64 - 52 }
+#[cfg(feature="simd_support")]
+uniform_float_impl! { f64x8, u64x8, 64 - 52 }
 
 
 

diff --git a/src/lib.rs b/src/lib.rs
@@ -233,6 +233,7 @@
 #![cfg_attr(all(feature="alloc", not(feature="std")), feature(alloc))]
 #![cfg_attr(all(feature="i128_support", feature="nightly"), allow(stable_features))] // stable since 2018-03-27
 #![cfg_attr(all(feature="i128_support", feature="nightly"), feature(i128_type, i128))]
+#![cfg_attr(feature="simd_support", feature(stdsimd))] // every `use core::simd::*` is gated on simd_support alone, so the unstable gate must be too
 #![cfg_attr(feature = "stdweb", recursion_limit="128")]
 
 #[cfg(feature="std")] extern crate std as core;