diff --git a/benches/distributions.rs b/benches/distributions.rs
index 4e215e857fa..eeab8ca9db7 100644
--- a/benches/distributions.rs
+++ b/benches/distributions.rs
@@ -150,6 +150,33 @@ gen_range_int!(gen_range_i64, i64, 3i64, 123_456_789_123);
 #[cfg(feature = "i128_support")]
 gen_range_int!(gen_range_i128, i128, -12345678901234i128, 123_456_789_123_456_789);
 
+// construct and sample from a floating-point range
+macro_rules! gen_range_float {
+    ($fnn:ident, $ty:ident, $low:expr, $high:expr) => {
+        #[bench]
+        fn $fnn(b: &mut Bencher) {
+            let mut rng = XorShiftRng::from_entropy();
+
+            b.iter(|| {
+                let mut high = $high;
+                let mut low = $low;
+                let mut accum: $ty = 0.0;
+                for _ in 0..::RAND_BENCH_N {
+                    accum += rng.gen_range(low, high);
+                    // force recalculation of range each time
+                    low += 0.9;
+                    high += 1.1;
+                }
+                accum
+            });
+            b.bytes = size_of::<$ty>() as u64 * ::RAND_BENCH_N;
+        }
+    }
+}
+
+gen_range_float!(gen_range_f32, f32, -20000.0f32, 100000.0);
+gen_range_float!(gen_range_f64, f64, 123.456f64, 7890.12);
+
 #[bench]
 fn dist_iter(b: &mut Bencher) {
     let mut rng = XorShiftRng::from_entropy();
diff --git a/src/distributions/float.rs b/src/distributions/float.rs
index a7e66ed0afc..0d418ebdc74 100644
--- a/src/distributions/float.rs
+++ b/src/distributions/float.rs
@@ -13,7 +13,7 @@ use core::mem;
 use Rng;
 use distributions::{Distribution, Standard};
-use distributions::utils::CastFromInt;
+use distributions::utils::FloatSIMDUtils;
 #[cfg(feature="simd_support")]
 use core::simd::*;
diff --git a/src/distributions/uniform.rs b/src/distributions/uniform.rs
index 9db0f59600e..bf68e45f51a 100644
--- a/src/distributions/uniform.rs
+++ b/src/distributions/uniform.rs
@@ -116,7 +116,12 @@ use std::time::Duration;
 use Rng;
 use distributions::Distribution;
 use distributions::float::IntoFloat;
-use distributions::utils::{WideningMultiply, CompareAll};
+use distributions::utils::{WideningMultiply, FloatSIMDUtils, FloatAsSIMD, BoolAsSIMD};
+
+#[cfg(not(feature = "std"))]
+#[allow(unused_imports)]  // rustc doesn't detect that this is actually used
+use distributions::utils::Float;
+
 #[cfg(feature="simd_support")]
 use core::simd::*;
@@ -139,10 +144,9 @@ use core::simd::*;
 /// generated by the RNG than the low bits, since with some RNGs the low-bits
 /// are of lower quality than the high bits.
 ///
-/// Implementations should attempt to sample in `[low, high)` for
-/// `Uniform::new(low, high)`, i.e., excluding `high`, but this may be very
-/// difficult. All the primitive integer types satisfy this property, and the
-/// float types normally satisfy it, but rounding may mean `high` can occur.
+/// Implementations must sample in the `[low, high)` range for
+/// `Uniform::new(low, high)`, i.e., excluding `high`. In particular, care must
+/// be taken to ensure that rounding never results in values `< low` or `>= high`.
 ///
 /// # Example
 ///
@@ -284,9 +288,11 @@ pub trait SampleBorrow<Borrowed> {
     fn borrow(&self) -> &Borrowed;
 }
 impl<Borrowed> SampleBorrow<Borrowed> for Borrowed where Borrowed: SampleUniform {
+    #[inline(always)]
     fn borrow(&self) -> &Borrowed { self }
 }
 impl<'a, Borrowed> SampleBorrow<Borrowed> for &'a Borrowed where Borrowed: SampleUniform {
+    #[inline(always)]
     fn borrow(&self) -> &Borrowed { *self }
 }
@@ -487,10 +493,6 @@ uniform_int_impl! { u128, u128, u128, i128, u128 }
 /// multiply and addition. Values produced this way have what equals 22 bits of
 /// random digits for an `f32`, and 52 for an `f64`.
 ///
-/// Currently there is no difference between [`new`] and [`new_inclusive`],
-/// because the boundaries of a floats range are a bit of a fuzzy concept due to
-/// rounding errors.
-///
 /// [`UniformSampler`]: trait.UniformSampler.html
 /// [`new`]: trait.UniformSampler.html#tymethod.new
 /// [`new_inclusive`]: trait.UniformSampler.html#tymethod.new_inclusive
@@ -498,12 +500,12 @@ uniform_int_impl! { u128, u128, u128, i128, u128 }
 /// [`Standard`]: ../struct.Standard.html
 #[derive(Clone, Copy, Debug)]
 pub struct UniformFloat<X> {
+    low: X,
     scale: X,
-    offset: X,
 }
 
 macro_rules! uniform_float_impl {
-    ($ty:ty, $uty:ident, $bits_to_discard:expr) => {
+    ($ty:ty, $uty:ident, $f_scalar:ident, $u_scalar:ident, $bits_to_discard:expr) => {
         impl SampleUniform for $ty {
             type Sampler = UniformFloat<$ty>;
         }
@@ -519,12 +521,24 @@ macro_rules! uniform_float_impl {
                 let high = *high_b.borrow();
                 assert!(low.all_lt(high), "Uniform::new called with `low >= high`");
-                let scale = high - low;
-                let offset = low - scale;
-                UniformFloat {
-                    scale: scale,
-                    offset: offset,
+                assert!(low.all_finite() && high.all_finite(),
+                        "Uniform::new called with non-finite boundaries");
+                let max_rand = <$ty>::splat((::core::$u_scalar::MAX >> $bits_to_discard)
+                                            .into_float_with_exponent(0) - 1.0);
+
+                let mut scale = high - low;
+
+                loop {
+                    let mask = (scale * max_rand + low).ge_mask(high);
+                    if mask.none() {
+                        break;
+                    }
+                    scale = scale.decrease_masked(mask);
                 }
+
+                debug_assert!(<$ty>::splat(0.0).all_le(scale));
+
+                UniformFloat { low, scale }
             }
 
             fn new_inclusive<B1, B2>(low_b: B1, high_b: B2) -> Self
@@ -535,26 +549,44 @@ macro_rules! uniform_float_impl {
                 let high = *high_b.borrow();
                 assert!(low.all_le(high), "Uniform::new_inclusive called with `low > high`");
-                let scale = high - low;
-                let offset = low - scale;
-                UniformFloat {
-                    scale: scale,
-                    offset: offset,
+                assert!(low.all_finite() && high.all_finite(),
+                        "Uniform::new_inclusive called with non-finite boundaries");
+                let max_rand = <$ty>::splat((::core::$u_scalar::MAX >> $bits_to_discard)
+                                            .into_float_with_exponent(0) - 1.0);
+
+                let mut scale = (high - low) / max_rand;
+
+                loop {
+                    let mask = (scale * max_rand + low).gt_mask(high);
+                    if mask.none() {
+                        break;
+                    }
+                    scale = scale.decrease_masked(mask);
                 }
+
+                debug_assert!(<$ty>::splat(0.0).all_le(scale));
+
+                UniformFloat { low, scale }
             }
 
             fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Self::X {
                 // Generate a value in the range [1, 2)
-                let value: $uty = rng.gen::<$uty>() >> $bits_to_discard;
-                let value1_2 = value.into_float_with_exponent(0);
+                let value1_2 = (rng.gen::<$uty>() >> $bits_to_discard as u8)
+                    .into_float_with_exponent(0);
+
+                // Get a value in the range [0, 1) in order to avoid
+                // overflowing into infinity when multiplying with scale
+                let value0_1 = value1_2 - 1.0;
+
                 // We don't use `f64::mul_add`, because it is not available with
                 // `no_std`. Furthermore, it is slower for some targets (but
                 // faster for others). However, the order of multiplication and
                 // addition is important, because on some platforms (e.g. ARM)
                 // it will be optimized to a single (non-FMA) instruction.
-                value1_2 * self.scale + self.offset
+                value0_1 * self.scale + self.low
             }
 
+            #[inline]
             fn sample_single<R: Rng + ?Sized, B1, B2>(low_b: B1, high_b: B2, rng: &mut R) -> Self::X
                 where B1: SampleBorrow<Self::X> + Sized,
@@ -564,37 +596,83 @@ macro_rules! uniform_float_impl {
                 let high = *high_b.borrow();
                 assert!(low.all_lt(high), "Uniform::sample_single called with low >= high");
-                let scale = high - low;
-                let offset = low - scale;
-                // Generate a value in the range [1, 2)
-                let value: $uty = rng.gen::<$uty>() >> $bits_to_discard;
-                let value1_2 = value.into_float_with_exponent(0);
-                // Doing multiply before addition allows some architectures to
-                // use a single instruction.
-                value1_2 * scale + offset
+                let mut scale = high - low;
+
+                loop {
+                    // Generate a value in the range [1, 2)
+                    let value1_2 = (rng.gen::<$uty>() >> $bits_to_discard as u32)
+                        .into_float_with_exponent(0);
+
+                    // Get a value in the range [0, 1) in order to avoid
+                    // overflowing into infinity when multiplying with scale
+                    let value0_1 = value1_2 - 1.0;
+
+                    // Doing multiply before addition allows some architectures
+                    // to use a single instruction.
+                    let res = value0_1 * scale + low;
+
+                    debug_assert!(low.all_le(res) || !scale.all_finite());
+                    if res.all_lt(high) {
+                        return res;
+                    }
+
+                    // This handles a number of edge cases.
+                    // * `low` or `high` is NaN. In this case `scale` and
+                    //   `res` are going to end up as NaN.
+                    // * `low` is negative infinity and `high` is finite.
+                    //   `scale` is going to be infinite and `res` will be
+                    //   NaN.
+                    // * `high` is positive infinity and `low` is finite.
+                    //   `scale` is going to be infinite and `res` will
+                    //   be infinite or NaN (if value0_1 is 0).
+                    // * `low` is negative infinity and `high` is positive
+                    //   infinity. `scale` will be infinite and `res` will
+                    //   be NaN.
+                    // * `low` and `high` are finite, but `high - low`
+                    //   overflows to infinite. `scale` will be infinite
+                    //   and `res` will be infinite or NaN (if value0_1 is 0).
+                    // So if `high` or `low` are non-finite, we are guaranteed
+                    // to fail the `res < high` check above and end up here.
+                    //
+                    // While we technically should check for non-finite `low`
+                    // and `high` before entering the loop, by doing the checks
+                    // here instead, we allow the common case to avoid these
+                    // checks. But we are still guaranteed that if `low` or
+                    // `high` are non-finite we'll end up here and can do the
+                    // appropriate checks.
+                    //
+                    // Likewise `high - low` overflowing to infinity is also
+                    // rare, so handle it here after the common case.
+                    let mask = !scale.finite_mask();
+                    if mask.any() {
+                        assert!(low.all_finite() && high.all_finite(),
+                                "Uniform::sample_single called with non-finite boundaries");
+                        scale = scale.decrease_masked(mask);
+                    }
+                }
             }
         }
     }
 }
 
-uniform_float_impl! { f32, u32, 32 - 23 }
-uniform_float_impl! { f64, u64, 64 - 52 }
+uniform_float_impl! { f32, u32, f32, u32, 32 - 23 }
+uniform_float_impl! { f64, u64, f64, u64, 64 - 52 }
 
 #[cfg(feature="simd_support")]
-uniform_float_impl! { f32x2, u32x2, 32 - 23 }
+uniform_float_impl! { f32x2, u32x2, f32, u32, 32 - 23 }
 #[cfg(feature="simd_support")]
-uniform_float_impl! { f32x4, u32x4, 32 - 23 }
+uniform_float_impl! { f32x4, u32x4, f32, u32, 32 - 23 }
 #[cfg(feature="simd_support")]
-uniform_float_impl! { f32x8, u32x8, 32 - 23 }
+uniform_float_impl! { f32x8, u32x8, f32, u32, 32 - 23 }
 #[cfg(feature="simd_support")]
-uniform_float_impl! { f32x16, u32x16, 32 - 23 }
+uniform_float_impl! { f32x16, u32x16, f32, u32, 32 - 23 }
 #[cfg(feature="simd_support")]
-uniform_float_impl! { f64x2, u64x2, 64 - 52 }
+uniform_float_impl! { f64x2, u64x2, f64, u64, 64 - 52 }
 #[cfg(feature="simd_support")]
-uniform_float_impl! { f64x4, u64x4, 64 - 52 }
+uniform_float_impl! { f64x4, u64x4, f64, u64, 64 - 52 }
 #[cfg(feature="simd_support")]
-uniform_float_impl! { f64x8, u64x8, 64 - 52 }
+uniform_float_impl! { f64x8, u64x8, f64, u64, 64 - 52 }
@@ -704,7 +782,10 @@ impl UniformSampler for UniformDuration {
 #[cfg(test)]
 mod tests {
     use Rng;
+    use rngs::mock::StepRng;
     use distributions::uniform::Uniform;
+    use distributions::utils::FloatAsSIMD;
+    #[cfg(feature="simd_support")] use core::simd::*;
 
     #[should_panic]
     #[test]
@@ -712,12 +793,6 @@ mod tests {
         Uniform::new(10, 10);
     }
 
-    #[should_panic]
-    #[test]
-    fn test_uniform_bad_limits_equal_float() {
-        Uniform::new(10., 10.);
-    }
-
     #[test]
     fn test_uniform_good_limits_equal_int() {
         let mut rng = ::test::rng(804);
@@ -727,27 +802,12 @@
         }
     }
 
-    #[test]
-    fn test_uniform_good_limits_equal_float() {
-        let mut rng = ::test::rng(805);
-        let dist = Uniform::new_inclusive(10., 10.);
-        for _ in 0..20 {
-            assert_eq!(rng.sample(dist), 10.);
-        }
-    }
-
     #[should_panic]
     #[test]
     fn test_uniform_bad_limits_flipped_int() {
         Uniform::new(10, 5);
     }
 
-    #[should_panic]
-    #[test]
-    fn test_uniform_bad_limits_flipped_float() {
-        Uniform::new(10., 5.);
-    }
-
     #[test]
     fn test_integers() {
         let mut rng = ::test::rng(251);
@@ -799,27 +859,135 @@
     #[test]
     fn test_floats() {
         let mut rng = ::test::rng(252);
+        let mut zero_rng = StepRng::new(0, 0);
+        let mut max_rng = StepRng::new(0xffff_ffff_ffff_ffff, 0);
         macro_rules! t {
-            ($($ty:ty),*) => {{
-                $(
-                    let v: &[($ty, $ty)] = &[(0.0, 100.0),
-                                             (-1e35, -1e25),
-                                             (1e-35, 1e-25),
-                                             (-1e35, 1e35)];
-                    for &(low, high) in v.iter() {
+            ($ty:ty, $f_scalar:ident, $bits_shifted:expr) => {{
+                let v: &[($f_scalar, $f_scalar)]=
+                    &[(0.0, 100.0),
+                      (-1e35, -1e25),
+                      (1e-35, 1e-25),
+                      (-1e35, 1e35),
+                      (<$f_scalar>::from_bits(0), <$f_scalar>::from_bits(3)),
+                      (-<$f_scalar>::from_bits(10), -<$f_scalar>::from_bits(1)),
+                      (-<$f_scalar>::from_bits(5), 0.0),
+                      (-<$f_scalar>::from_bits(7), -0.0),
+                      (10.0, ::core::$f_scalar::MAX),
+                      (-100.0, ::core::$f_scalar::MAX),
+                      (-::core::$f_scalar::MAX / 5.0, ::core::$f_scalar::MAX),
+                      (-::core::$f_scalar::MAX, ::core::$f_scalar::MAX / 5.0),
+                      (-::core::$f_scalar::MAX * 0.8, ::core::$f_scalar::MAX * 0.7),
+                      (-::core::$f_scalar::MAX, ::core::$f_scalar::MAX),
+                    ];
+                for &(low_scalar, high_scalar) in v.iter() {
+                    for lane in 0..<$ty>::lanes() {
+                        let low = <$ty>::splat(0.0 as $f_scalar).replace(lane, low_scalar);
+                        let high = <$ty>::splat(1.0 as $f_scalar).replace(lane, high_scalar);
                         let my_uniform = Uniform::new(low, high);
-                        for _ in 0..1000 {
-                            let v: $ty = rng.sample(my_uniform);
-                            assert!(low <= v && v < high);
+                        let my_incl_uniform = Uniform::new_inclusive(low, high);
+                        for _ in 0..100 {
+                            let v = rng.sample(my_uniform).extract(lane);
+                            assert!(low_scalar <= v && v < high_scalar);
+                            let v = rng.sample(my_incl_uniform).extract(lane);
+                            assert!(low_scalar <= v && v <= high_scalar);
+                            let v = rng.gen_range(low, high).extract(lane);
+                            assert!(low_scalar <= v && v < high_scalar);
+                        }
+
+                        assert_eq!(rng.sample(Uniform::new_inclusive(low, low)).extract(lane), low_scalar);
+
+                        assert_eq!(zero_rng.sample(my_uniform).extract(lane), low_scalar);
+                        assert_eq!(zero_rng.sample(my_incl_uniform).extract(lane), low_scalar);
+                        assert_eq!(zero_rng.gen_range(low, high).extract(lane), low_scalar);
+                        assert!(max_rng.sample(my_uniform).extract(lane) < high_scalar);
+                        assert!(max_rng.sample(my_incl_uniform).extract(lane) <= high_scalar);
+
+                        // Don't run this test for really tiny differences between high and low
+                        // since for those rounding might result in selecting high for a very long time.
+                        if (high_scalar - low_scalar) > 0.0001 {
+                            let mut lowering_max_rng =
+                                StepRng::new(0xffff_ffff_ffff_ffff,
+                                             (-1i64 << $bits_shifted) as u64);
+                            assert!(lowering_max_rng.gen_range(low, high).extract(lane) < high_scalar);
+                        }
                     }
-                )*
+                }
+
+                assert_eq!(rng.sample(Uniform::new_inclusive(::core::$f_scalar::MAX,
+                                                             ::core::$f_scalar::MAX)),
+                           ::core::$f_scalar::MAX);
+                assert_eq!(rng.sample(Uniform::new_inclusive(-::core::$f_scalar::MAX,
+                                                             -::core::$f_scalar::MAX)),
+                           -::core::$f_scalar::MAX);
             }}
         }
 
-        t!(f32, f64)
+        t!(f32, f32, 32 - 23);
+        t!(f64, f64, 64 - 52);
+        #[cfg(feature="simd_support")] t!(f32x2, f32, 32 - 23);
+        #[cfg(feature="simd_support")] t!(f32x4, f32, 32 - 23);
+        #[cfg(feature="simd_support")] t!(f32x8, f32, 32 - 23);
+        #[cfg(feature="simd_support")] t!(f32x16, f32, 32 - 23);
+        #[cfg(feature="simd_support")] t!(f64x2, f64, 64 - 52);
+        #[cfg(feature="simd_support")] t!(f64x4, f64, 64 - 52);
+        #[cfg(feature="simd_support")] t!(f64x8, f64, 64 - 52);
     }
 
+    #[test]
+    #[cfg(all(feature="std",
+              not(target_arch = "wasm32"),
+              not(target_arch = "asmjs")))]
+    fn test_float_assertions() {
+        use std::panic::catch_unwind;
+        use super::SampleUniform;
+        fn range<T: SampleUniform>(low: T, high: T) {
+            let mut rng = ::test::rng(253);
+            rng.gen_range(low, high);
+        }
+
+        macro_rules! t {
+            ($ty:ident, $f_scalar:ident) => {{
+                let v: &[($f_scalar, $f_scalar)] =
+                    &[(::std::$f_scalar::NAN, 0.0),
+                      (1.0, ::std::$f_scalar::NAN),
+                      (::std::$f_scalar::NAN, ::std::$f_scalar::NAN),
+                      (1.0, 0.5),
+                      (::std::$f_scalar::MAX, -::std::$f_scalar::MAX),
+                      (::std::$f_scalar::INFINITY, ::std::$f_scalar::INFINITY),
+                      (::std::$f_scalar::NEG_INFINITY, ::std::$f_scalar::NEG_INFINITY),
+                      (::std::$f_scalar::NEG_INFINITY, 5.0),
+                      (5.0, ::std::$f_scalar::INFINITY),
+                      (::std::$f_scalar::NAN, ::std::$f_scalar::INFINITY),
+                      (::std::$f_scalar::NEG_INFINITY, ::std::$f_scalar::NAN),
+                      (::std::$f_scalar::NEG_INFINITY, ::std::$f_scalar::INFINITY),
+                    ];
+                for &(low_scalar, high_scalar) in v.iter() {
+                    for lane in 0..<$ty>::lanes() {
+                        let low = <$ty>::splat(0.0 as $f_scalar).replace(lane, low_scalar);
+                        let high = <$ty>::splat(1.0 as $f_scalar).replace(lane, high_scalar);
+                        assert!(catch_unwind(|| range(low, high)).is_err());
+                        assert!(catch_unwind(|| Uniform::new(low, high)).is_err());
+                        assert!(catch_unwind(|| Uniform::new_inclusive(low, high)).is_err());
+                        assert!(catch_unwind(|| range(low, low)).is_err());
+                        assert!(catch_unwind(|| Uniform::new(low, low)).is_err());
+                    }
+                }
+            }}
+        }
+
+        t!(f32, f32);
+        t!(f64, f64);
+        #[cfg(feature="simd_support")] t!(f32x2, f32);
+        #[cfg(feature="simd_support")] t!(f32x4, f32);
+        #[cfg(feature="simd_support")] t!(f32x8, f32);
+        #[cfg(feature="simd_support")] t!(f32x16, f32);
+        #[cfg(feature="simd_support")] t!(f64x2, f64);
+        #[cfg(feature="simd_support")] t!(f64x4, f64);
+        #[cfg(feature="simd_support")] t!(f64x8, f64);
+    }
+
+
     #[test]
     #[cfg(feature = "std")]
     fn test_durations() {
@@ -889,7 +1057,7 @@ mod tests {
         assert_eq!(r.inner.low, 2);
         assert_eq!(r.inner.range, 5);
         let r = Uniform::from(2.0f64..7.0);
-        assert_eq!(r.inner.offset, -3.0);
+        assert_eq!(r.inner.low, 2.0);
         assert_eq!(r.inner.scale, 5.0);
     }
 }
diff --git a/src/distributions/utils.rs b/src/distributions/utils.rs
index f5fef26eb9a..4d237230235 100644
--- a/src/distributions/utils.rs
+++ b/src/distributions/utils.rs
@@ -95,70 +95,179 @@ wmul_impl_usize! { u32 }
 wmul_impl_usize! { u64 }
 
-pub trait CastFromInt<T> {
-    fn cast_from_int(i: T) -> Self;
-}
+/// Helper trait when dealing with scalar and SIMD floating point types.
+pub(crate) trait FloatSIMDUtils {
+    // `PartialOrd` for vectors compares lexicographically. We want to compare all
+    // the individual SIMD lanes instead, and get the combined result over all
+    // lanes. This is possible using something like `a.lt(b).all()`, but we
+    // implement it as a trait so we can write the same code for `f32` and `f64`.
+    // Only the comparison functions we need are implemented.
+    fn all_lt(self, other: Self) -> bool;
+    fn all_le(self, other: Self) -> bool;
+    fn all_finite(self) -> bool;
 
-impl CastFromInt<u32> for f32 {
-    fn cast_from_int(i: u32) -> Self { i as f32 }
+    type Mask;
+    fn finite_mask(self) -> Self::Mask;
+    fn gt_mask(self, other: Self) -> Self::Mask;
+    fn ge_mask(self, other: Self) -> Self::Mask;
+
+    // Decrease all lanes where the mask is `true` to the next lower value
+    // representable by the floating-point type. At least one of the lanes
+    // must be set.
+    fn decrease_masked(self, mask: Self::Mask) -> Self;
+
+    // Convert from int value. Conversion is done while retaining the numerical
+    // value, not by retaining the binary representation.
+    type UInt;
+    fn cast_from_int(i: Self::UInt) -> Self;
 }
 
-impl CastFromInt<u64> for f64 {
-    fn cast_from_int(i: u64) -> Self { i as f64 }
+/// Implement functions available in std builds but missing from core primitives
+#[cfg(not(std))]
+pub(crate) trait Float : Sized {
+    type Bits;
+
+    fn is_nan(self) -> bool;
+    fn is_infinite(self) -> bool;
+    fn is_finite(self) -> bool;
+    fn to_bits(self) -> Self::Bits;
+    fn from_bits(v: Self::Bits) -> Self;
 }
 
-#[cfg(feature="simd_support")]
-macro_rules! simd_float_from_int {
-    ($ty:ident, $uty:ident) => {
-        impl CastFromInt<$uty> for $ty {
-            fn cast_from_int(i: $uty) -> Self { $ty::from(i) }
-        }
-    }
+/// Implement functions on f32/f64 to give them APIs similar to SIMD types
+pub(crate) trait FloatAsSIMD : Sized {
+    #[inline(always)]
+    fn lanes() -> usize { 1 }
+    #[inline(always)]
+    fn splat(scalar: Self) -> Self { scalar }
+    #[inline(always)]
+    fn extract(self, index: usize) -> Self { debug_assert_eq!(index, 0); self }
+    #[inline(always)]
+    fn replace(self, index: usize, new_value: Self) -> Self { debug_assert_eq!(index, 0); new_value }
 }
 
-#[cfg(feature="simd_support")] simd_float_from_int! { f32x2, u32x2 }
-#[cfg(feature="simd_support")] simd_float_from_int! { f32x4, u32x4 }
-#[cfg(feature="simd_support")] simd_float_from_int! { f32x8, u32x8 }
-#[cfg(feature="simd_support")] simd_float_from_int! { f32x16, u32x16 }
-#[cfg(feature="simd_support")] simd_float_from_int! { f64x2, u64x2 }
-#[cfg(feature="simd_support")] simd_float_from_int! { f64x4, u64x4 }
-#[cfg(feature="simd_support")] simd_float_from_int! { f64x8, u64x8 }
-
-
-/// `PartialOrd` for vectors compares lexicographically. We want to compare all
-/// the individual SIMD lanes instead, and get the combined result over all
-/// lanes. This is possible using something like `a.lt(b).all()`, but we
-/// implement it as a trait so we can write the same code for `f32` and `f64`.
-/// Only the comparison functions we need are implemented.
-pub trait CompareAll {
-    fn all_lt(self, other: Self) -> bool;
-    fn all_le(self, other: Self) -> bool;
+pub(crate) trait BoolAsSIMD : Sized {
+    fn any(self) -> bool;
+    fn all(self) -> bool;
+    fn none(self) -> bool;
 }
 
-impl CompareAll for f32 {
-    fn all_lt(self, other: Self) -> bool { self < other }
-    fn all_le(self, other: Self) -> bool { self <= other }
+impl BoolAsSIMD for bool {
+    #[inline(always)]
+    fn any(self) -> bool { self }
+    #[inline(always)]
+    fn all(self) -> bool { self }
+    #[inline(always)]
+    fn none(self) -> bool { !self }
 }
 
-impl CompareAll for f64 {
-    fn all_lt(self, other: Self) -> bool { self < other }
-    fn all_le(self, other: Self) -> bool { self <= other }
+macro_rules! scalar_float_impl {
+    ($ty:ident, $uty:ident) => {
+        #[cfg(not(std))]
+        impl Float for $ty {
+            type Bits = $uty;
+
+            #[inline]
+            fn is_nan(self) -> bool {
+                self != self
+            }
+
+            #[inline]
+            fn is_infinite(self) -> bool {
+                self == ::core::$ty::INFINITY || self == ::core::$ty::NEG_INFINITY
+            }
+
+            #[inline]
+            fn is_finite(self) -> bool {
+                !(self.is_nan() || self.is_infinite())
+            }
+
+            #[inline]
+            fn to_bits(self) -> Self::Bits {
+                unsafe { ::core::mem::transmute(self) }
+            }
+
+            #[inline]
+            fn from_bits(v: Self::Bits) -> Self {
+                // It turns out the safety issues with sNaN were overblown! Hooray!
+                unsafe { ::core::mem::transmute(v) }
+            }
+        }
+
+        impl FloatSIMDUtils for $ty {
+            type Mask = bool;
+            #[inline(always)]
+            fn all_lt(self, other: Self) -> bool { self < other }
+            #[inline(always)]
+            fn all_le(self, other: Self) -> bool { self <= other }
+            #[inline(always)]
+            fn all_finite(self) -> bool { self.is_finite() }
+            #[inline(always)]
+            fn finite_mask(self) -> Self::Mask { self.is_finite() }
+            #[inline(always)]
+            fn gt_mask(self, other: Self) -> Self::Mask { self > other }
+            #[inline(always)]
+            fn ge_mask(self, other: Self) -> Self::Mask { self >= other }
+            #[inline(always)]
+            fn decrease_masked(self, mask: Self::Mask) -> Self {
+                debug_assert!(mask, "At least one lane must be set");
+                <$ty>::from_bits(self.to_bits() - 1)
+            }
+            type UInt = $uty;
+            fn cast_from_int(i: Self::UInt) -> Self { i as $ty }
+        }
+
+        impl FloatAsSIMD for $ty {}
+    }
 }
+
+scalar_float_impl!(f32, u32);
+scalar_float_impl!(f64, u64);
+
+
 #[cfg(feature="simd_support")]
-macro_rules! simd_less_then {
-    ($ty:ident) => {
-        impl CompareAll for $ty {
+macro_rules! simd_impl {
+    ($ty:ident, $f_scalar:ident, $mty:ident, $uty:ident) => {
+        impl FloatSIMDUtils for $ty {
+            type Mask = $mty;
+            #[inline(always)]
             fn all_lt(self, other: Self) -> bool { self.lt(other).all() }
+            #[inline(always)]
             fn all_le(self, other: Self) -> bool { self.le(other).all() }
+            #[inline(always)]
+            fn all_finite(self) -> bool { self.finite_mask().all() }
+            #[inline(always)]
+            fn finite_mask(self) -> Self::Mask {
+                // This can possibly be done faster by checking bit patterns
+                let neg_inf = $ty::splat(::core::$f_scalar::NEG_INFINITY);
+                let pos_inf = $ty::splat(::core::$f_scalar::INFINITY);
+                self.gt(neg_inf) & self.lt(pos_inf)
+            }
+            #[inline(always)]
+            fn gt_mask(self, other: Self) -> Self::Mask { self.gt(other) }
+            #[inline(always)]
+            fn ge_mask(self, other: Self) -> Self::Mask { self.ge(other) }
+            #[inline(always)]
+            fn decrease_masked(self, mask: Self::Mask) -> Self {
+                // Casting a mask into ints will produce all bits set for
+                // true, and 0 for false. Adding that to the binary
+                // representation of a float means subtracting one from
+                // the binary representation, resulting in the next lower
+                // value representable by $ty. This works even when the
+                // current value is infinity.
+                debug_assert!(mask.any(), "At least one lane must be set");
+                <$ty>::from_bits(<$uty>::from_bits(self) + <$uty>::from_bits(mask))
+            }
+            type UInt = $uty;
+            fn cast_from_int(i: Self::UInt) -> Self { $ty::from(i) }
         }
     }
 }
 
-#[cfg(feature="simd_support")] simd_less_then! { f32x2 }
-#[cfg(feature="simd_support")] simd_less_then! { f32x4 }
-#[cfg(feature="simd_support")] simd_less_then! { f32x8 }
-#[cfg(feature="simd_support")] simd_less_then! { f32x16 }
-#[cfg(feature="simd_support")] simd_less_then! { f64x2 }
-#[cfg(feature="simd_support")] simd_less_then! { f64x4 }
-#[cfg(feature="simd_support")] simd_less_then! { f64x8 }
+#[cfg(feature="simd_support")] simd_impl! { f32x2, f32, m32x2, u32x2 }
+#[cfg(feature="simd_support")] simd_impl! { f32x4, f32, m32x4, u32x4 }
+#[cfg(feature="simd_support")] simd_impl! { f32x8, f32, m32x8, u32x8 }
+#[cfg(feature="simd_support")] simd_impl! { f32x16, f32, m1x16, u32x16 }
+#[cfg(feature="simd_support")] simd_impl! { f64x2, f64, m64x2, u64x2 }
+#[cfg(feature="simd_support")] simd_impl! { f64x4, f64, m64x4, u64x4 }
+#[cfg(feature="simd_support")] simd_impl! { f64x8, f64, m1x8, u64x8 }
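Note (not part of the patch): the core trick in `UniformFloat::sample` above is to turn random bits into a float in `[1, 2)` via `into_float_with_exponent(0)`, shift it to `[0, 1)`, and then compute `value * scale + low`, while `new`/`new_inclusive` shrink `scale` with the `ge_mask`/`gt_mask` and `decrease_masked` loop until the largest possible output stays below (or at) `high`. A minimal standalone sketch of that mapping for `f64` follows; the function name is hypothetical, `bits` stands in for `rng.gen::<u64>()`, and `scale` is assumed to have already been adjusted the way `UniformFloat::new` does it.

```rust
// Sketch only: mirrors the [1, 2) -> [0, 1) mapping used by `UniformFloat::sample`.
// `bits` stands in for the RNG output; `scale` is assumed pre-shrunk by the loop
// in `UniformFloat::new` so the result stays below `high`.
fn sample_f64_sketch(low: f64, scale: f64, bits: u64) -> f64 {
    // Keep the top 52 bits as the mantissa of a float with exponent 0, i.e. a
    // value in [1, 2) (this is what `into_float_with_exponent(0)` produces).
    let value1_2 = f64::from_bits((bits >> (64 - 52)) | (1023u64 << 52));
    // Shift to [0, 1) so multiplying by `scale` cannot overflow into infinity.
    let value0_1 = value1_2 - 1.0;
    // Multiply before adding, so some targets can emit a single instruction.
    value0_1 * scale + low
}

fn main() {
    let (low, scale) = (2.0, 5.0); // matches `Uniform::from(2.0f64..7.0)` in the test above
    assert_eq!(sample_f64_sketch(low, scale, 0), 2.0);                   // all-zero bits -> low
    assert!(sample_f64_sketch(low, scale, u64::max_value()) < 7.0);      // all-one bits stays below high
}
```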