From dd37c58c1cb96ffee68fbfd5c8bd6bf76fe8fc58 Mon Sep 17 00:00:00 2001 From: Jonas Sicking Date: Thu, 12 Jul 2018 04:09:49 -0700 Subject: [PATCH] Hide modules whose distributions we're exposing directly throguh rand::distributions. Also move a couple of distribution helper functions into utils.rs. --- src/distributions/binomial.rs | 2 +- src/distributions/exponential.rs | 3 +- src/distributions/log_gamma.rs | 51 ------------ src/distributions/mod.rs | 132 ++++++------------------------- src/distributions/normal.rs | 3 +- src/distributions/poisson.rs | 2 +- src/distributions/utils.rs | 111 ++++++++++++++++++++++++++ src/distributions/weighted.rs | 6 ++ 8 files changed, 146 insertions(+), 164 deletions(-) delete mode 100644 src/distributions/log_gamma.rs diff --git a/src/distributions/binomial.rs b/src/distributions/binomial.rs index 5eb03e38850..1c232fa43c7 100644 --- a/src/distributions/binomial.rs +++ b/src/distributions/binomial.rs @@ -12,7 +12,7 @@ use Rng; use distributions::{Distribution, Bernoulli, Cauchy}; -use distributions::log_gamma::log_gamma; +use distributions::utils::log_gamma; /// The binomial distribution `Binomial(n, p)`. /// diff --git a/src/distributions/exponential.rs b/src/distributions/exponential.rs index e8dbdc9c9de..9214cbb09ef 100644 --- a/src/distributions/exponential.rs +++ b/src/distributions/exponential.rs @@ -11,7 +11,8 @@ //! The exponential distribution. use {Rng}; -use distributions::{ziggurat, ziggurat_tables, Distribution}; +use distributions::{ziggurat_tables, Distribution}; +use distributions::utils::ziggurat; /// Samples floating-point numbers according to the exponential distribution, /// with rate parameter `λ = 1`. This is equivalent to `Exp::new(1.0)` or diff --git a/src/distributions/log_gamma.rs b/src/distributions/log_gamma.rs deleted file mode 100644 index f1fa3831f15..00000000000 --- a/src/distributions/log_gamma.rs +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright 2016-2017 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// https://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -/// Calculates ln(gamma(x)) (natural logarithm of the gamma -/// function) using the Lanczos approximation. -/// -/// The approximation expresses the gamma function as: -/// `gamma(z+1) = sqrt(2*pi)*(z+g+0.5)^(z+0.5)*exp(-z-g-0.5)*Ag(z)` -/// `g` is an arbitrary constant; we use the approximation with `g=5`. -/// -/// Noting that `gamma(z+1) = z*gamma(z)` and applying `ln` to both sides: -/// `ln(gamma(z)) = (z+0.5)*ln(z+g+0.5)-(z+g+0.5) + ln(sqrt(2*pi)*Ag(z)/z)` -/// -/// `Ag(z)` is an infinite series with coefficients that can be calculated -/// ahead of time - we use just the first 6 terms, which is good enough -/// for most purposes. -pub fn log_gamma(x: f64) -> f64 { - // precalculated 6 coefficients for the first 6 terms of the series - let coefficients: [f64; 6] = [ - 76.18009172947146, - -86.50532032941677, - 24.01409824083091, - -1.231739572450155, - 0.1208650973866179e-2, - -0.5395239384953e-5, - ]; - - // (x+0.5)*ln(x+g+0.5)-(x+g+0.5) - let tmp = x + 5.5; - let log = (x + 0.5) * tmp.ln() - tmp; - - // the first few terms of the series for Ag(x) - let mut a = 1.000000000190015; - let mut denom = x; - for coeff in &coefficients { - denom += 1.0; - a += coeff / denom; - } - - // get everything together - // a is Ag(x) - // 2.5066... is sqrt(2pi) - log + (2.5066282746310005 * a / x).ln() -} diff --git a/src/distributions/mod.rs b/src/distributions/mod.rs index 77f9cd5f730..03d2d58e7bc 100644 --- a/src/distributions/mod.rs +++ b/src/distributions/mod.rs @@ -173,60 +173,37 @@ use Rng; -#[doc(inline)] pub use self::other::Alphanumeric; +pub use self::other::Alphanumeric; #[doc(inline)] pub use self::uniform::Uniform; -#[doc(inline)] pub use self::float::{OpenClosed01, Open01}; -#[cfg(feature="alloc")] -#[doc(inline)] pub use self::weighted::{WeightedIndex, WeightedError}; -#[cfg(feature="std")] -#[doc(inline)] pub use self::gamma::{Gamma, ChiSquared, FisherF, StudentT}; -#[cfg(feature="std")] -#[doc(inline)] pub use self::normal::{Normal, LogNormal, StandardNormal}; -#[cfg(feature="std")] -#[doc(inline)] pub use self::exponential::{Exp, Exp1}; -#[cfg(feature="std")] -#[doc(inline)] pub use self::pareto::Pareto; -#[cfg(feature = "std")] -#[doc(inline)] pub use self::poisson::Poisson; -#[cfg(feature = "std")] -#[doc(inline)] pub use self::binomial::Binomial; -#[doc(inline)] pub use self::bernoulli::Bernoulli; -#[cfg(feature = "std")] -#[doc(inline)] pub use self::cauchy::Cauchy; -#[cfg(feature = "std")] -#[doc(inline)] pub use self::dirichlet::Dirichlet; +pub use self::float::{OpenClosed01, Open01}; +pub use self::bernoulli::Bernoulli; +#[cfg(feature="alloc")] pub use self::weighted::{WeightedIndex, WeightedError}; +#[cfg(feature="std")] pub use self::gamma::{Gamma, ChiSquared, FisherF, StudentT}; +#[cfg(feature="std")] pub use self::normal::{Normal, LogNormal, StandardNormal}; +#[cfg(feature="std")] pub use self::exponential::{Exp, Exp1}; +#[cfg(feature="std")] pub use self::pareto::Pareto; +#[cfg(feature="std")] pub use self::poisson::Poisson; +#[cfg(feature="std")] pub use self::binomial::Binomial; +#[cfg(feature="std")] pub use self::cauchy::Cauchy; +#[cfg(feature="std")] pub use self::dirichlet::Dirichlet; pub mod uniform; -#[cfg(feature="alloc")] -#[doc(hidden)] pub mod weighted; -#[cfg(feature="std")] -#[doc(hidden)] pub mod gamma; -#[cfg(feature="std")] -#[doc(hidden)] pub mod normal; -#[cfg(feature="std")] -#[doc(hidden)] pub mod exponential; -#[cfg(feature="std")] -#[doc(hidden)] pub mod pareto; -#[cfg(feature = "std")] -#[doc(hidden)] pub mod poisson; -#[cfg(feature = "std")] -#[doc(hidden)] pub mod binomial; -#[doc(hidden)] pub mod bernoulli; -#[cfg(feature = "std")] -#[doc(hidden)] pub mod cauchy; -#[cfg(feature = "std")] -#[doc(hidden)] pub mod dirichlet; +mod bernoulli; +#[cfg(feature="alloc")] mod weighted; +#[cfg(feature="std")] mod gamma; +#[cfg(feature="std")] mod normal; +#[cfg(feature="std")] mod exponential; +#[cfg(feature="std")] mod pareto; +#[cfg(feature="std")] mod poisson; +#[cfg(feature="std")] mod binomial; +#[cfg(feature="std")] mod cauchy; +#[cfg(feature="std")] mod dirichlet; mod float; mod integer; -#[cfg(feature="std")] -mod log_gamma; mod other; mod utils; -#[cfg(feature="std")] -mod ziggurat_tables; -#[cfg(feature="std")] -use distributions::float::IntoFloat; +#[cfg(feature="std")] mod ziggurat_tables; /// Types (distributions) that can be used to create a random instance of `T`. /// @@ -487,69 +464,6 @@ impl<'a, T: Clone> Distribution for WeightedChoice<'a, T> { } } -/// Sample a random number using the Ziggurat method (specifically the -/// ZIGNOR variant from Doornik 2005). Most of the arguments are -/// directly from the paper: -/// -/// * `rng`: source of randomness -/// * `symmetric`: whether this is a symmetric distribution, or one-sided with P(x < 0) = 0. -/// * `X`: the $x_i$ abscissae. -/// * `F`: precomputed values of the PDF at the $x_i$, (i.e. $f(x_i)$) -/// * `F_DIFF`: precomputed values of $f(x_i) - f(x_{i+1})$ -/// * `pdf`: the probability density function -/// * `zero_case`: manual sampling from the tail when we chose the -/// bottom box (i.e. i == 0) - -// the perf improvement (25-50%) is definitely worth the extra code -// size from force-inlining. -#[cfg(feature="std")] -#[inline(always)] -fn ziggurat( - rng: &mut R, - symmetric: bool, - x_tab: ziggurat_tables::ZigTable, - f_tab: ziggurat_tables::ZigTable, - mut pdf: P, - mut zero_case: Z) - -> f64 where P: FnMut(f64) -> f64, Z: FnMut(&mut R, f64) -> f64 { - loop { - // As an optimisation we re-implement the conversion to a f64. - // From the remaining 12 most significant bits we use 8 to construct `i`. - // This saves us generating a whole extra random number, while the added - // precision of using 64 bits for f64 does not buy us much. - let bits = rng.next_u64(); - let i = bits as usize & 0xff; - - let u = if symmetric { - // Convert to a value in the range [2,4) and substract to get [-1,1) - // We can't convert to an open range directly, that would require - // substracting `3.0 - EPSILON`, which is not representable. - // It is possible with an extra step, but an open range does not - // seem neccesary for the ziggurat algorithm anyway. - (bits >> 12).into_float_with_exponent(1) - 3.0 - } else { - // Convert to a value in the range [1,2) and substract to get (0,1) - (bits >> 12).into_float_with_exponent(0) - - (1.0 - ::core::f64::EPSILON / 2.0) - }; - let x = u * x_tab[i]; - - let test_x = if symmetric { x.abs() } else {x}; - - // algebraically equivalent to |u| < x_tab[i+1]/x_tab[i] (or u < x_tab[i+1]/x_tab[i]) - if test_x < x_tab[i + 1] { - return x; - } - if i == 0 { - return zero_case(rng, u); - } - // algebraically equivalent to f1 + DRanU()*(f0 - f1) < 1 - if f_tab[i + 1] + (f_tab[i] - f_tab[i + 1]) * rng.gen::() < pdf(x) { - return x; - } - } -} - #[cfg(test)] mod tests { use rngs::mock::StepRng; diff --git a/src/distributions/normal.rs b/src/distributions/normal.rs index f053fb6c7c8..a442b58b320 100644 --- a/src/distributions/normal.rs +++ b/src/distributions/normal.rs @@ -11,7 +11,8 @@ //! The normal and derived distributions. use Rng; -use distributions::{ziggurat, ziggurat_tables, Distribution, Open01}; +use distributions::{ziggurat_tables, Distribution, Open01}; +use distributions::utils::ziggurat; /// Samples floating-point numbers according to the normal distribution /// `N(0, 1)` (a.k.a. a standard normal, or Gaussian). This is equivalent to diff --git a/src/distributions/poisson.rs b/src/distributions/poisson.rs index bdecb771536..1742f9e78e6 100644 --- a/src/distributions/poisson.rs +++ b/src/distributions/poisson.rs @@ -12,7 +12,7 @@ use Rng; use distributions::{Distribution, Cauchy}; -use distributions::log_gamma::log_gamma; +use distributions::utils::log_gamma; /// The Poisson distribution `Poisson(lambda)`. /// diff --git a/src/distributions/utils.rs b/src/distributions/utils.rs index 4d237230235..8ac2c66dcb6 100644 --- a/src/distributions/utils.rs +++ b/src/distributions/utils.rs @@ -12,6 +12,10 @@ #[cfg(feature="simd_support")] use core::simd::*; +#[cfg(feature="std")] +use distributions::ziggurat_tables; +#[cfg(feature="std")] +use Rng; pub trait WideningMultiply { @@ -271,3 +275,110 @@ macro_rules! simd_impl { #[cfg(feature="simd_support")] simd_impl! { f64x2, f64, m64x2, u64x2 } #[cfg(feature="simd_support")] simd_impl! { f64x4, f64, m64x4, u64x4 } #[cfg(feature="simd_support")] simd_impl! { f64x8, f64, m1x8, u64x8 } + +/// Calculates ln(gamma(x)) (natural logarithm of the gamma +/// function) using the Lanczos approximation. +/// +/// The approximation expresses the gamma function as: +/// `gamma(z+1) = sqrt(2*pi)*(z+g+0.5)^(z+0.5)*exp(-z-g-0.5)*Ag(z)` +/// `g` is an arbitrary constant; we use the approximation with `g=5`. +/// +/// Noting that `gamma(z+1) = z*gamma(z)` and applying `ln` to both sides: +/// `ln(gamma(z)) = (z+0.5)*ln(z+g+0.5)-(z+g+0.5) + ln(sqrt(2*pi)*Ag(z)/z)` +/// +/// `Ag(z)` is an infinite series with coefficients that can be calculated +/// ahead of time - we use just the first 6 terms, which is good enough +/// for most purposes. +#[cfg(feature="std")] +pub fn log_gamma(x: f64) -> f64 { + // precalculated 6 coefficients for the first 6 terms of the series + let coefficients: [f64; 6] = [ + 76.18009172947146, + -86.50532032941677, + 24.01409824083091, + -1.231739572450155, + 0.1208650973866179e-2, + -0.5395239384953e-5, + ]; + + // (x+0.5)*ln(x+g+0.5)-(x+g+0.5) + let tmp = x + 5.5; + let log = (x + 0.5) * tmp.ln() - tmp; + + // the first few terms of the series for Ag(x) + let mut a = 1.000000000190015; + let mut denom = x; + for coeff in &coefficients { + denom += 1.0; + a += coeff / denom; + } + + // get everything together + // a is Ag(x) + // 2.5066... is sqrt(2pi) + log + (2.5066282746310005 * a / x).ln() +} + +/// Sample a random number using the Ziggurat method (specifically the +/// ZIGNOR variant from Doornik 2005). Most of the arguments are +/// directly from the paper: +/// +/// * `rng`: source of randomness +/// * `symmetric`: whether this is a symmetric distribution, or one-sided with P(x < 0) = 0. +/// * `X`: the $x_i$ abscissae. +/// * `F`: precomputed values of the PDF at the $x_i$, (i.e. $f(x_i)$) +/// * `F_DIFF`: precomputed values of $f(x_i) - f(x_{i+1})$ +/// * `pdf`: the probability density function +/// * `zero_case`: manual sampling from the tail when we chose the +/// bottom box (i.e. i == 0) + +// the perf improvement (25-50%) is definitely worth the extra code +// size from force-inlining. +#[cfg(feature="std")] +#[inline(always)] +pub fn ziggurat( + rng: &mut R, + symmetric: bool, + x_tab: ziggurat_tables::ZigTable, + f_tab: ziggurat_tables::ZigTable, + mut pdf: P, + mut zero_case: Z) + -> f64 where P: FnMut(f64) -> f64, Z: FnMut(&mut R, f64) -> f64 { + use distributions::float::IntoFloat; + loop { + // As an optimisation we re-implement the conversion to a f64. + // From the remaining 12 most significant bits we use 8 to construct `i`. + // This saves us generating a whole extra random number, while the added + // precision of using 64 bits for f64 does not buy us much. + let bits = rng.next_u64(); + let i = bits as usize & 0xff; + + let u = if symmetric { + // Convert to a value in the range [2,4) and substract to get [-1,1) + // We can't convert to an open range directly, that would require + // substracting `3.0 - EPSILON`, which is not representable. + // It is possible with an extra step, but an open range does not + // seem neccesary for the ziggurat algorithm anyway. + (bits >> 12).into_float_with_exponent(1) - 3.0 + } else { + // Convert to a value in the range [1,2) and substract to get (0,1) + (bits >> 12).into_float_with_exponent(0) + - (1.0 - ::core::f64::EPSILON / 2.0) + }; + let x = u * x_tab[i]; + + let test_x = if symmetric { x.abs() } else {x}; + + // algebraically equivalent to |u| < x_tab[i+1]/x_tab[i] (or u < x_tab[i+1]/x_tab[i]) + if test_x < x_tab[i + 1] { + return x; + } + if i == 0 { + return zero_case(rng, u); + } + // algebraically equivalent to f1 + DRanU()*(f0 - f1) < 1 + if f_tab[i + 1] + (f_tab[i] - f_tab[i + 1]) * rng.gen::() < pdf(x) { + return x; + } + } +} diff --git a/src/distributions/weighted.rs b/src/distributions/weighted.rs index 64d862987c3..33a4071ddf1 100644 --- a/src/distributions/weighted.rs +++ b/src/distributions/weighted.rs @@ -169,10 +169,16 @@ mod test { } } +/// Error type returned from `WeightedIndex::new`. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum WeightedError { + /// The provided iterator contained no items. NoItem, + + /// A weight lower than zero was used. NegativeWeight, + + /// All items in the provided iterator had a weight of zero. AllWeightsZero, }