-
Notifications
You must be signed in to change notification settings - Fork 432
/
uniform.rs
1658 lines (1502 loc) · 61.5 KB
/
uniform.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright 2018-2020 Developers of the Rand project.
// Copyright 2017 The Rust Project Developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! A distribution uniformly sampling numbers within a given range.
//!
//! [`Uniform`] is the standard distribution to sample uniformly from a range;
//! e.g. `Uniform::new_inclusive(1, 6)` can sample integers from 1 to 6, like a
//! standard die. [`Rng::gen_range`] supports any type supported by
//! [`Uniform`].
//!
//! This distribution is provided with support for several primitive types
//! (all integer and floating-point types) as well as [`std::time::Duration`],
//! and supports extension to user-defined types via a type-specific *back-end*
//! implementation.
//!
//! The types [`UniformInt`], [`UniformFloat`] and [`UniformDuration`] are the
//! back-ends supporting sampling from primitive integer and floating-point
//! ranges as well as from [`std::time::Duration`]; these types do not normally
//! need to be used directly (unless implementing a derived back-end).
//!
//! # Example usage
//!
//! ```
//! use rand::{Rng, thread_rng};
//! use rand::distributions::Uniform;
//!
//! let mut rng = thread_rng();
//! let side = Uniform::new(-10.0, 10.0);
//!
//! // sample between 1 and 10 points
//! for _ in 0..rng.gen_range(1..=10) {
//! // sample a point from the square with sides -10 - 10 in two dimensions
//! let (x, y) = (rng.sample(side), rng.sample(side));
//! println!("Point: {}, {}", x, y);
//! }
//! ```
//!
//! # Extending `Uniform` to support a custom type
//!
//! To extend [`Uniform`] to support your own types, write a back-end which
//! implements the [`UniformSampler`] trait, then implement the [`SampleUniform`]
//! helper trait to "register" your back-end. See the `MyF32` example below.
//!
//! At a minimum, the back-end needs to store any parameters needed for sampling
//! (e.g. the target range) and implement `new`, `new_inclusive` and `sample`.
//! Those methods should include an assert to check the range is valid (i.e.
//! `low < high`). The example below merely wraps another back-end.
//!
//! The `new`, `new_inclusive` and `sample_single` functions use arguments of
//! type SampleBorrow<X> in order to support passing in values by reference or
//! by value. In the implementation of these functions, you can choose to
//! simply use the reference returned by [`SampleBorrow::borrow`], or you can choose
//! to copy or clone the value, whatever is appropriate for your type.
//!
//! ```
//! use rand::prelude::*;
//! use rand::distributions::uniform::{Uniform, SampleUniform,
//! UniformSampler, UniformFloat, SampleBorrow};
//!
//! struct MyF32(f32);
//!
//! #[derive(Clone, Copy, Debug)]
//! struct UniformMyF32(UniformFloat<f32>);
//!
//! impl UniformSampler for UniformMyF32 {
//! type X = MyF32;
//! fn new<B1, B2>(low: B1, high: B2) -> Self
//! where B1: SampleBorrow<Self::X> + Sized,
//! B2: SampleBorrow<Self::X> + Sized
//! {
//! UniformMyF32(UniformFloat::<f32>::new(low.borrow().0, high.borrow().0))
//! }
//! fn new_inclusive<B1, B2>(low: B1, high: B2) -> Self
//! where B1: SampleBorrow<Self::X> + Sized,
//! B2: SampleBorrow<Self::X> + Sized
//! {
//! UniformMyF32(UniformFloat::<f32>::new_inclusive(
//! low.borrow().0,
//! high.borrow().0,
//! ))
//! }
//! fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Self::X {
//! MyF32(self.0.sample(rng))
//! }
//! }
//!
//! impl SampleUniform for MyF32 {
//! type Sampler = UniformMyF32;
//! }
//!
//! let (low, high) = (MyF32(17.0f32), MyF32(22.0f32));
//! let uniform = Uniform::new(low, high);
//! let x = uniform.sample(&mut thread_rng());
//! ```
//!
//! [`SampleUniform`]: crate::distributions::uniform::SampleUniform
//! [`UniformSampler`]: crate::distributions::uniform::UniformSampler
//! [`UniformInt`]: crate::distributions::uniform::UniformInt
//! [`UniformFloat`]: crate::distributions::uniform::UniformFloat
//! [`UniformDuration`]: crate::distributions::uniform::UniformDuration
//! [`SampleBorrow::borrow`]: crate::distributions::uniform::SampleBorrow::borrow
use core::time::Duration;
use core::ops::{Range, RangeInclusive};
use crate::distributions::float::IntoFloat;
use crate::distributions::utils::{BoolAsSIMD, FloatAsSIMD, FloatSIMDUtils, WideningMultiply};
use crate::distributions::Distribution;
use crate::{Rng, RngCore};
#[cfg(not(feature = "std"))]
#[allow(unused_imports)] // rustc doesn't detect that this is actually used
use crate::distributions::utils::Float;
#[cfg(feature = "simd_support")] use packed_simd::*;
#[cfg(feature = "serde1")]
use serde::{Serialize, Deserialize};
/// Sample values uniformly between two bounds.
///
/// [`Uniform::new`] and [`Uniform::new_inclusive`] construct a uniform
/// distribution sampling from the given range; these functions may do extra
/// work up front to make sampling of multiple values faster. If only one sample
/// from the range is required, [`Rng::gen_range`] can be more efficient.
///
/// When sampling from a constant range, many calculations can happen at
/// compile-time and all methods should be fast; for floating-point ranges and
/// the full range of integer types this should have comparable performance to
/// the `Standard` distribution.
///
/// Steps are taken to avoid bias which might be present in naive
/// implementations; for example `rng.gen::<u8>() % 170` samples from the range
/// `[0, 169]` but is twice as likely to select numbers less than 85 than other
/// values. Further, the implementations here give more weight to the high-bits
/// generated by the RNG than the low bits, since with some RNGs the low-bits
/// are of lower quality than the high bits.
///
/// Implementations must sample in `[low, high)` range for
/// `Uniform::new(low, high)`, i.e., excluding `high`. In particular, care must
/// be taken to ensure that rounding never results values `< low` or `>= high`.
///
/// # Example
///
/// ```
/// use rand::distributions::{Distribution, Uniform};
///
/// let between = Uniform::from(10..10000);
/// let mut rng = rand::thread_rng();
/// let mut sum = 0;
/// for _ in 0..1000 {
/// sum += between.sample(&mut rng);
/// }
/// println!("{}", sum);
/// ```
///
/// For a single sample, [`Rng::gen_range`] may be preferred:
///
/// ```
/// use rand::Rng;
///
/// let mut rng = rand::thread_rng();
/// println!("{}", rng.gen_range(0..10));
/// ```
///
/// [`new`]: Uniform::new
/// [`new_inclusive`]: Uniform::new_inclusive
/// [`Rng::gen_range`]: Rng::gen_range
#[derive(Clone, Copy, Debug, PartialEq)]
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "serde1", serde(bound(serialize = "X::Sampler: Serialize")))]
#[cfg_attr(feature = "serde1", serde(bound(deserialize = "X::Sampler: Deserialize<'de>")))]
pub struct Uniform<X: SampleUniform>(X::Sampler);
impl<X: SampleUniform> Uniform<X> {
/// Create a new `Uniform` instance which samples uniformly from the half
/// open range `[low, high)` (excluding `high`). Panics if `low >= high`.
pub fn new<B1, B2>(low: B1, high: B2) -> Uniform<X>
where
B1: SampleBorrow<X> + Sized,
B2: SampleBorrow<X> + Sized,
{
Uniform(X::Sampler::new(low, high))
}
/// Create a new `Uniform` instance which samples uniformly from the closed
/// range `[low, high]` (inclusive). Panics if `low > high`.
pub fn new_inclusive<B1, B2>(low: B1, high: B2) -> Uniform<X>
where
B1: SampleBorrow<X> + Sized,
B2: SampleBorrow<X> + Sized,
{
Uniform(X::Sampler::new_inclusive(low, high))
}
}
impl<X: SampleUniform> Distribution<X> for Uniform<X> {
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> X {
self.0.sample(rng)
}
}
/// Helper trait for creating objects using the correct implementation of
/// [`UniformSampler`] for the sampling type.
///
/// See the [module documentation] on how to implement [`Uniform`] range
/// sampling for a custom type.
///
/// [module documentation]: crate::distributions::uniform
pub trait SampleUniform: Sized {
/// The `UniformSampler` implementation supporting type `X`.
type Sampler: UniformSampler<X = Self>;
}
/// Helper trait handling actual uniform sampling.
///
/// See the [module documentation] on how to implement [`Uniform`] range
/// sampling for a custom type.
///
/// Implementation of [`sample_single`] is optional, and is only useful when
/// the implementation can be faster than `Self::new(low, high).sample(rng)`.
///
/// [module documentation]: crate::distributions::uniform
/// [`sample_single`]: UniformSampler::sample_single
pub trait UniformSampler: Sized {
/// The type sampled by this implementation.
type X;
/// Construct self, with inclusive lower bound and exclusive upper bound
/// `[low, high)`.
///
/// Usually users should not call this directly but instead use
/// `Uniform::new`, which asserts that `low < high` before calling this.
fn new<B1, B2>(low: B1, high: B2) -> Self
where
B1: SampleBorrow<Self::X> + Sized,
B2: SampleBorrow<Self::X> + Sized;
/// Construct self, with inclusive bounds `[low, high]`.
///
/// Usually users should not call this directly but instead use
/// `Uniform::new_inclusive`, which asserts that `low <= high` before
/// calling this.
fn new_inclusive<B1, B2>(low: B1, high: B2) -> Self
where
B1: SampleBorrow<Self::X> + Sized,
B2: SampleBorrow<Self::X> + Sized;
/// Sample a value.
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Self::X;
/// Sample a single value uniformly from a range with inclusive lower bound
/// and exclusive upper bound `[low, high)`.
///
/// By default this is implemented using
/// `UniformSampler::new(low, high).sample(rng)`. However, for some types
/// more optimal implementations for single usage may be provided via this
/// method (which is the case for integers and floats).
/// Results may not be identical.
///
/// Note that to use this method in a generic context, the type needs to be
/// retrieved via `SampleUniform::Sampler` as follows:
/// ```
/// use rand::{thread_rng, distributions::uniform::{SampleUniform, UniformSampler}};
/// # #[allow(unused)]
/// fn sample_from_range<T: SampleUniform>(lb: T, ub: T) -> T {
/// let mut rng = thread_rng();
/// <T as SampleUniform>::Sampler::sample_single(lb, ub, &mut rng)
/// }
/// ```
fn sample_single<R: Rng + ?Sized, B1, B2>(low: B1, high: B2, rng: &mut R) -> Self::X
where
B1: SampleBorrow<Self::X> + Sized,
B2: SampleBorrow<Self::X> + Sized,
{
let uniform: Self = UniformSampler::new(low, high);
uniform.sample(rng)
}
/// Sample a single value uniformly from a range with inclusive lower bound
/// and inclusive upper bound `[low, high]`.
///
/// By default this is implemented using
/// `UniformSampler::new_inclusive(low, high).sample(rng)`. However, for
/// some types more optimal implementations for single usage may be provided
/// via this method.
/// Results may not be identical.
fn sample_single_inclusive<R: Rng + ?Sized, B1, B2>(low: B1, high: B2, rng: &mut R)
-> Self::X
where B1: SampleBorrow<Self::X> + Sized,
B2: SampleBorrow<Self::X> + Sized
{
let uniform: Self = UniformSampler::new_inclusive(low, high);
uniform.sample(rng)
}
}
impl<X: SampleUniform> From<Range<X>> for Uniform<X> {
fn from(r: ::core::ops::Range<X>) -> Uniform<X> {
Uniform::new(r.start, r.end)
}
}
impl<X: SampleUniform> From<RangeInclusive<X>> for Uniform<X> {
fn from(r: ::core::ops::RangeInclusive<X>) -> Uniform<X> {
Uniform::new_inclusive(r.start(), r.end())
}
}
/// Helper trait similar to [`Borrow`] but implemented
/// only for SampleUniform and references to SampleUniform in
/// order to resolve ambiguity issues.
///
/// [`Borrow`]: std::borrow::Borrow
pub trait SampleBorrow<Borrowed> {
/// Immutably borrows from an owned value. See [`Borrow::borrow`]
///
/// [`Borrow::borrow`]: std::borrow::Borrow::borrow
fn borrow(&self) -> &Borrowed;
}
impl<Borrowed> SampleBorrow<Borrowed> for Borrowed
where Borrowed: SampleUniform
{
#[inline(always)]
fn borrow(&self) -> &Borrowed {
self
}
}
impl<'a, Borrowed> SampleBorrow<Borrowed> for &'a Borrowed
where Borrowed: SampleUniform
{
#[inline(always)]
fn borrow(&self) -> &Borrowed {
*self
}
}
/// Range that supports generating a single sample efficiently.
///
/// Any type implementing this trait can be used to specify the sampled range
/// for `Rng::gen_range`.
pub trait SampleRange<T> {
/// Generate a sample from the given range.
fn sample_single<R: RngCore + ?Sized>(self, rng: &mut R) -> T;
/// Check whether the range is empty.
fn is_empty(&self) -> bool;
}
impl<T: SampleUniform + PartialOrd> SampleRange<T> for Range<T> {
#[inline]
fn sample_single<R: RngCore + ?Sized>(self, rng: &mut R) -> T {
T::Sampler::sample_single(self.start, self.end, rng)
}
#[inline]
fn is_empty(&self) -> bool {
!(self.start < self.end)
}
}
impl<T: SampleUniform + PartialOrd> SampleRange<T> for RangeInclusive<T> {
#[inline]
fn sample_single<R: RngCore + ?Sized>(self, rng: &mut R) -> T {
T::Sampler::sample_single_inclusive(self.start(), self.end(), rng)
}
#[inline]
fn is_empty(&self) -> bool {
!(self.start() <= self.end())
}
}
////////////////////////////////////////////////////////////////////////////////
// What follows are all back-ends.
/// The back-end implementing [`UniformSampler`] for integer types.
///
/// Unless you are implementing [`UniformSampler`] for your own type, this type
/// should not be used directly, use [`Uniform`] instead.
///
/// # Implementation notes
///
/// For simplicity, we use the same generic struct `UniformInt<X>` for all
/// integer types `X`. This gives us only one field type, `X`; to store unsigned
/// values of this size, we take use the fact that these conversions are no-ops.
///
/// For a closed range, the number of possible numbers we should generate is
/// `range = (high - low + 1)`. To avoid bias, we must ensure that the size of
/// our sample space, `zone`, is a multiple of `range`; other values must be
/// rejected (by replacing with a new random sample).
///
/// As a special case, we use `range = 0` to represent the full range of the
/// result type (i.e. for `new_inclusive($ty::MIN, $ty::MAX)`).
///
/// The optimum `zone` is the largest product of `range` which fits in our
/// (unsigned) target type. We calculate this by calculating how many numbers we
/// must reject: `reject = (MAX + 1) % range = (MAX - range + 1) % range`. Any (large)
/// product of `range` will suffice, thus in `sample_single` we multiply by a
/// power of 2 via bit-shifting (faster but may cause more rejections).
///
/// The smallest integer PRNGs generate is `u32`. For 8- and 16-bit outputs we
/// use `u32` for our `zone` and samples (because it's not slower and because
/// it reduces the chance of having to reject a sample). In this case we cannot
/// store `zone` in the target type since it is too large, however we know
/// `ints_to_reject < range <= $unsigned::MAX`.
///
/// An alternative to using a modulus is widening multiply: After a widening
/// multiply by `range`, the result is in the high word. Then comparing the low
/// word against `zone` makes sure our distribution is uniform.
#[derive(Clone, Copy, Debug, PartialEq)]
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
pub struct UniformInt<X> {
low: X,
range: X,
z: X, // either ints_to_reject or zone depending on implementation
}
macro_rules! uniform_int_impl {
($ty:ty, $unsigned:ident, $u_large:ident) => {
impl SampleUniform for $ty {
type Sampler = UniformInt<$ty>;
}
impl UniformSampler for UniformInt<$ty> {
// We play free and fast with unsigned vs signed here
// (when $ty is signed), but that's fine, since the
// contract of this macro is for $ty and $unsigned to be
// "bit-equal", so casting between them is a no-op.
type X = $ty;
#[inline] // if the range is constant, this helps LLVM to do the
// calculations at compile-time.
fn new<B1, B2>(low_b: B1, high_b: B2) -> Self
where
B1: SampleBorrow<Self::X> + Sized,
B2: SampleBorrow<Self::X> + Sized,
{
let low = *low_b.borrow();
let high = *high_b.borrow();
assert!(low < high, "Uniform::new called with `low >= high`");
UniformSampler::new_inclusive(low, high - 1)
}
#[inline] // if the range is constant, this helps LLVM to do the
// calculations at compile-time.
fn new_inclusive<B1, B2>(low_b: B1, high_b: B2) -> Self
where
B1: SampleBorrow<Self::X> + Sized,
B2: SampleBorrow<Self::X> + Sized,
{
let low = *low_b.borrow();
let high = *high_b.borrow();
assert!(
low <= high,
"Uniform::new_inclusive called with `low > high`"
);
let unsigned_max = ::core::$u_large::MAX;
let range = high.wrapping_sub(low).wrapping_add(1) as $unsigned;
let ints_to_reject = if range > 0 {
let range = $u_large::from(range);
(unsigned_max - range + 1) % range
} else {
0
};
UniformInt {
low,
// These are really $unsigned values, but store as $ty:
range: range as $ty,
z: ints_to_reject as $unsigned as $ty,
}
}
#[inline]
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Self::X {
let range = self.range as $unsigned as $u_large;
if range > 0 {
let unsigned_max = ::core::$u_large::MAX;
let zone = unsigned_max - (self.z as $unsigned as $u_large);
loop {
let v: $u_large = rng.gen();
let (hi, lo) = v.wmul(range);
if lo <= zone {
return self.low.wrapping_add(hi as $ty);
}
}
} else {
// Sample from the entire integer range.
rng.gen()
}
}
#[inline]
fn sample_single<R: Rng + ?Sized, B1, B2>(low_b: B1, high_b: B2, rng: &mut R) -> Self::X
where
B1: SampleBorrow<Self::X> + Sized,
B2: SampleBorrow<Self::X> + Sized,
{
let low = *low_b.borrow();
let high = *high_b.borrow();
assert!(low < high, "UniformSampler::sample_single: low >= high");
Self::sample_single_inclusive(low, high - 1, rng)
}
#[inline]
fn sample_single_inclusive<R: Rng + ?Sized, B1, B2>(low_b: B1, high_b: B2, rng: &mut R) -> Self::X
where
B1: SampleBorrow<Self::X> + Sized,
B2: SampleBorrow<Self::X> + Sized,
{
let low = *low_b.borrow();
let high = *high_b.borrow();
assert!(low <= high, "UniformSampler::sample_single_inclusive: low > high");
let range = high.wrapping_sub(low).wrapping_add(1) as $unsigned as $u_large;
// If the above resulted in wrap-around to 0, the range is $ty::MIN..=$ty::MAX,
// and any integer will do.
if range == 0 {
return rng.gen();
}
let zone = if ::core::$unsigned::MAX <= ::core::u16::MAX as $unsigned {
// Using a modulus is faster than the approximation for
// i8 and i16. I suppose we trade the cost of one
// modulus for near-perfect branch prediction.
let unsigned_max: $u_large = ::core::$u_large::MAX;
let ints_to_reject = (unsigned_max - range + 1) % range;
unsigned_max - ints_to_reject
} else {
// conservative but fast approximation. `- 1` is necessary to allow the
// same comparison without bias.
(range << range.leading_zeros()).wrapping_sub(1)
};
loop {
let v: $u_large = rng.gen();
let (hi, lo) = v.wmul(range);
if lo <= zone {
return low.wrapping_add(hi as $ty);
}
}
}
}
};
}
uniform_int_impl! { i8, u8, u32 }
uniform_int_impl! { i16, u16, u32 }
uniform_int_impl! { i32, u32, u32 }
uniform_int_impl! { i64, u64, u64 }
uniform_int_impl! { i128, u128, u128 }
uniform_int_impl! { isize, usize, usize }
uniform_int_impl! { u8, u8, u32 }
uniform_int_impl! { u16, u16, u32 }
uniform_int_impl! { u32, u32, u32 }
uniform_int_impl! { u64, u64, u64 }
uniform_int_impl! { usize, usize, usize }
uniform_int_impl! { u128, u128, u128 }
#[cfg(feature = "simd_support")]
macro_rules! uniform_simd_int_impl {
($ty:ident, $unsigned:ident, $u_scalar:ident) => {
// The "pick the largest zone that can fit in an `u32`" optimization
// is less useful here. Multiple lanes complicate things, we don't
// know the PRNG's minimal output size, and casting to a larger vector
// is generally a bad idea for SIMD performance. The user can still
// implement it manually.
// TODO: look into `Uniform::<u32x4>::new(0u32, 100)` functionality
// perhaps `impl SampleUniform for $u_scalar`?
impl SampleUniform for $ty {
type Sampler = UniformInt<$ty>;
}
impl UniformSampler for UniformInt<$ty> {
type X = $ty;
#[inline] // if the range is constant, this helps LLVM to do the
// calculations at compile-time.
fn new<B1, B2>(low_b: B1, high_b: B2) -> Self
where B1: SampleBorrow<Self::X> + Sized,
B2: SampleBorrow<Self::X> + Sized
{
let low = *low_b.borrow();
let high = *high_b.borrow();
assert!(low.lt(high).all(), "Uniform::new called with `low >= high`");
UniformSampler::new_inclusive(low, high - 1)
}
#[inline] // if the range is constant, this helps LLVM to do the
// calculations at compile-time.
fn new_inclusive<B1, B2>(low_b: B1, high_b: B2) -> Self
where B1: SampleBorrow<Self::X> + Sized,
B2: SampleBorrow<Self::X> + Sized
{
let low = *low_b.borrow();
let high = *high_b.borrow();
assert!(low.le(high).all(),
"Uniform::new_inclusive called with `low > high`");
let unsigned_max = ::core::$u_scalar::MAX;
// NOTE: these may need to be replaced with explicitly
// wrapping operations if `packed_simd` changes
let range: $unsigned = ((high - low) + 1).cast();
// `% 0` will panic at runtime.
let not_full_range = range.gt($unsigned::splat(0));
// replacing 0 with `unsigned_max` allows a faster `select`
// with bitwise OR
let modulo = not_full_range.select(range, $unsigned::splat(unsigned_max));
// wrapping addition
let ints_to_reject = (unsigned_max - range + 1) % modulo;
// When `range` is 0, `lo` of `v.wmul(range)` will always be
// zero which means only one sample is needed.
let zone = unsigned_max - ints_to_reject;
UniformInt {
low,
// These are really $unsigned values, but store as $ty:
range: range.cast(),
z: zone.cast(),
}
}
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Self::X {
let range: $unsigned = self.range.cast();
let zone: $unsigned = self.z.cast();
// This might seem very slow, generating a whole new
// SIMD vector for every sample rejection. For most uses
// though, the chance of rejection is small and provides good
// general performance. With multiple lanes, that chance is
// multiplied. To mitigate this, we replace only the lanes of
// the vector which fail, iteratively reducing the chance of
// rejection. The replacement method does however add a little
// overhead. Benchmarking or calculating probabilities might
// reveal contexts where this replacement method is slower.
let mut v: $unsigned = rng.gen();
loop {
let (hi, lo) = v.wmul(range);
let mask = lo.le(zone);
if mask.all() {
let hi: $ty = hi.cast();
// wrapping addition
let result = self.low + hi;
// `select` here compiles to a blend operation
// When `range.eq(0).none()` the compare and blend
// operations are avoided.
let v: $ty = v.cast();
return range.gt($unsigned::splat(0)).select(result, v);
}
// Replace only the failing lanes
v = mask.select(v, rng.gen());
}
}
}
};
// bulk implementation
($(($unsigned:ident, $signed:ident),)+ $u_scalar:ident) => {
$(
uniform_simd_int_impl!($unsigned, $unsigned, $u_scalar);
uniform_simd_int_impl!($signed, $unsigned, $u_scalar);
)+
};
}
#[cfg(feature = "simd_support")]
uniform_simd_int_impl! {
(u64x2, i64x2),
(u64x4, i64x4),
(u64x8, i64x8),
u64
}
#[cfg(feature = "simd_support")]
uniform_simd_int_impl! {
(u32x2, i32x2),
(u32x4, i32x4),
(u32x8, i32x8),
(u32x16, i32x16),
u32
}
#[cfg(feature = "simd_support")]
uniform_simd_int_impl! {
(u16x2, i16x2),
(u16x4, i16x4),
(u16x8, i16x8),
(u16x16, i16x16),
(u16x32, i16x32),
u16
}
#[cfg(feature = "simd_support")]
uniform_simd_int_impl! {
(u8x2, i8x2),
(u8x4, i8x4),
(u8x8, i8x8),
(u8x16, i8x16),
(u8x32, i8x32),
(u8x64, i8x64),
u8
}
impl SampleUniform for char {
type Sampler = UniformChar;
}
/// The back-end implementing [`UniformSampler`] for `char`.
///
/// Unless you are implementing [`UniformSampler`] for your own type, this type
/// should not be used directly, use [`Uniform`] instead.
///
/// This differs from integer range sampling since the range `0xD800..=0xDFFF`
/// are used for surrogate pairs in UCS and UTF-16, and consequently are not
/// valid Unicode code points. We must therefore avoid sampling values in this
/// range.
#[derive(Clone, Copy, Debug)]
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
pub struct UniformChar {
sampler: UniformInt<u32>,
}
/// UTF-16 surrogate range start
const CHAR_SURROGATE_START: u32 = 0xD800;
/// UTF-16 surrogate range size
const CHAR_SURROGATE_LEN: u32 = 0xE000 - CHAR_SURROGATE_START;
/// Convert `char` to compressed `u32`
fn char_to_comp_u32(c: char) -> u32 {
match c as u32 {
c if c >= CHAR_SURROGATE_START => c - CHAR_SURROGATE_LEN,
c => c,
}
}
impl UniformSampler for UniformChar {
type X = char;
#[inline] // if the range is constant, this helps LLVM to do the
// calculations at compile-time.
fn new<B1, B2>(low_b: B1, high_b: B2) -> Self
where
B1: SampleBorrow<Self::X> + Sized,
B2: SampleBorrow<Self::X> + Sized,
{
let low = char_to_comp_u32(*low_b.borrow());
let high = char_to_comp_u32(*high_b.borrow());
let sampler = UniformInt::<u32>::new(low, high);
UniformChar { sampler }
}
#[inline] // if the range is constant, this helps LLVM to do the
// calculations at compile-time.
fn new_inclusive<B1, B2>(low_b: B1, high_b: B2) -> Self
where
B1: SampleBorrow<Self::X> + Sized,
B2: SampleBorrow<Self::X> + Sized,
{
let low = char_to_comp_u32(*low_b.borrow());
let high = char_to_comp_u32(*high_b.borrow());
let sampler = UniformInt::<u32>::new_inclusive(low, high);
UniformChar { sampler }
}
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Self::X {
let mut x = self.sampler.sample(rng);
if x >= CHAR_SURROGATE_START {
x += CHAR_SURROGATE_LEN;
}
// SAFETY: x must not be in surrogate range or greater than char::MAX.
// This relies on range constructors which accept char arguments.
// Validity of input char values is assumed.
unsafe { core::char::from_u32_unchecked(x) }
}
}
/// The back-end implementing [`UniformSampler`] for floating-point types.
///
/// Unless you are implementing [`UniformSampler`] for your own type, this type
/// should not be used directly, use [`Uniform`] instead.
///
/// # Implementation notes
///
/// Instead of generating a float in the `[0, 1)` range using [`Standard`], the
/// `UniformFloat` implementation converts the output of an PRNG itself. This
/// way one or two steps can be optimized out.
///
/// The floats are first converted to a value in the `[1, 2)` interval using a
/// transmute-based method, and then mapped to the expected range with a
/// multiply and addition. Values produced this way have what equals 23 bits of
/// random digits for an `f32`, and 52 for an `f64`.
///
/// [`new`]: UniformSampler::new
/// [`new_inclusive`]: UniformSampler::new_inclusive
/// [`Standard`]: crate::distributions::Standard
#[derive(Clone, Copy, Debug, PartialEq)]
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
pub struct UniformFloat<X> {
low: X,
scale: X,
}
macro_rules! uniform_float_impl {
($ty:ty, $uty:ident, $f_scalar:ident, $u_scalar:ident, $bits_to_discard:expr) => {
impl SampleUniform for $ty {
type Sampler = UniformFloat<$ty>;
}
impl UniformSampler for UniformFloat<$ty> {
type X = $ty;
fn new<B1, B2>(low_b: B1, high_b: B2) -> Self
where
B1: SampleBorrow<Self::X> + Sized,
B2: SampleBorrow<Self::X> + Sized,
{
let low = *low_b.borrow();
let high = *high_b.borrow();
debug_assert!(
low.all_finite(),
"Uniform::new called with `low` non-finite."
);
debug_assert!(
high.all_finite(),
"Uniform::new called with `high` non-finite."
);
assert!(low.all_lt(high), "Uniform::new called with `low >= high`");
let max_rand = <$ty>::splat(
(::core::$u_scalar::MAX >> $bits_to_discard).into_float_with_exponent(0) - 1.0,
);
let mut scale = high - low;
assert!(scale.all_finite(), "Uniform::new: range overflow");
loop {
let mask = (scale * max_rand + low).ge_mask(high);
if mask.none() {
break;
}
scale = scale.decrease_masked(mask);
}
debug_assert!(<$ty>::splat(0.0).all_le(scale));
UniformFloat { low, scale }
}
fn new_inclusive<B1, B2>(low_b: B1, high_b: B2) -> Self
where
B1: SampleBorrow<Self::X> + Sized,
B2: SampleBorrow<Self::X> + Sized,
{
let low = *low_b.borrow();
let high = *high_b.borrow();
debug_assert!(
low.all_finite(),
"Uniform::new_inclusive called with `low` non-finite."
);
debug_assert!(
high.all_finite(),
"Uniform::new_inclusive called with `high` non-finite."
);
assert!(
low.all_le(high),
"Uniform::new_inclusive called with `low > high`"
);
let max_rand = <$ty>::splat(
(::core::$u_scalar::MAX >> $bits_to_discard).into_float_with_exponent(0) - 1.0,
);
let mut scale = (high - low) / max_rand;
assert!(scale.all_finite(), "Uniform::new_inclusive: range overflow");
loop {
let mask = (scale * max_rand + low).gt_mask(high);
if mask.none() {
break;
}
scale = scale.decrease_masked(mask);
}
debug_assert!(<$ty>::splat(0.0).all_le(scale));
UniformFloat { low, scale }
}
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Self::X {
// Generate a value in the range [1, 2)
let value1_2 = (rng.gen::<$uty>() >> $bits_to_discard).into_float_with_exponent(0);
// Get a value in the range [0, 1) in order to avoid
// overflowing into infinity when multiplying with scale
let value0_1 = value1_2 - 1.0;
// We don't use `f64::mul_add`, because it is not available with
// `no_std`. Furthermore, it is slower for some targets (but
// faster for others). However, the order of multiplication and
// addition is important, because on some platforms (e.g. ARM)
// it will be optimized to a single (non-FMA) instruction.
value0_1 * self.scale + self.low
}
#[inline]
fn sample_single<R: Rng + ?Sized, B1, B2>(low_b: B1, high_b: B2, rng: &mut R) -> Self::X
where
B1: SampleBorrow<Self::X> + Sized,
B2: SampleBorrow<Self::X> + Sized,
{
let low = *low_b.borrow();
let high = *high_b.borrow();
debug_assert!(
low.all_finite(),
"UniformSampler::sample_single called with `low` non-finite."
);
debug_assert!(
high.all_finite(),
"UniformSampler::sample_single called with `high` non-finite."
);
assert!(
low.all_lt(high),
"UniformSampler::sample_single: low >= high"
);
let mut scale = high - low;
assert!(scale.all_finite(), "UniformSampler::sample_single: range overflow");
loop {
// Generate a value in the range [1, 2)
let value1_2 =
(rng.gen::<$uty>() >> $bits_to_discard).into_float_with_exponent(0);
// Get a value in the range [0, 1) in order to avoid
// overflowing into infinity when multiplying with scale
let value0_1 = value1_2 - 1.0;
// Doing multiply before addition allows some architectures
// to use a single instruction.
let res = value0_1 * scale + low;
debug_assert!(low.all_le(res) || !scale.all_finite());
if res.all_lt(high) {
return res;
}
// This handles a number of edge cases.
// * `low` or `high` is NaN. In this case `scale` and
// `res` are going to end up as NaN.
// * `low` is negative infinity and `high` is finite.
// `scale` is going to be infinite and `res` will be
// NaN.
// * `high` is positive infinity and `low` is finite.
// `scale` is going to be infinite and `res` will
// be infinite or NaN (if value0_1 is 0).
// * `low` is negative infinity and `high` is positive
// infinity. `scale` will be infinite and `res` will
// be NaN.
// * `low` and `high` are finite, but `high - low`
// overflows to infinite. `scale` will be infinite
// and `res` will be infinite or NaN (if value0_1 is 0).
// So if `high` or `low` are non-finite, we are guaranteed
// to fail the `res < high` check above and end up here.
//
// While we technically should check for non-finite `low`
// and `high` before entering the loop, by doing the checks
// here instead, we allow the common case to avoid these
// checks. But we are still guaranteed that if `low` or
// `high` are non-finite we'll end up here and can do the
// appropriate checks.
//
// Likewise `high - low` overflowing to infinity is also
// rare, so handle it here after the common case.
let mask = !scale.finite_mask();
if mask.any() {
assert!(
low.all_finite() && high.all_finite(),
"Uniform::sample_single: low and high must be finite"
);
scale = scale.decrease_masked(mask);
}
}
}
}
};
}
uniform_float_impl! { f32, u32, f32, u32, 32 - 23 }
uniform_float_impl! { f64, u64, f64, u64, 64 - 52 }