From 96321cc443059811f19dabb9b529e810c6c3de1b Mon Sep 17 00:00:00 2001 From: Diggory Hardy Date: Fri, 19 Jul 2024 15:41:11 +0100 Subject: [PATCH 1/9] cfg-gate IndexVec::USize This is not required with 32-bit usize --- src/seq/index.rs | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/src/seq/index.rs b/src/seq/index.rs index d7d1636570..18bb03c735 100644 --- a/src/seq/index.rs +++ b/src/seq/index.rs @@ -23,6 +23,9 @@ use serde::{Deserialize, Serialize}; #[cfg(feature = "std")] use std::collections::HashSet; +#[cfg(not(any(target_pointer_width = "32", target_pointer_width = "64")))] +compile_error!("unsupported pointer width"); + /// A vector of indices. /// /// Multiple internal representations are possible. @@ -31,6 +34,7 @@ use std::collections::HashSet; pub enum IndexVec { #[doc(hidden)] U32(Vec), + #[cfg(target_pointer_width = "64")] #[doc(hidden)] USize(Vec), } @@ -41,6 +45,7 @@ impl IndexVec { pub fn len(&self) -> usize { match *self { IndexVec::U32(ref v) => v.len(), + #[cfg(target_pointer_width = "64")] IndexVec::USize(ref v) => v.len(), } } @@ -50,6 +55,7 @@ impl IndexVec { pub fn is_empty(&self) -> bool { match *self { IndexVec::U32(ref v) => v.is_empty(), + #[cfg(target_pointer_width = "64")] IndexVec::USize(ref v) => v.is_empty(), } } @@ -62,6 +68,7 @@ impl IndexVec { pub fn index(&self, index: usize) -> usize { match *self { IndexVec::U32(ref v) => v[index] as usize, + #[cfg(target_pointer_width = "64")] IndexVec::USize(ref v) => v[index], } } @@ -71,6 +78,7 @@ impl IndexVec { pub fn into_vec(self) -> Vec { match self { IndexVec::U32(v) => v.into_iter().map(|i| i as usize).collect(), + #[cfg(target_pointer_width = "64")] IndexVec::USize(v) => v, } } @@ -80,6 +88,7 @@ impl IndexVec { pub fn iter(&self) -> IndexVecIter<'_> { match *self { IndexVec::U32(ref v) => IndexVecIter::U32(v.iter()), + #[cfg(target_pointer_width = "64")] IndexVec::USize(ref v) => IndexVecIter::USize(v.iter()), } } @@ -94,6 +103,7 @@ impl IntoIterator for IndexVec { fn into_iter(self) -> IndexVecIntoIter { match self { IndexVec::U32(v) => IndexVecIntoIter::U32(v.into_iter()), + #[cfg(target_pointer_width = "64")] IndexVec::USize(v) => IndexVecIntoIter::USize(v.into_iter()), } } @@ -104,10 +114,13 @@ impl PartialEq for IndexVec { use self::IndexVec::*; match (self, other) { (U32(v1), U32(v2)) => v1 == v2, + #[cfg(target_pointer_width = "64")] (USize(v1), USize(v2)) => v1 == v2, + #[cfg(target_pointer_width = "64")] (U32(v1), USize(v2)) => { (v1.len() == v2.len()) && (v1.iter().zip(v2.iter()).all(|(x, y)| *x as usize == *y)) } + #[cfg(target_pointer_width = "64")] (USize(v1), U32(v2)) => { (v1.len() == v2.len()) && (v1.iter().zip(v2.iter()).all(|(x, y)| *x == *y as usize)) } @@ -122,6 +135,7 @@ impl From> for IndexVec { } } +#[cfg(target_pointer_width = "64")] impl From> for IndexVec { #[inline] fn from(v: Vec) -> Self { @@ -225,8 +239,12 @@ where panic!("`amount` of samples must be less than or equal to `length`"); } if length > (u32::MAX as usize) { + #[cfg(target_pointer_width = "32")] + unreachable!(); + // We never want to use inplace here, but could use floyd's alg // Lazy version: always use the cache alg. + #[cfg(target_pointer_width = "64")] return sample_rejection(rng, length, amount); } let amount = amount as u32; @@ -285,6 +303,10 @@ where X: Into, { if length > (u32::MAX as usize) { + #[cfg(target_pointer_width = "32")] + unreachable!(); + + #[cfg(target_pointer_width = "64")] sample_efraimidis_spirakis(rng, length, weight, amount) } else { assert!(amount <= u32::MAX as usize); @@ -463,6 +485,7 @@ impl UInt for u32 { } } +#[cfg(target_pointer_width = "64")] impl UInt for usize { #[inline] fn zero() -> Self { @@ -521,20 +544,10 @@ mod test { #[test] #[cfg(feature = "serde1")] fn test_serialization_index_vec() { - let some_index_vec = IndexVec::from(vec![254_usize, 234, 2, 1]); + let some_index_vec = IndexVec::from(vec![254_u32, 234, 2, 1]); let de_some_index_vec: IndexVec = bincode::deserialize(&bincode::serialize(&some_index_vec).unwrap()).unwrap(); - match (some_index_vec, de_some_index_vec) { - (IndexVec::U32(a), IndexVec::U32(b)) => { - assert_eq!(a, b); - } - (IndexVec::USize(a), IndexVec::USize(b)) => { - assert_eq!(a, b); - } - _ => { - panic!("failed to seralize/deserialize `IndexVec`") - } - } + assert_eq!(some_index_vec, de_some_index_vec); } #[test] @@ -610,7 +623,7 @@ mod test { assert!((i as usize) < len); } } - IndexVec::USize(_) => panic!("expected `IndexVec::U32`"), + _ => panic!("expected `IndexVec::U32`"), } } From f78b841f07d7bee78fbd831c11f54d8d13063e58 Mon Sep 17 00:00:00 2001 From: Diggory Hardy Date: Fri, 19 Jul 2024 15:51:35 +0100 Subject: [PATCH 2/9] Replace IndexVec::USize with IndexVec::U64 --- src/seq/index.rs | 54 ++++++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/src/seq/index.rs b/src/seq/index.rs index 18bb03c735..cbd76ed2d3 100644 --- a/src/seq/index.rs +++ b/src/seq/index.rs @@ -36,7 +36,7 @@ pub enum IndexVec { U32(Vec), #[cfg(target_pointer_width = "64")] #[doc(hidden)] - USize(Vec), + U64(Vec), } impl IndexVec { @@ -46,7 +46,7 @@ impl IndexVec { match *self { IndexVec::U32(ref v) => v.len(), #[cfg(target_pointer_width = "64")] - IndexVec::USize(ref v) => v.len(), + IndexVec::U64(ref v) => v.len(), } } @@ -56,7 +56,7 @@ impl IndexVec { match *self { IndexVec::U32(ref v) => v.is_empty(), #[cfg(target_pointer_width = "64")] - IndexVec::USize(ref v) => v.is_empty(), + IndexVec::U64(ref v) => v.is_empty(), } } @@ -69,7 +69,7 @@ impl IndexVec { match *self { IndexVec::U32(ref v) => v[index] as usize, #[cfg(target_pointer_width = "64")] - IndexVec::USize(ref v) => v[index], + IndexVec::U64(ref v) => v[index] as usize, } } @@ -79,7 +79,7 @@ impl IndexVec { match self { IndexVec::U32(v) => v.into_iter().map(|i| i as usize).collect(), #[cfg(target_pointer_width = "64")] - IndexVec::USize(v) => v, + IndexVec::U64(v) => v.into_iter().map(|i| i as usize).collect(), } } @@ -89,7 +89,7 @@ impl IndexVec { match *self { IndexVec::U32(ref v) => IndexVecIter::U32(v.iter()), #[cfg(target_pointer_width = "64")] - IndexVec::USize(ref v) => IndexVecIter::USize(v.iter()), + IndexVec::U64(ref v) => IndexVecIter::U64(v.iter()), } } } @@ -104,7 +104,7 @@ impl IntoIterator for IndexVec { match self { IndexVec::U32(v) => IndexVecIntoIter::U32(v.into_iter()), #[cfg(target_pointer_width = "64")] - IndexVec::USize(v) => IndexVecIntoIter::USize(v.into_iter()), + IndexVec::U64(v) => IndexVecIntoIter::U64(v.into_iter()), } } } @@ -115,14 +115,14 @@ impl PartialEq for IndexVec { match (self, other) { (U32(v1), U32(v2)) => v1 == v2, #[cfg(target_pointer_width = "64")] - (USize(v1), USize(v2)) => v1 == v2, + (U64(v1), U64(v2)) => v1 == v2, #[cfg(target_pointer_width = "64")] - (U32(v1), USize(v2)) => { - (v1.len() == v2.len()) && (v1.iter().zip(v2.iter()).all(|(x, y)| *x as usize == *y)) + (U32(v1), U64(v2)) => { + (v1.len() == v2.len()) && (v1.iter().zip(v2.iter()).all(|(x, y)| *x as u64 == *y)) } #[cfg(target_pointer_width = "64")] - (USize(v1), U32(v2)) => { - (v1.len() == v2.len()) && (v1.iter().zip(v2.iter()).all(|(x, y)| *x == *y as usize)) + (U64(v1), U32(v2)) => { + (v1.len() == v2.len()) && (v1.iter().zip(v2.iter()).all(|(x, y)| *x == *y as u64)) } } } @@ -136,10 +136,10 @@ impl From> for IndexVec { } #[cfg(target_pointer_width = "64")] -impl From> for IndexVec { +impl From> for IndexVec { #[inline] - fn from(v: Vec) -> Self { - IndexVec::USize(v) + fn from(v: Vec) -> Self { + IndexVec::U64(v) } } @@ -149,7 +149,7 @@ pub enum IndexVecIter<'a> { #[doc(hidden)] U32(slice::Iter<'a, u32>), #[doc(hidden)] - USize(slice::Iter<'a, usize>), + U64(slice::Iter<'a, u64>), } impl<'a> Iterator for IndexVecIter<'a> { @@ -160,7 +160,7 @@ impl<'a> Iterator for IndexVecIter<'a> { use self::IndexVecIter::*; match *self { U32(ref mut iter) => iter.next().map(|i| *i as usize), - USize(ref mut iter) => iter.next().cloned(), + U64(ref mut iter) => iter.next().map(|i| *i as usize), } } @@ -168,7 +168,7 @@ impl<'a> Iterator for IndexVecIter<'a> { fn size_hint(&self) -> (usize, Option) { match *self { IndexVecIter::U32(ref v) => v.size_hint(), - IndexVecIter::USize(ref v) => v.size_hint(), + IndexVecIter::U64(ref v) => v.size_hint(), } } } @@ -181,7 +181,7 @@ pub enum IndexVecIntoIter { #[doc(hidden)] U32(vec::IntoIter), #[doc(hidden)] - USize(vec::IntoIter), + U64(vec::IntoIter), } impl Iterator for IndexVecIntoIter { @@ -192,7 +192,7 @@ impl Iterator for IndexVecIntoIter { use self::IndexVecIntoIter::*; match *self { U32(ref mut v) => v.next().map(|i| i as usize), - USize(ref mut v) => v.next(), + U64(ref mut v) => v.next().map(|i| i as usize), } } @@ -201,7 +201,7 @@ impl Iterator for IndexVecIntoIter { use self::IndexVecIntoIter::*; match *self { U32(ref v) => v.size_hint(), - USize(ref v) => v.size_hint(), + U64(ref v) => v.size_hint(), } } } @@ -245,7 +245,7 @@ where // We never want to use inplace here, but could use floyd's alg // Lazy version: always use the cache alg. #[cfg(target_pointer_width = "64")] - return sample_rejection(rng, length, amount); + return sample_rejection(rng, length as u64, amount as u64); } let amount = amount as u32; let length = length as u32; @@ -307,7 +307,11 @@ where unreachable!(); #[cfg(target_pointer_width = "64")] - sample_efraimidis_spirakis(rng, length, weight, amount) + { + let amount = amount as u64; + let length = length as u64; + sample_efraimidis_spirakis(rng, length, weight, amount) + } } else { assert!(amount <= u32::MAX as usize); let amount = amount as u32; @@ -486,7 +490,7 @@ impl UInt for u32 { } #[cfg(target_pointer_width = "64")] -impl UInt for usize { +impl UInt for u64 { #[inline] fn zero() -> Self { 0 @@ -499,7 +503,7 @@ impl UInt for usize { #[inline] fn as_usize(self) -> usize { - self + self as usize } } From 90afa222defcc7d9a8115268bfefb31236a8aeb9 Mon Sep 17 00:00:00 2001 From: Diggory Hardy Date: Fri, 19 Jul 2024 15:53:42 +0100 Subject: [PATCH 3/9] gen_index: explicitly use u64, not usize --- src/seq/mod.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/seq/mod.rs b/src/seq/mod.rs index 0015517907..9261299b74 100644 --- a/src/seq/mod.rs +++ b/src/seq/mod.rs @@ -53,7 +53,11 @@ fn gen_index(rng: &mut R, ubound: usize) -> usize { if ubound <= (u32::MAX as usize) { rng.gen_range(0..ubound as u32) as usize } else { - rng.gen_range(0..ubound) + #[cfg(target_pointer_width = "32")] + unreachable!(); + + #[cfg(target_pointer_width = "64")] + return rng.gen_range(0..ubound as u64) as usize; } } From 3ad1d6e75b1b668b1c117d6613b9dff23a6a2469 Mon Sep 17 00:00:00 2001 From: Diggory Hardy Date: Fri, 19 Jul 2024 17:05:16 +0100 Subject: [PATCH 4/9] Add Rng::gen_index --- src/rng.rs | 84 +++++++++++++++++++++++++++++++++++++++++++++ src/seq/iterator.rs | 9 +++-- src/seq/mod.rs | 21 +----------- src/seq/slice.rs | 8 ++--- tests/gen_index.rs | 13 +++++++ 5 files changed, 106 insertions(+), 29 deletions(-) create mode 100644 tests/gen_index.rs diff --git a/src/rng.rs b/src/rng.rs index 9c015eddd4..26642b2cdd 100644 --- a/src/rng.rs +++ b/src/rng.rs @@ -136,6 +136,37 @@ pub trait Rng: RngCore { range.sample_single(self).unwrap() } + /// Generate a random `usize` value in the given range. + /// + /// This differs from [`Rng::gen_range`] in that the result is expected to + /// be portable across 32-bit and 64-bit targets. This is achieved by using + /// 32-bit sampling whenever possible (this also improves performance in + /// some benchmarks, depending on the generator). + /// + /// The `SampleIndex` trait is *intentionally* private (it not intended to + /// support generics externally). The trait supports types + /// [`RangeTo`](core::ops::RangeTo), [`Range`](core::ops::Range), + /// [`RangeInclusive`](core::ops::RangeInclusive) and + /// [`RangeToInclusive`](core::ops::RangeToInclusive) with + /// type parameter `Idx = usize`. + /// + /// # Example + /// + /// ``` + /// use rand::{thread_rng, Rng}; + /// + /// let mut rng = thread_rng(); + /// + /// let arr = ["a", "b", "c"]; + /// println!("{}", arr[rng.gen_index(..arr.len())]); + /// + /// let _ = rng.gen_index(..=9); + /// ``` + #[inline] + fn gen_index(&mut self, range: impl private::SampleIndex) -> usize { + range.sample_index(self) + } + /// Generate values via an iterator /// /// This is a just a wrapper over [`Rng::sample_iter`] using @@ -320,6 +351,59 @@ pub trait Rng: RngCore { impl Rng for R {} +mod private { + use super::Rng; + use crate::distr::uniform::{UniformInt, UniformSampler}; + use core::ops::{Range, RangeInclusive, RangeTo, RangeToInclusive}; + pub trait SampleIndex { + fn sample_index(self, rng: &mut R) -> usize; + } + + impl SampleIndex for RangeTo { + #[inline] + fn sample_index(self, rng: &mut R) -> usize { + let RangeTo { end } = self; + (0..end).sample_index(rng) + } + } + + impl SampleIndex for Range { + #[inline] + fn sample_index(self, rng: &mut R) -> usize { + let Range { start, end } = self; + assert!(end != 0, "cannot sample empty range"); + (start..=end - 1).sample_index(rng) + } + } + + impl SampleIndex for RangeToInclusive { + #[inline] + fn sample_index(self, rng: &mut R) -> usize { + let RangeToInclusive { end } = self; + (0..=end).sample_index(rng) + } + } + + impl SampleIndex for RangeInclusive { + #[inline] + fn sample_index(self, rng: &mut R) -> usize { + let (start, end) = self.into_inner(); + assert!(start <= end); + if end <= (u32::MAX as usize) { + UniformInt::::sample_single_inclusive(start as u32, end as u32, rng).unwrap() + as usize + } else { + #[cfg(not(target_pointer_width = "64"))] + unreachable!(); + + #[cfg(target_pointer_width = "64")] + return UniformInt::::sample_single_inclusive(start as u64, end as u64, rng) + .unwrap() as usize; + } + } + } +} + /// Types which may be filled with random data /// /// This trait allows arrays to be efficiently filled with random data. diff --git a/src/seq/iterator.rs b/src/seq/iterator.rs index 46ecdfeac8..a1606e49ef 100644 --- a/src/seq/iterator.rs +++ b/src/seq/iterator.rs @@ -9,7 +9,6 @@ //! `IteratorRandom` use super::coin_flipper::CoinFlipper; -use super::gen_index; #[allow(unused)] use super::IndexedRandom; use crate::Rng; @@ -71,7 +70,7 @@ pub trait IteratorRandom: Iterator + Sized { return match lower { 0 => None, 1 => self.next(), - _ => self.nth(gen_index(rng, lower)), + _ => self.nth(rng.gen_index(..lower)), }; } @@ -81,7 +80,7 @@ pub trait IteratorRandom: Iterator + Sized { // Continue until the iterator is exhausted loop { if lower > 1 { - let ix = gen_index(coin_flipper.rng, lower + consumed); + let ix = coin_flipper.rng.gen_index(..lower + consumed); let skip = if ix < lower { result = self.nth(ix); lower - (ix + 1) @@ -204,7 +203,7 @@ pub trait IteratorRandom: Iterator + Sized { // Continue, since the iterator was not exhausted for (i, elem) in self.enumerate() { - let k = gen_index(rng, i + 1 + amount); + let k = rng.gen_index(..i + 1 + amount); if let Some(slot) = buf.get_mut(k) { *slot = elem; } @@ -240,7 +239,7 @@ pub trait IteratorRandom: Iterator + Sized { // If the iterator stops once, then so do we. if reservoir.len() == amount { for (i, elem) in self.enumerate() { - let k = gen_index(rng, i + 1 + amount); + let k = rng.gen_index(..i + 1 + amount); if let Some(slot) = reservoir.get_mut(k) { *slot = elem; } diff --git a/src/seq/mod.rs b/src/seq/mod.rs index 9261299b74..125062c8fc 100644 --- a/src/seq/mod.rs +++ b/src/seq/mod.rs @@ -43,27 +43,8 @@ pub use iterator::IteratorRandom; pub use slice::SliceChooseIter; pub use slice::{IndexedMutRandom, IndexedRandom, SliceRandom}; -use crate::Rng; - -// Sample a number uniformly between 0 and `ubound`. Uses 32-bit sampling where -// possible, primarily in order to produce the same output on 32-bit and 64-bit -// platforms. -#[inline] -fn gen_index(rng: &mut R, ubound: usize) -> usize { - if ubound <= (u32::MAX as usize) { - rng.gen_range(0..ubound as u32) as usize - } else { - #[cfg(target_pointer_width = "32")] - unreachable!(); - - #[cfg(target_pointer_width = "64")] - return rng.gen_range(0..ubound as u64) as usize; - } -} - /// Low-level API for sampling indices pub mod index { - use super::gen_index; use crate::Rng; #[cfg(feature = "alloc")] @@ -88,7 +69,7 @@ pub mod index { // Floyd's algorithm let mut indices = [0; N]; for (i, j) in (len - N..len).enumerate() { - let t = gen_index(rng, j + 1); + let t = rng.gen_index(..j + 1); if let Some(pos) = indices[0..i].iter().position(|&x| x == t) { indices[pos] = j; } diff --git a/src/seq/slice.rs b/src/seq/slice.rs index 7c86f00a73..e3d4de1505 100644 --- a/src/seq/slice.rs +++ b/src/seq/slice.rs @@ -9,7 +9,7 @@ //! `IndexedRandom`, `IndexedMutRandom`, `SliceRandom` use super::increasing_uniform::IncreasingUniform; -use super::{gen_index, index}; +use super::index; #[cfg(feature = "alloc")] use crate::distr::uniform::{SampleBorrow, SampleUniform}; #[cfg(feature = "alloc")] @@ -57,7 +57,7 @@ pub trait IndexedRandom: Index { if self.is_empty() { None } else { - Some(&self[gen_index(rng, self.len())]) + Some(&self[rng.gen_index(..self.len())]) } } @@ -253,7 +253,7 @@ pub trait IndexedMutRandom: IndexedRandom + IndexMut { None } else { let len = self.len(); - Some(&mut self[gen_index(rng, len)]) + Some(&mut self[rng.gen_index(..len)]) } } @@ -404,7 +404,7 @@ impl SliceRandom for [T] { } } else { for i in m..self.len() { - let index = gen_index(rng, i + 1); + let index = rng.gen_index(..i + 1); self.swap(i, index); } } diff --git a/tests/gen_index.rs b/tests/gen_index.rs new file mode 100644 index 0000000000..247f3fcdc8 --- /dev/null +++ b/tests/gen_index.rs @@ -0,0 +1,13 @@ +// An external test assures that the usage of a private trait by Rng::gen_index +// does not cause issues with intended usages. + +use rand::Rng; + +#[test] +fn sample_index() { + let mut rng = rand::thread_rng(); + assert!(rng.gen_index(..4) < 4); + assert_eq!(rng.gen_index(..=0), 0); + assert_eq!(rng.gen_index(0..1), 0); + assert!(rng.gen_index(0..=9) < 10); +} From 63e823c91fe9656c569970c9050533ea21f59e17 Mon Sep 17 00:00:00 2001 From: Diggory Hardy Date: Fri, 19 Jul 2024 15:56:00 +0100 Subject: [PATCH 5/9] Remove all support for directly generating usize, isize values --- rand_distr/src/weighted_alias.rs | 2 -- src/distr/integer.rs | 36 +++----------------------------- src/distr/uniform_int.rs | 4 +--- src/distr/utils.rs | 20 ------------------ src/distr/weighted_index.rs | 2 +- src/lib.rs | 4 ++-- src/rng.rs | 4 ++-- 7 files changed, 9 insertions(+), 63 deletions(-) diff --git a/rand_distr/src/weighted_alias.rs b/rand_distr/src/weighted_alias.rs index e8c455dcb1..5d512f32a5 100644 --- a/rand_distr/src/weighted_alias.rs +++ b/rand_distr/src/weighted_alias.rs @@ -359,13 +359,11 @@ macro_rules! impl_weight_for_int { impl_weight_for_float!(f64); impl_weight_for_float!(f32); -impl_weight_for_int!(usize); impl_weight_for_int!(u128); impl_weight_for_int!(u64); impl_weight_for_int!(u32); impl_weight_for_int!(u16); impl_weight_for_int!(u8); -impl_weight_for_int!(isize); impl_weight_for_int!(i128); impl_weight_for_int!(i64); impl_weight_for_int!(i32); diff --git a/src/distr/integer.rs b/src/distr/integer.rs index 49546a3941..8cf9ffc688 100644 --- a/src/distr/integer.rs +++ b/src/distr/integer.rs @@ -19,8 +19,8 @@ use core::arch::x86_64::__m512i; #[cfg(target_arch = "x86_64")] use core::arch::x86_64::{__m128i, __m256i}; use core::num::{ - NonZeroI128, NonZeroI16, NonZeroI32, NonZeroI64, NonZeroI8, NonZeroIsize, NonZeroU128, - NonZeroU16, NonZeroU32, NonZeroU64, NonZeroU8, NonZeroUsize, + NonZeroI128, NonZeroI16, NonZeroI32, NonZeroI64, NonZeroI8, NonZeroU128, NonZeroU16, + NonZeroU32, NonZeroU64, NonZeroU8, }; #[cfg(feature = "simd_support")] use core::simd::*; @@ -63,20 +63,6 @@ impl Distribution for Standard { } } -impl Distribution for Standard { - #[inline] - #[cfg(any(target_pointer_width = "32", target_pointer_width = "16"))] - fn sample(&self, rng: &mut R) -> usize { - rng.next_u32() as usize - } - - #[inline] - #[cfg(target_pointer_width = "64")] - fn sample(&self, rng: &mut R) -> usize { - rng.next_u64() as usize - } -} - macro_rules! impl_int_from_uint { ($ty:ty, $uty:ty) => { impl Distribution<$ty> for Standard { @@ -93,7 +79,6 @@ impl_int_from_uint! { i16, u16 } impl_int_from_uint! { i32, u32 } impl_int_from_uint! { i64, u64 } impl_int_from_uint! { i128, u128 } -impl_int_from_uint! { isize, usize } macro_rules! impl_nzint { ($ty:ty, $new:path) => { @@ -114,14 +99,12 @@ impl_nzint!(NonZeroU16, NonZeroU16::new); impl_nzint!(NonZeroU32, NonZeroU32::new); impl_nzint!(NonZeroU64, NonZeroU64::new); impl_nzint!(NonZeroU128, NonZeroU128::new); -impl_nzint!(NonZeroUsize, NonZeroUsize::new); impl_nzint!(NonZeroI8, NonZeroI8::new); impl_nzint!(NonZeroI16, NonZeroI16::new); impl_nzint!(NonZeroI32, NonZeroI32::new); impl_nzint!(NonZeroI64, NonZeroI64::new); impl_nzint!(NonZeroI128, NonZeroI128::new); -impl_nzint!(NonZeroIsize, NonZeroIsize::new); #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] macro_rules! x86_intrinsic_impl { @@ -163,7 +146,7 @@ macro_rules! simd_impl { } #[cfg(feature = "simd_support")] -simd_impl!(u8, i8, u16, i16, u32, i32, u64, i64, usize, isize); +simd_impl!(u8, i8, u16, i16, u32, i32, u64, i64); #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] x86_intrinsic_impl!( @@ -191,14 +174,12 @@ mod tests { fn test_integers() { let mut rng = crate::test::rng(806); - rng.sample::(Standard); rng.sample::(Standard); rng.sample::(Standard); rng.sample::(Standard); rng.sample::(Standard); rng.sample::(Standard); - rng.sample::(Standard); rng.sample::(Standard); rng.sample::(Standard); rng.sample::(Standard); @@ -239,17 +220,6 @@ mod tests { 111087889832015897993126088499035356354, ], ); - #[cfg(any(target_pointer_width = "32", target_pointer_width = "16"))] - test_samples(0usize, &[2220326409, 2575017975, 2018088303]); - #[cfg(target_pointer_width = "64")] - test_samples( - 0usize, - &[ - 11059617991457472009, - 16096616328739788143, - 1487364411147516184, - ], - ); test_samples(0i8, &[9, -9, 111]); // Skip further i* types: they are simple reinterpretation of u* samples diff --git a/src/distr/uniform_int.rs b/src/distr/uniform_int.rs index 5a6254058e..2e950586da 100644 --- a/src/distr/uniform_int.rs +++ b/src/distr/uniform_int.rs @@ -258,12 +258,10 @@ uniform_int_impl! { i16, u16, u32 } uniform_int_impl! { i32, u32, u32 } uniform_int_impl! { i64, u64, u64 } uniform_int_impl! { i128, u128, u128 } -uniform_int_impl! { isize, usize, usize } uniform_int_impl! { u8, u8, u32 } uniform_int_impl! { u16, u16, u32 } uniform_int_impl! { u32, u32, u32 } uniform_int_impl! { u64, u64, u64 } -uniform_int_impl! { usize, usize, usize } uniform_int_impl! { u128, u128, u128 } #[cfg(feature = "simd_support")] @@ -476,7 +474,7 @@ mod tests { );)* }}; } - t!(i8, i16, i32, i64, isize, u8, u16, u32, u64, usize, i128, u128); + t!(i8, i16, i32, i64, u8, u16, u32, u64, i128, u128); #[cfg(feature = "simd_support")] { diff --git a/src/distr/utils.rs b/src/distr/utils.rs index b54dc6d6c4..0da96bd714 100644 --- a/src/distr/utils.rs +++ b/src/distr/utils.rs @@ -124,26 +124,6 @@ macro_rules! wmul_impl_large { } wmul_impl_large! { u128, 64 } -macro_rules! wmul_impl_usize { - ($ty:ty) => { - impl WideningMultiply for usize { - type Output = (usize, usize); - - #[inline(always)] - fn wmul(self, x: usize) -> Self::Output { - let (high, low) = (self as $ty).wmul(x as $ty); - (high as usize, low as usize) - } - } - }; -} -#[cfg(target_pointer_width = "16")] -wmul_impl_usize! { u16 } -#[cfg(target_pointer_width = "32")] -wmul_impl_usize! { u32 } -#[cfg(target_pointer_width = "64")] -wmul_impl_usize! { u64 } - #[cfg(feature = "simd_support")] mod simd_wmul { use super::*; diff --git a/src/distr/weighted_index.rs b/src/distr/weighted_index.rs index c0ceefee5d..6f8b2e9187 100644 --- a/src/distr/weighted_index.rs +++ b/src/distr/weighted_index.rs @@ -696,7 +696,7 @@ mod test { #[test] fn overflow() { assert_eq!( - WeightedIndex::new([2, usize::MAX]), + WeightedIndex::new([2, u32::MAX]), Err(WeightError::Overflow) ); } diff --git a/src/lib.rs b/src/lib.rs index ce0206db09..3abbc5a266 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -179,13 +179,13 @@ mod test { #[test] #[cfg(all(feature = "std", feature = "std_rng", feature = "getrandom"))] fn test_random() { - let _n: usize = random(); + let _n: u64 = random(); let _f: f32 = random(); let _o: Option> = random(); #[allow(clippy::type_complexity)] let _many: ( (), - (usize, isize, Option<(u32, (bool,))>), + Option<(u32, (bool,))>, (u8, i8, u16, i16, u32, i32, u64, i64), (f32, (f64, (f64,))), ) = random(); diff --git a/src/rng.rs b/src/rng.rs index 26642b2cdd..c8be7d1a6f 100644 --- a/src/rng.rs +++ b/src/rng.rs @@ -486,8 +486,8 @@ macro_rules! impl_fill { } } -impl_fill!(u16, u32, u64, usize, u128,); -impl_fill!(i8, i16, i32, i64, isize, i128,); +impl_fill!(u16, u32, u64, u128,); +impl_fill!(i8, i16, i32, i64, i128,); impl Fill for [T; N] where From e043590ab6f834d73521285bcf3c85c5480fe40b Mon Sep 17 00:00:00 2001 From: Diggory Hardy Date: Fri, 26 Jul 2024 08:30:42 +0100 Subject: [PATCH 6/9] Use the only pub non-feature-gated RNG --- tests/gen_index.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/gen_index.rs b/tests/gen_index.rs index 247f3fcdc8..6985cd9082 100644 --- a/tests/gen_index.rs +++ b/tests/gen_index.rs @@ -5,7 +5,7 @@ use rand::Rng; #[test] fn sample_index() { - let mut rng = rand::thread_rng(); + let mut rng = rand::rngs::mock::StepRng::new(0, 579682777108047081); assert!(rng.gen_index(..4) < 4); assert_eq!(rng.gen_index(..=0), 0); assert_eq!(rng.gen_index(0..1), 0); From e0e1ac3f36c413e93be9c06697013c8f067a77f1 Mon Sep 17 00:00:00 2001 From: Diggory Hardy Date: Fri, 26 Jul 2024 08:34:12 +0100 Subject: [PATCH 7/9] Remove isize/usize from benches --- benches/benches/base_distributions.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/benches/benches/base_distributions.rs b/benches/benches/base_distributions.rs index 17202a3035..5cbf4041f0 100644 --- a/benches/benches/base_distributions.rs +++ b/benches/benches/base_distributions.rs @@ -136,11 +136,6 @@ distr_int!(distr_uniform_i16, i16, Uniform::new(-500i16, 2000).unwrap()); distr_int!(distr_uniform_i32, i32, Uniform::new(-200_000_000i32, 800_000_000).unwrap()); distr_int!(distr_uniform_i64, i64, Uniform::new(3i64, 123_456_789_123).unwrap()); distr_int!(distr_uniform_i128, i128, Uniform::new(-123_456_789_123i128, 123_456_789_123_456_789).unwrap()); -distr_int!(distr_uniform_usize16, usize, Uniform::new(0usize, 0xb9d7).unwrap()); -distr_int!(distr_uniform_usize32, usize, Uniform::new(0usize, 0x548c0f43).unwrap()); -#[cfg(target_pointer_width = "64")] -distr_int!(distr_uniform_usize64, usize, Uniform::new(0usize, 0x3a42714f2bf927a8).unwrap()); -distr_int!(distr_uniform_isize, isize, Uniform::new(-1060478432isize, 1858574057).unwrap()); distr_float!(distr_uniform_f32, f32, Uniform::new(2.26f32, 2.319).unwrap()); distr_float!(distr_uniform_f64, f64, Uniform::new(2.26f64, 2.319).unwrap()); From 565417eabeb85c5501cc06777d91b162942fc62b Mon Sep 17 00:00:00 2001 From: Diggory Hardy Date: Fri, 26 Jul 2024 08:44:59 +0100 Subject: [PATCH 8/9] Add CHANGELOG entry --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a098453c0..5457f8472d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,8 @@ You may also find the [Upgrade Guide](https://rust-random.github.io/book/update. - Allow `UniformFloat::new` samples and `UniformFloat::sample_single` to yield `high` (#1462) - Fix portability of `rand::distributions::Slice` (#1469) - Rename `rand::distributions` to `rand::distr` (#1470) +- Add `Rng::gen_index` (#1471) +- Remove support for generating `isize` and `usize` values with `Standard`, `Uniform` and `Fill` and usage as a `WeightedAliasIndex` weight (#1471) ## [0.9.0-alpha.1] - 2024-03-18 - Add the `Slice::num_choices` method to the Slice distribution (#1402) From 04f005cdad0f66a47ee41d7e6dbcdcd56a31feae Mon Sep 17 00:00:00 2001 From: Diggory Hardy Date: Sat, 27 Jul 2024 08:54:29 +0100 Subject: [PATCH 9/9] Add UniformUsize (Uniform support for usize) --- CHANGELOG.md | 2 +- src/distr/slice.rs | 35 +--------- src/distr/uniform.rs | 2 +- src/distr/uniform_int.rs | 136 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 141 insertions(+), 34 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5457f8472d..9d02258f7b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,7 +19,7 @@ You may also find the [Upgrade Guide](https://rust-random.github.io/book/update. - Allow `UniformFloat::new` samples and `UniformFloat::sample_single` to yield `high` (#1462) - Fix portability of `rand::distributions::Slice` (#1469) - Rename `rand::distributions` to `rand::distr` (#1470) -- Add `Rng::gen_index` (#1471) +- Add `Rng::gen_index` and `UniformUsize` (#1471) - Remove support for generating `isize` and `usize` values with `Standard`, `Uniform` and `Fill` and usage as a `WeightedAliasIndex` weight (#1471) ## [0.9.0-alpha.1] - 2024-03-18 diff --git a/src/distr/slice.rs b/src/distr/slice.rs index 4677b4e89e..d201caa246 100644 --- a/src/distr/slice.rs +++ b/src/distr/slice.rs @@ -8,40 +8,11 @@ use core::num::NonZeroUsize; -use crate::distr::{Distribution, Uniform}; -use crate::Rng; +use crate::distr::uniform::{UniformSampler, UniformUsize}; +use crate::distr::Distribution; #[cfg(feature = "alloc")] use alloc::string::String; -#[cfg(not(any(target_pointer_width = "32", target_pointer_width = "64")))] -compile_error!("unsupported pointer width"); - -#[derive(Debug, Clone, Copy)] -enum UniformUsize { - U32(Uniform), - #[cfg(target_pointer_width = "64")] - U64(Uniform), -} - -impl UniformUsize { - pub fn new(ubound: usize) -> Result { - #[cfg(target_pointer_width = "64")] - if ubound > (u32::MAX as usize) { - return Uniform::new(0, ubound as u64).map(UniformUsize::U64); - } - - Uniform::new(0, ubound as u32).map(UniformUsize::U32) - } - - pub fn sample(&self, rng: &mut R) -> usize { - match self { - UniformUsize::U32(uu) => uu.sample(rng) as usize, - #[cfg(target_pointer_width = "64")] - UniformUsize::U64(uu) => uu.sample(rng) as usize, - } - } -} - /// A distribution to sample items uniformly from a slice. /// /// [`Slice::new`] constructs a distribution referencing a slice and uniformly @@ -110,7 +81,7 @@ impl<'a, T> Slice<'a, T> { Ok(Self { slice, - range: UniformUsize::new(num_choices.get()).unwrap(), + range: UniformUsize::new(0, num_choices.get()).unwrap(), num_choices, }) } diff --git a/src/distr/uniform.rs b/src/distr/uniform.rs index fa245c3aaf..4da906c9c1 100644 --- a/src/distr/uniform.rs +++ b/src/distr/uniform.rs @@ -112,7 +112,7 @@ pub use float::UniformFloat; #[path = "uniform_int.rs"] mod int; #[doc(inline)] -pub use int::UniformInt; +pub use int::{UniformInt, UniformUsize}; #[path = "uniform_other.rs"] mod other; diff --git a/src/distr/uniform_int.rs b/src/distr/uniform_int.rs index 2e950586da..c915f1dbf5 100644 --- a/src/distr/uniform_int.rs +++ b/src/distr/uniform_int.rs @@ -383,6 +383,142 @@ macro_rules! uniform_simd_int_impl { #[cfg(feature = "simd_support")] uniform_simd_int_impl! { (u8, i8), (u16, i16), (u32, i32), (u64, i64) } +/// The back-end implementing [`UniformSampler`] for `usize`. +/// +/// # Implementation notes +/// +/// Sampling a `usize` value is usually used in relation to the length of an +/// array or other memory structure, thus it is reasonable to assume that the +/// vast majority of use-cases will have a maximum size under [`u32::MAX`]. +/// In part to optimise for this use-case, but mostly to ensure that results +/// are portable across 32-bit and 64-bit architectures (as far as is possible), +/// this implementation will use 32-bit sampling when possible. +#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct UniformUsize(UniformUsizeImpl); + +#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] +impl SampleUniform for usize { + type Sampler = UniformUsize; +} + +#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum UniformUsizeImpl { + U32(UniformInt), + #[cfg(target_pointer_width = "64")] + U64(UniformInt), +} + +#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] +impl UniformSampler for UniformUsize { + type X = usize; + + #[inline] // if the range is constant, this helps LLVM to do the + // calculations at compile-time. + fn new(low_b: B1, high_b: B2) -> Result + where + B1: SampleBorrow + Sized, + B2: SampleBorrow + Sized, + { + let low = *low_b.borrow(); + let high = *high_b.borrow(); + if !(low < high) { + return Err(Error::EmptyRange); + } + + #[cfg(target_pointer_width = "64")] + if high > (u32::MAX as usize) { + return UniformInt::new(low as u64, high as u64) + .map(|ui| UniformUsize(UniformUsizeImpl::U64(ui))); + } + + UniformInt::new(low as u32, high as u32).map(|ui| UniformUsize(UniformUsizeImpl::U32(ui))) + } + + #[inline] // if the range is constant, this helps LLVM to do the + // calculations at compile-time. + fn new_inclusive(low_b: B1, high_b: B2) -> Result + where + B1: SampleBorrow + Sized, + B2: SampleBorrow + Sized, + { + let low = *low_b.borrow(); + let high = *high_b.borrow(); + if !(low <= high) { + return Err(Error::EmptyRange); + } + + #[cfg(target_pointer_width = "64")] + if high > (u32::MAX as usize) { + return UniformInt::new_inclusive(low as u64, high as u64) + .map(|ui| UniformUsize(UniformUsizeImpl::U64(ui))); + } + + UniformInt::new_inclusive(low as u32, high as u32) + .map(|ui| UniformUsize(UniformUsizeImpl::U32(ui))) + } + + #[inline] + fn sample(&self, rng: &mut R) -> usize { + match self.0 { + UniformUsizeImpl::U32(uu) => uu.sample(rng) as usize, + #[cfg(target_pointer_width = "64")] + UniformUsizeImpl::U64(uu) => uu.sample(rng) as usize, + } + } + + #[inline] + fn sample_single( + low_b: B1, + high_b: B2, + rng: &mut R, + ) -> Result + where + B1: SampleBorrow + Sized, + B2: SampleBorrow + Sized, + { + let low = *low_b.borrow(); + let high = *high_b.borrow(); + if !(low < high) { + return Err(Error::EmptyRange); + } + + #[cfg(target_pointer_width = "64")] + if high > (u32::MAX as usize) { + return UniformInt::::sample_single(low as u64, high as u64, rng) + .map(|x| x as usize); + } + + UniformInt::::sample_single(low as u32, high as u32, rng).map(|x| x as usize) + } + + #[inline] + fn sample_single_inclusive( + low_b: B1, + high_b: B2, + rng: &mut R, + ) -> Result + where + B1: SampleBorrow + Sized, + B2: SampleBorrow + Sized, + { + let low = *low_b.borrow(); + let high = *high_b.borrow(); + if !(low <= high) { + return Err(Error::EmptyRange); + } + + #[cfg(target_pointer_width = "64")] + if high > (u32::MAX as usize) { + return UniformInt::::sample_single_inclusive(low as u64, high as u64, rng) + .map(|x| x as usize); + } + + UniformInt::::sample_single_inclusive(low as u32, high as u32, rng).map(|x| x as usize) + } +} + #[cfg(test)] mod tests { use super::*;