Skip to content

Commit

Permalink
sample_indices: always shuffle. Floyd's alg: optimise.
Browse files Browse the repository at this point in the history
  • Loading branch information
dhardy committed Jul 30, 2018
1 parent 805022c commit 19897e5
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 59 deletions.
5 changes: 3 additions & 2 deletions benches/seq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ macro_rules! seq_slice_choose_multiple {
// Collect full result to prevent unwanted shortcuts getting
// first element (in case sample_indices returns an iterator).
for (slot, sample) in result.iter_mut().zip(
x.choose_multiple(&mut rng, $amount, false)) {
x.choose_multiple(&mut rng, $amount)) {
*slot = *sample;
}
result[$amount-1]
Expand Down Expand Up @@ -87,7 +87,7 @@ macro_rules! sample_indices {
fn $name(b: &mut Bencher) {
let mut rng = SmallRng::from_rng(thread_rng()).unwrap();
b.iter(|| {
index::$fn(&mut rng, $length, $amount, false)
index::$fn(&mut rng, $length, $amount)
})
}
}
Expand All @@ -98,5 +98,6 @@ sample_indices!(misc_sample_indices_10_of_1k, sample, 10, 1000);
sample_indices!(misc_sample_indices_100_of_1k, sample, 100, 1000);
sample_indices!(misc_sample_indices_100_of_1M, sample, 100, 1000_000);
sample_indices!(misc_sample_indices_100_of_1G, sample, 100, 1000_000_000);
sample_indices!(misc_sample_indices_200_of_1G, sample, 200, 1000_000_000);
sample_indices!(misc_sample_indices_400_of_1G, sample, 400, 1000_000_000);
sample_indices!(misc_sample_indices_600_of_1G, sample, 600, 1000_000_000);
90 changes: 51 additions & 39 deletions src/seq/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,21 +158,15 @@ impl Iterator for IndexVecIntoIter {
impl ExactSizeIterator for IndexVecIntoIter {}


/// Randomly sample exactly `amount` distinct indices from `0..length`.
///
/// If `shuffled == true` then the sampled values will be fully shuffled;
/// otherwise the values may only be partially shuffled, depending on the
/// algorithm used (i.e. biases may exist in the ordering of sampled elements).
/// Depending on the algorithm used internally, full shuffling may add
/// significant overhead for `amount` > 10 or so, but not more than double
/// the time and often much less.
/// Randomly sample exactly `amount` distinct indices from `0..length`, and
/// return them in random order (fully shuffled).
///
/// This method is used internally by the slice sampling methods, but it can
/// sometimes be useful to have the indices themselves so this is provided as
/// an alternative.
///
/// The implementation used is not specified; we automatically select the
/// fastest available implementation for the `length` and `amount` parameters
/// fastest available algorithm for the `length` and `amount` parameters
/// (based on detailed profiling on an Intel Haswell CPU). Roughly speaking,
/// complexity is `O(amount)`, except that when `amount` is small, performance
/// is closer to `O(amount^2)`, and when `length` is close to `amount` then
Expand All @@ -186,8 +180,7 @@ impl ExactSizeIterator for IndexVecIntoIter {}
/// to adapt the internal `sample_floyd` implementation.
///
/// Panics if `amount > length`.
pub fn sample<R>(rng: &mut R, length: usize, amount: usize,
shuffled: bool) -> IndexVec
pub fn sample<R>(rng: &mut R, length: usize, amount: usize) -> IndexVec
where R: Rng + ?Sized,
{
if amount > length {
Expand All @@ -205,16 +198,16 @@ pub fn sample<R>(rng: &mut R, length: usize, amount: usize,
// https://github.com/rust-lang-nursery/rand/pull/479
// We do some calculations with f32. Accuracy is not very important.

if amount < 217 {
const C: [[f32; 2]; 2] = [[1.2, 6.0/45.0], [10.0, 70.0/9.0]];
if amount < 163 {
const C: [[f32; 2]; 2] = [[1.6, 8.0/45.0], [10.0, 70.0/9.0]];
let j = if length < 500_000 { 0 } else { 1 };
let amount_fp = amount as f32;
let m4 = C[0][j] * amount_fp;
// Short-cut: when amount < 12, floyd's is always faster
if amount > 11 && (length as f32) < (C[1][j] + m4) * amount_fp {
sample_inplace(rng, length, amount)
} else {
sample_floyd(rng, length, amount, shuffled)
sample_floyd(rng, length, amount)
}
} else {
const C: [f32; 2] = [270.0, 330.0/9.0];
Expand All @@ -232,29 +225,50 @@ pub fn sample<R>(rng: &mut R, length: usize, amount: usize,
/// Randomly sample exactly `amount` indices from `0..length`, using Floyd's
/// combination algorithm.
///
/// If `shuffled == false`, the values are only partially shuffled (i.e. biases
/// exist in the ordering of sampled elements). If `shuffled == true`, the
/// values are fully shuffled.
/// The output values are fully shuffled. (Overhead is under 50%.)
///
/// This implementation uses `O(amount)` memory and `O(amount^2)` time.
fn sample_floyd<R>(rng: &mut R, length: u32, amount: u32, shuffled: bool) -> IndexVec
fn sample_floyd<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec
where R: Rng + ?Sized,
{
/// Returns the index of the first element of `slice` equal to `elt`, or
/// `None` if no element compares equal.
///
/// This is a plain linear search, so the cost is proportional to the position
/// of the match (or to `slice.len()` when there is no match).
fn find_pos<T: Copy + PartialEq<T>>(slice: &[T], elt: T) -> Option<usize> {
    // `Iterator::position` is the standard-library form of this search: it
    // stops at the first match and avoids per-element bounds-checked indexing.
    slice.iter().position(|&x| x == elt)
}

// For small amount we use Floyd's fully-shuffled variant. For larger
// amounts this is slow due to Vec::insert performance, so we shuffle
// afterwards. Benchmarks show little overhead from extra logic.
let floyd_shuffle = amount < 50;

debug_assert!(amount <= length);
let mut indices = Vec::with_capacity(amount as usize);
for j in length - amount .. length {
let t = rng.gen_range(0, j + 1);
if indices.contains(&t) {
indices.push(j)
if floyd_shuffle {
if let Some(pos) = find_pos(&indices, t) {
indices.insert(pos, j);
continue;
}
} else {
indices.push(t)
};
if indices.contains(&t) {
indices.push(j);
continue;
}
}
indices.push(t);
}
if shuffled {
// Note that there is a variant of Floyd's algorithm with native full
// shuffling, but it is slow because it requires arbitrary insertions.
use super::SliceRandom;
indices.shuffle(rng);
if !floyd_shuffle {
// Reimplement SliceRandom::shuffle with smaller indices
for i in (1..amount).rev() {
// invariant: elements with index > i have been locked in place.
indices.swap(i as usize, rng.gen_range(0, i + 1) as usize);
}
}
IndexVec::from(indices)
}
Expand All @@ -270,9 +284,7 @@ fn sample_floyd<R>(rng: &mut R, length: u32, amount: u32, shuffled: bool) -> Ind
/// of memory; because of this we only implement for `u32` index (which improves
/// performance in all cases).
///
/// This is likely the fastest for small lengths since it avoids the need for
/// allocations. Set-up is `O(length)` time and memory and shuffling is
/// `O(amount)` time.
/// Set-up is `O(length)` time and memory and shuffling is `O(amount)` time.
fn sample_inplace<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec
where R: Rng + ?Sized,
{
Expand Down Expand Up @@ -330,16 +342,16 @@ mod test {

assert_eq!(sample_rejection(&mut r, 1, 0).len(), 0);

assert_eq!(sample_floyd(&mut r, 0, 0, false).len(), 0);
assert_eq!(sample_floyd(&mut r, 1, 0, false).len(), 0);
assert_eq!(sample_floyd(&mut r, 1, 1, false).into_vec(), vec![0]);
assert_eq!(sample_floyd(&mut r, 0, 0).len(), 0);
assert_eq!(sample_floyd(&mut r, 1, 0).len(), 0);
assert_eq!(sample_floyd(&mut r, 1, 1).into_vec(), vec![0]);

// These algorithms should be fast with big numbers. Test average.
let sum: usize = sample_rejection(&mut r, 1 << 25, 10)
.into_iter().sum();
assert!(1 << 25 < sum && sum < (1 << 25) * 25);

let sum: usize = sample_floyd(&mut r, 1 << 25, 10, false)
let sum: usize = sample_floyd(&mut r, 1 << 25, 10)
.into_iter().sum();
assert!(1 << 25 < sum && sum < (1 << 25) * 25);
}
Expand All @@ -358,27 +370,27 @@ mod test {
// A small length and relatively large amount should use inplace
r.fill(&mut seed);
let (length, amount): (usize, usize) = (100, 50);
let v1 = sample(&mut xor_rng(seed), length, amount, true);
let v1 = sample(&mut xor_rng(seed), length, amount);
let v2 = sample_inplace(&mut xor_rng(seed), length as u32, amount as u32);
assert!(v1.iter().all(|e| e < length));
assert_eq!(v1, v2);

// Test Floyd's alg does produce different results
let v3 = sample_floyd(&mut xor_rng(seed), length as u32, amount as u32, true);
let v3 = sample_floyd(&mut xor_rng(seed), length as u32, amount as u32);
assert!(v1 != v3);

// A large length and small amount should use Floyd
r.fill(&mut seed);
let (length, amount): (usize, usize) = (1<<20, 50);
let v1 = sample(&mut xor_rng(seed), length, amount, true);
let v2 = sample_floyd(&mut xor_rng(seed), length as u32, amount as u32, true);
let v1 = sample(&mut xor_rng(seed), length, amount);
let v2 = sample_floyd(&mut xor_rng(seed), length as u32, amount as u32);
assert!(v1.iter().all(|e| e < length));
assert_eq!(v1, v2);

// A large length and larger amount should use rejection sampling
r.fill(&mut seed);
let (length, amount): (usize, usize) = (1<<20, 600);
let v1 = sample(&mut xor_rng(seed), length, amount, true);
let v1 = sample(&mut xor_rng(seed), length, amount);
let v2 = sample_rejection(&mut xor_rng(seed), length, amount);
assert!(v1.iter().all(|e| e < length));
assert_eq!(v1, v2);
Expand Down
28 changes: 10 additions & 18 deletions src/seq/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,18 +58,11 @@ pub trait SliceRandom {
where R: Rng + ?Sized;

/// Produces an iterator that chooses `amount` elements from the slice at
/// random without repeating any.
///
/// random without repeating any, and returns them in random order.
///
/// In case this API is not sufficiently flexible, use `index::sample` then
/// apply the indices to the slice.
///
/// If `shuffled == true` then the sampled values will be fully shuffled;
/// otherwise the values may only be partially shuffled, depending on the
/// algorithm used (i.e. biases may exist in the ordering of sampled
/// elements). Depending on the algorithm used internally, full shuffling
/// may add significant overhead for `amount` > 10 or so, but not more
/// than double the time and often much less.
///
/// Complexity is expected to be the same as `index::sample`.
///
/// # Example
Expand All @@ -80,16 +73,16 @@ pub trait SliceRandom {
/// let sample = "Hello, audience!".as_bytes();
///
/// // collect the results into a vector:
/// let v: Vec<u8> = sample.choose_multiple(&mut rng, 3, true).cloned().collect();
/// let v: Vec<u8> = sample.choose_multiple(&mut rng, 3).cloned().collect();
///
/// // store in a buffer:
/// let mut buf = [0u8; 5];
/// for (b, slot) in sample.choose_multiple(&mut rng, buf.len(), true).zip(buf.iter_mut()) {
/// for (b, slot) in sample.choose_multiple(&mut rng, buf.len()).zip(buf.iter_mut()) {
/// *slot = *b;
/// }
/// ```
#[cfg(feature = "alloc")]
fn choose_multiple<R>(&self, rng: &mut R, amount: usize, shuffled: bool) -> SliceChooseIter<Self, Self::Item>
fn choose_multiple<R>(&self, rng: &mut R, amount: usize) -> SliceChooseIter<Self, Self::Item>
where R: Rng + ?Sized;

/// Similar to [`choose`], where the likelihood of each outcome may be
Expand Down Expand Up @@ -315,15 +308,15 @@ impl<T> SliceRandom for [T] {
}

#[cfg(feature = "alloc")]
fn choose_multiple<R>(&self, rng: &mut R, amount: usize, shuffled: bool)
fn choose_multiple<R>(&self, rng: &mut R, amount: usize)
-> SliceChooseIter<Self, Self::Item>
where R: Rng + ?Sized
{
let amount = ::core::cmp::min(amount, self.len());
SliceChooseIter {
slice: self,
_phantom: Default::default(),
indices: index::sample(rng, self.len(), amount, shuffled).into_iter(),
indices: index::sample(rng, self.len(), amount).into_iter(),
}
}

Expand Down Expand Up @@ -460,7 +453,7 @@ pub fn sample_slice<R, T>(rng: &mut R, slice: &[T], amount: usize) -> Vec<T>
where R: Rng + ?Sized,
T: Clone
{
let indices = index::sample(rng, slice.len(), amount, true).into_iter();
let indices = index::sample(rng, slice.len(), amount).into_iter();

let mut out = Vec::with_capacity(amount);
out.extend(indices.map(|i| slice[i].clone()));
Expand All @@ -483,7 +476,7 @@ pub fn sample_slice<R, T>(rng: &mut R, slice: &[T], amount: usize) -> Vec<T>
pub fn sample_slice_ref<'a, R, T>(rng: &mut R, slice: &'a [T], amount: usize) -> Vec<&'a T>
where R: Rng + ?Sized
{
let indices = index::sample(rng, slice.len(), amount, true).into_iter();
let indices = index::sample(rng, slice.len(), amount).into_iter();

let mut out = Vec::with_capacity(amount);
out.extend(indices.map(|i| &slice[i]));
Expand Down Expand Up @@ -679,8 +672,7 @@ mod test {
r.fill(&mut seed);

// assert the basics work
let regular = index::sample(
&mut xor_rng(seed), length, amount, true);
let regular = index::sample(&mut xor_rng(seed), length, amount);
assert_eq!(regular.len(), amount);
assert!(regular.iter().all(|e| e < length));

Expand Down

0 comments on commit 19897e5

Please sign in to comment.