Skip to content

Commit

Permalink
fill_via_chunks: mutate src on BE (small optimisation) (#1182)
Browse files (browse the repository at this point in the history)
* fill_via_chunks: mutate src on BE (small optimisation)

* Add doc to fill_via_chunks
  • Loading branch information
dhardy committed Dec 7, 2022
1 parent 19169cb commit 50b9a44
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 35 deletions.
4 changes: 2 additions & 2 deletions rand_core/src/block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ impl<R: BlockRngCore<Item = u32>> RngCore for BlockRng<R> {
self.generate_and_set(0);
}
let (consumed_u32, filled_u8) =
fill_via_u32_chunks(&self.results.as_ref()[self.index..], &mut dest[read_len..]);
fill_via_u32_chunks(&mut self.results.as_mut()[self.index..], &mut dest[read_len..]);

self.index += consumed_u32;
read_len += filled_u8;
Expand Down Expand Up @@ -387,7 +387,7 @@ impl<R: BlockRngCore<Item = u64>> RngCore for BlockRng64<R> {
}

let (consumed_u64, filled_u8) = fill_via_u64_chunks(
&self.results.as_ref()[self.index..],
&mut self.results.as_mut()[self.index..],
&mut dest[read_len..],
);

Expand Down
77 changes: 44 additions & 33 deletions rand_core/src/impls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,16 +53,14 @@ pub fn fill_bytes_via_next<R: RngCore + ?Sized>(rng: &mut R, dest: &mut [u8]) {
}

trait Observable: Copy {
type Bytes: AsRef<[u8]>;
fn to_le_bytes(self) -> Self::Bytes;
fn to_le(self) -> Self;

// Contract: observing self is memory-safe (implies no uninitialised padding)
fn as_byte_slice(x: &[Self]) -> &[u8];
}
impl Observable for u32 {
type Bytes = [u8; 4];
fn to_le_bytes(self) -> Self::Bytes {
self.to_le_bytes()
fn to_le(self) -> Self {
self.to_le()
}
fn as_byte_slice(x: &[Self]) -> &[u8] {
let ptr = x.as_ptr() as *const u8;
Expand All @@ -71,9 +69,8 @@ impl Observable for u32 {
}
}
impl Observable for u64 {
type Bytes = [u8; 8];
fn to_le_bytes(self) -> Self::Bytes {
self.to_le_bytes()
fn to_le(self) -> Self {
self.to_le()
}
fn as_byte_slice(x: &[Self]) -> &[u8] {
let ptr = x.as_ptr() as *const u8;
Expand All @@ -82,28 +79,27 @@ impl Observable for u64 {
}
}

fn fill_via_chunks<T: Observable>(src: &[T], dest: &mut [u8]) -> (usize, usize) {
/// Fill dest from src
///
/// Returns `(n, byte_len)`. `src[..n]` is consumed (and possibly mutated),
/// `dest[..byte_len]` is filled. `src[n..]` and `dest[byte_len..]` are left
/// unaltered.
fn fill_via_chunks<T: Observable>(src: &mut [T], dest: &mut [u8]) -> (usize, usize) {
let size = core::mem::size_of::<T>();
let byte_len = min(src.len() * size, dest.len());
let num_chunks = (byte_len + size - 1) / size;

if cfg!(target_endian = "little") {
// On LE we can do a simple copy, which is 25-50% faster:
dest[..byte_len].copy_from_slice(&T::as_byte_slice(&src[..num_chunks])[..byte_len]);
} else {
// This code is valid on all arches, but slower than the above:
let mut i = 0;
let mut iter = dest[..byte_len].chunks_exact_mut(size);
for chunk in &mut iter {
chunk.copy_from_slice(src[i].to_le_bytes().as_ref());
i += 1;
}
let chunk = iter.into_remainder();
if !chunk.is_empty() {
chunk.copy_from_slice(&src[i].to_le_bytes().as_ref()[..chunk.len()]);
// Byte-swap for portability of results. This must happen before copying
// since the size of dest is not guaranteed to be a multiple of T or to be
// sufficiently aligned.
if cfg!(target_endian = "big") {
for x in &mut src[..num_chunks] {
*x = x.to_le();
}
}

dest[..byte_len].copy_from_slice(&T::as_byte_slice(&src[..num_chunks])[..byte_len]);

(num_chunks, byte_len)
}

Expand All @@ -112,6 +108,9 @@ fn fill_via_chunks<T: Observable>(src: &[T], dest: &mut [u8]) -> (usize, usize)
///
/// The return values are `(consumed_u32, filled_u8)`.
///
/// On big-endian systems, endianness of `src[..consumed_u32]` values is
/// swapped. No other adjustments to `src` are made.
///
/// `filled_u8` is the number of filled bytes in `dest`, which may be less than
/// the length of `dest`.
/// `consumed_u32` is the number of words consumed from `src`, which is the same
Expand All @@ -137,21 +136,25 @@ fn fill_via_chunks<T: Observable>(src: &[T], dest: &mut [u8]) -> (usize, usize)
/// }
/// }
/// ```
pub fn fill_via_u32_chunks(src: &[u32], dest: &mut [u8]) -> (usize, usize) {
pub fn fill_via_u32_chunks(src: &mut [u32], dest: &mut [u8]) -> (usize, usize) {
fill_via_chunks(src, dest)
}

/// Implement `fill_bytes` by reading chunks from the output buffer of a block
/// based RNG.
///
/// The return values are `(consumed_u64, filled_u8)`.
///
/// On big-endian systems, endianness of `src[..consumed_u64]` values is
/// swapped. No other adjustments to `src` are made.
///
/// `filled_u8` is the number of filled bytes in `dest`, which may be less than
/// the length of `dest`.
/// `consumed_u64` is the number of words consumed from `src`, which is the same
/// as `filled_u8 / 8` rounded up.
///
/// See `fill_via_u32_chunks` for an example.
pub fn fill_via_u64_chunks(src: &[u64], dest: &mut [u8]) -> (usize, usize) {
pub fn fill_via_u64_chunks(src: &mut [u64], dest: &mut [u8]) -> (usize, usize) {
fill_via_chunks(src, dest)
}

Expand All @@ -175,33 +178,41 @@ mod test {

#[test]
fn test_fill_via_u32_chunks() {
let src = [1, 2, 3];
let src_orig = [1, 2, 3];

let mut src = src_orig;
let mut dst = [0u8; 11];
assert_eq!(fill_via_u32_chunks(&src, &mut dst), (3, 11));
assert_eq!(fill_via_u32_chunks(&mut src, &mut dst), (3, 11));
assert_eq!(dst, [1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0]);

let mut src = src_orig;
let mut dst = [0u8; 13];
assert_eq!(fill_via_u32_chunks(&src, &mut dst), (3, 12));
assert_eq!(fill_via_u32_chunks(&mut src, &mut dst), (3, 12));
assert_eq!(dst, [1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0]);

let mut src = src_orig;
let mut dst = [0u8; 5];
assert_eq!(fill_via_u32_chunks(&src, &mut dst), (2, 5));
assert_eq!(fill_via_u32_chunks(&mut src, &mut dst), (2, 5));
assert_eq!(dst, [1, 0, 0, 0, 2]);
}

#[test]
fn test_fill_via_u64_chunks() {
let src = [1, 2];
let src_orig = [1, 2];

let mut src = src_orig;
let mut dst = [0u8; 11];
assert_eq!(fill_via_u64_chunks(&src, &mut dst), (2, 11));
assert_eq!(fill_via_u64_chunks(&mut src, &mut dst), (2, 11));
assert_eq!(dst, [1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0]);

let mut src = src_orig;
let mut dst = [0u8; 17];
assert_eq!(fill_via_u64_chunks(&src, &mut dst), (2, 16));
assert_eq!(fill_via_u64_chunks(&mut src, &mut dst), (2, 16));
assert_eq!(dst, [1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0]);

let mut src = src_orig;
let mut dst = [0u8; 5];
assert_eq!(fill_via_u64_chunks(&src, &mut dst), (1, 5));
assert_eq!(fill_via_u64_chunks(&mut src, &mut dst), (1, 5));
assert_eq!(dst, [1, 0, 0, 0, 0]);
}
}

0 comments on commit 50b9a44

Please sign in to comment.