From 50b9a447410860af8d6db9a208c3576886955874 Mon Sep 17 00:00:00 2001
From: Diggory Hardy
Date: Wed, 7 Dec 2022 09:47:45 +0000
Subject: [PATCH] fill_via_chunks: mutate src on BE (small optimisation) (#1182)

* fill_via_chunks: mutate src on BE (small optimisation)

* Add doc to fill_via_chunks
---
 rand_core/src/block.rs |  4 ++--
 rand_core/src/impls.rs | 77 ++++++++++++++++++++++++------------------
 2 files changed, 46 insertions(+), 35 deletions(-)

diff --git a/rand_core/src/block.rs b/rand_core/src/block.rs
index 5a986895484..f813784ff2f 100644
--- a/rand_core/src/block.rs
+++ b/rand_core/src/block.rs
@@ -223,7 +223,7 @@ impl<R: BlockRngCore<Item = u32>> RngCore for BlockRng<R> {
                 self.generate_and_set(0);
             }
             let (consumed_u32, filled_u8) =
-                fill_via_u32_chunks(&self.results.as_ref()[self.index..], &mut dest[read_len..]);
+                fill_via_u32_chunks(&mut self.results.as_mut()[self.index..], &mut dest[read_len..]);
 
             self.index += consumed_u32;
             read_len += filled_u8;
@@ -387,7 +387,7 @@ impl<R: BlockRngCore<Item = u64>> RngCore for BlockRng64<R> {
             }
 
             let (consumed_u64, filled_u8) = fill_via_u64_chunks(
-                &self.results.as_ref()[self.index..],
+                &mut self.results.as_mut()[self.index..],
                 &mut dest[read_len..],
             );
 
diff --git a/rand_core/src/impls.rs b/rand_core/src/impls.rs
index 4b7688c5c80..8f99ef813a5 100644
--- a/rand_core/src/impls.rs
+++ b/rand_core/src/impls.rs
@@ -53,16 +53,14 @@ pub fn fill_bytes_via_next<R: RngCore + ?Sized>(rng: &mut R, dest: &mut [u8]) {
 }
 
 trait Observable: Copy {
-    type Bytes: AsRef<[u8]>;
-    fn to_le_bytes(self) -> Self::Bytes;
+    fn to_le(self) -> Self;
 
     // Contract: observing self is memory-safe (implies no uninitialised padding)
     fn as_byte_slice(x: &[Self]) -> &[u8];
 }
 impl Observable for u32 {
-    type Bytes = [u8; 4];
-    fn to_le_bytes(self) -> Self::Bytes {
-        self.to_le_bytes()
+    fn to_le(self) -> Self {
+        self.to_le()
     }
     fn as_byte_slice(x: &[Self]) -> &[u8] {
         let ptr = x.as_ptr() as *const u8;
@@ -71,9 +69,8 @@ impl Observable for u32 {
     }
 }
 impl Observable for u64 {
-    type Bytes = [u8; 8];
-    fn to_le_bytes(self) -> Self::Bytes {
-        self.to_le_bytes()
+    fn to_le(self) -> Self {
+        self.to_le()
     }
     fn as_byte_slice(x: &[Self]) -> &[u8] {
         let ptr = x.as_ptr() as *const u8;
@@ -82,28 +79,27 @@
     }
 }
 
-fn fill_via_chunks<T: Observable>(src: &[T], dest: &mut [u8]) -> (usize, usize) {
+/// Fill dest from src
+///
+/// Returns `(n, byte_len)`. `src[..n]` is consumed (and possibly mutated),
+/// `dest[..byte_len]` is filled. `src[n..]` and `dest[byte_len..]` are left
+/// unaltered.
+fn fill_via_chunks<T: Observable>(src: &mut [T], dest: &mut [u8]) -> (usize, usize) {
     let size = core::mem::size_of::<T>();
     let byte_len = min(src.len() * size, dest.len());
     let num_chunks = (byte_len + size - 1) / size;
 
-    if cfg!(target_endian = "little") {
-        // On LE we can do a simple copy, which is 25-50% faster:
-        dest[..byte_len].copy_from_slice(&T::as_byte_slice(&src[..num_chunks])[..byte_len]);
-    } else {
-        // This code is valid on all arches, but slower than the above:
-        let mut i = 0;
-        let mut iter = dest[..byte_len].chunks_exact_mut(size);
-        for chunk in &mut iter {
-            chunk.copy_from_slice(src[i].to_le_bytes().as_ref());
-            i += 1;
-        }
-        let chunk = iter.into_remainder();
-        if !chunk.is_empty() {
-            chunk.copy_from_slice(&src[i].to_le_bytes().as_ref()[..chunk.len()]);
+    // Byte-swap for portability of results. This must happen before copying
+    // since the size of dest is not guaranteed to be a multiple of T or to be
+    // sufficiently aligned.
+    if cfg!(target_endian = "big") {
+        for x in &mut src[..num_chunks] {
+            *x = x.to_le();
         }
     }
 
+    dest[..byte_len].copy_from_slice(&T::as_byte_slice(&src[..num_chunks])[..byte_len]);
+
     (num_chunks, byte_len)
 }
 
@@ -112,6 +108,9 @@ fn fill_via_chunks<T: Observable>(src: &[T], dest: &mut [u8]) -> (usize, usize)
 ///
 /// The return values are `(consumed_u32, filled_u8)`.
 ///
+/// On big-endian systems, endianness of `src[..consumed_u32]` values is
+/// swapped. No other adjustments to `src` are made.
+///
 /// `filled_u8` is the number of filled bytes in `dest`, which may be less than
 /// the length of `dest`.
 /// `consumed_u32` is the number of words consumed from `src`, which is the same
@@ -137,7 +136,7 @@ fn fill_via_chunks<T: Observable>(src: &[T], dest: &mut [u8]) -> (usize, usize)
 /// }
 /// }
 /// ```
-pub fn fill_via_u32_chunks(src: &[u32], dest: &mut [u8]) -> (usize, usize) {
+pub fn fill_via_u32_chunks(src: &mut [u32], dest: &mut [u8]) -> (usize, usize) {
     fill_via_chunks(src, dest)
 }
 
@@ -145,13 +144,17 @@ pub fn fill_via_u32_chunks(src: &[u32], dest: &mut [u8]) -> (usize, usize) {
 /// based RNG.
 ///
 /// The return values are `(consumed_u64, filled_u8)`.
+///
+/// On big-endian systems, endianness of `src[..consumed_u64]` values is
+/// swapped. No other adjustments to `src` are made.
+///
 /// `filled_u8` is the number of filled bytes in `dest`, which may be less than
 /// the length of `dest`.
 /// `consumed_u64` is the number of words consumed from `src`, which is the same
 /// as `filled_u8 / 8` rounded up.
 ///
 /// See `fill_via_u32_chunks` for an example.
-pub fn fill_via_u64_chunks(src: &[u64], dest: &mut [u8]) -> (usize, usize) {
+pub fn fill_via_u64_chunks(src: &mut [u64], dest: &mut [u8]) -> (usize, usize) {
     fill_via_chunks(src, dest)
 }
 
@@ -175,33 +178,41 @@ mod test {
 
     #[test]
     fn test_fill_via_u32_chunks() {
-        let src = [1, 2, 3];
+        let src_orig = [1, 2, 3];
+
+        let mut src = src_orig;
         let mut dst = [0u8; 11];
-        assert_eq!(fill_via_u32_chunks(&src, &mut dst), (3, 11));
+        assert_eq!(fill_via_u32_chunks(&mut src, &mut dst), (3, 11));
         assert_eq!(dst, [1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0]);
 
+        let mut src = src_orig;
         let mut dst = [0u8; 13];
-        assert_eq!(fill_via_u32_chunks(&src, &mut dst), (3, 12));
+        assert_eq!(fill_via_u32_chunks(&mut src, &mut dst), (3, 12));
         assert_eq!(dst, [1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0]);
 
+        let mut src = src_orig;
         let mut dst = [0u8; 5];
-        assert_eq!(fill_via_u32_chunks(&src, &mut dst), (2, 5));
+        assert_eq!(fill_via_u32_chunks(&mut src, &mut dst), (2, 5));
         assert_eq!(dst, [1, 0, 0, 0, 2]);
     }
 
     #[test]
     fn test_fill_via_u64_chunks() {
-        let src = [1, 2];
+        let src_orig = [1, 2];
+
+        let mut src = src_orig;
         let mut dst = [0u8; 11];
-        assert_eq!(fill_via_u64_chunks(&src, &mut dst), (2, 11));
+        assert_eq!(fill_via_u64_chunks(&mut src, &mut dst), (2, 11));
         assert_eq!(dst, [1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0]);
 
+        let mut src = src_orig;
         let mut dst = [0u8; 17];
-        assert_eq!(fill_via_u64_chunks(&src, &mut dst), (2, 16));
+        assert_eq!(fill_via_u64_chunks(&mut src, &mut dst), (2, 16));
        assert_eq!(dst, [1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0]);
 
+        let mut src = src_orig;
         let mut dst = [0u8; 5];
-        assert_eq!(fill_via_u64_chunks(&src, &mut dst), (1, 5));
+        assert_eq!(fill_via_u64_chunks(&mut src, &mut dst), (1, 5));
         assert_eq!(dst, [1, 0, 0, 0, 0]);
     }
 }
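
For reference, below is a minimal standalone sketch of the byte-swap-then-copy approach the patch adopts, specialised to `u32`. It is not part of the patch: the helper name `fill_u32_chunks` and the `main` harness are illustrative only, and it uses a safe per-word copy instead of the `unsafe` `Observable::as_byte_slice` memcpy. On little-endian targets `to_le()` is a no-op, so the function degenerates to a plain copy; on big-endian targets the consumed words are swapped in place first, which is why the new `fill_via_chunks` needs `&mut` access to `src`.

```
// Standalone sketch, not part of the patch: byte-swap consumed words in
// place, then copy little-endian bytes into `dest`.
fn fill_u32_chunks(src: &mut [u32], dest: &mut [u8]) -> (usize, usize) {
    let size = core::mem::size_of::<u32>();
    let byte_len = core::cmp::min(src.len() * size, dest.len());
    let num_chunks = (byte_len + size - 1) / size;

    // On BE targets, swap so that each consumed word's native byte order
    // equals its little-endian representation; no-op on LE targets.
    if cfg!(target_endian = "big") {
        for x in &mut src[..num_chunks] {
            *x = x.to_le();
        }
    }

    // Copy word by word; the last chunk of `dest` may be shorter than 4 bytes.
    for (chunk, &word) in dest[..byte_len].chunks_mut(size).zip(src[..num_chunks].iter()) {
        let bytes = word.to_ne_bytes(); // native order == LE order after the swap above
        chunk.copy_from_slice(&bytes[..chunk.len()]);
    }

    (num_chunks, byte_len)
}

fn main() {
    let mut src = [1u32, 2, 3];
    let mut dest = [0u8; 10];
    assert_eq!(fill_u32_chunks(&mut src, &mut dest), (3, 10));
    assert_eq!(dest, [1, 0, 0, 0, 2, 0, 0, 0, 3, 0]);
}
```

In the real `impls.rs` the copy is a single `copy_from_slice` over `as_byte_slice`, which has no per-word hook to reorder bytes; that is why the swap must happen before the copy, as the comment added by the patch explains.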