From 3167e86d1afd6dc52da1ada3596a12eeb6d96b52 Mon Sep 17 00:00:00 2001 From: Jonathan S Date: Sun, 23 Mar 2014 16:29:43 -0500 Subject: [PATCH 1/5] Optimized SipHash implementation bench_compound_1: 70 -> 56 bench_long_str: 795 -> 525 bench_str: 32 -> 32 --- src/libstd/hash/sip.rs | 138 ++++++++++++++++++++++++++++++++++------- 1 file changed, 115 insertions(+), 23 deletions(-) diff --git a/src/libstd/hash/sip.rs b/src/libstd/hash/sip.rs index d448f4eeb37f6..be36076f9d456 100644 --- a/src/libstd/hash/sip.rs +++ b/src/libstd/hash/sip.rs @@ -24,11 +24,13 @@ * discouraged. */ +use cast::transmute; use clone::Clone; use container::Container; use default::Default; use io::{IoResult, Writer}; use iter::Iterator; +use mem::to_le64; use result::Ok; use slice::ImmutableVector; @@ -43,7 +45,7 @@ pub struct SipState { priv v1: u64, priv v2: u64, priv v3: u64, - priv tail: [u8, ..8], // unprocessed bytes + priv tail: u64, // unprocessed bytes, stored in little endian format priv ntail: uint, // how many bytes in tail are valid } @@ -53,14 +55,7 @@ pub struct SipState { macro_rules! u8to64_le ( ($buf:expr, $i:expr) => - ($buf[0+$i] as u64 | - $buf[1+$i] as u64 << 8 | - $buf[2+$i] as u64 << 16 | - $buf[3+$i] as u64 << 24 | - $buf[4+$i] as u64 << 32 | - $buf[5+$i] as u64 << 40 | - $buf[6+$i] as u64 << 48 | - $buf[7+$i] as u64 << 56) + (unsafe { to_le64(*transmute::<*u8, *u64>($buf.slice_from($i).as_ptr()) as i64) as u64 }) ) macro_rules! rotl ( @@ -98,7 +93,7 @@ impl SipState { v1: 0, v2: 0, v3: 0, - tail: [ 0, 0, 0, 0, 0, 0, 0, 0 ], + tail: 0, ntail: 0, }; state.reset(); @@ -114,6 +109,7 @@ impl SipState { self.v2 = self.k0 ^ 0x6c7967656e657261; self.v3 = self.k1 ^ 0x7465646279746573; self.ntail = 0; + self.tail = 0; } /// Return the computed hash. 
@@ -124,15 +120,7 @@ impl SipState { let mut v2 = self.v2; let mut v3 = self.v3; - let mut b : u64 = (self.length as u64 & 0xff) << 56; - - if self.ntail > 0 { b |= self.tail[0] as u64 << 0; } - if self.ntail > 1 { b |= self.tail[1] as u64 << 8; } - if self.ntail > 2 { b |= self.tail[2] as u64 << 16; } - if self.ntail > 3 { b |= self.tail[3] as u64 << 24; } - if self.ntail > 4 { b |= self.tail[4] as u64 << 32; } - if self.ntail > 5 { b |= self.tail[5] as u64 << 40; } - if self.ntail > 6 { b |= self.tail[6] as u64 << 48; } + let b : u64 = ((self.length as u64 & 0xff) << 56) | self.tail; v3 ^= b; compress!(v0, v1, v2, v3); @@ -163,7 +151,7 @@ impl Writer for SipState { if length < needed { let mut t = 0; while t < length { - self.tail[self.ntail+t] = msg[t]; + self.tail |= msg[t] as u64 << 8*(self.ntail+t); t += 1; } self.ntail += length; @@ -172,11 +160,11 @@ impl Writer for SipState { let mut t = 0; while t < needed { - self.tail[self.ntail+t] = msg[t]; + self.tail |= msg[t] as u64 << 8*(self.ntail+t); t += 1; } - let m = u8to64_le!(self.tail, 0); + let m = self.tail; self.v3 ^= m; compress!(self.v0, self.v1, self.v2, self.v3); @@ -184,6 +172,7 @@ impl Writer for SipState { self.v0 ^= m; self.ntail = 0; + self.tail = 0; } // Buffered tail is now flushed, process new input. 
@@ -205,13 +194,101 @@ impl Writer for SipState { let mut t = 0u; while t < left { - self.tail[t] = msg[i+t]; + self.tail |= msg[i+t] as u64 << 8*t; t += 1 } self.ntail = left; Ok(()) } + + #[inline] + fn write_u8(&mut self, n: u8) -> IoResult<()> { + self.tail |= n as u64 << 8*self.ntail; + self.ntail += 1; + + if self.ntail == 8 { + let m = self.tail; + + self.v3 ^= m; + compress!(self.v0, self.v1, self.v2, self.v3); + compress!(self.v0, self.v1, self.v2, self.v3); + self.v0 ^= m; + + self.tail = 0; + self.ntail = 0; + } + + self.length += 1; + + Ok(()) + } + + #[inline] + fn write_le_u16(&mut self, n: u16) -> IoResult<()> { + self.tail |= n as u64 << 8*self.ntail; + self.ntail += 2; + + if self.ntail >= 8 { + let m = self.tail; + + self.v3 ^= m; + compress!(self.v0, self.v1, self.v2, self.v3); + compress!(self.v0, self.v1, self.v2, self.v3); + self.v0 ^= m; + + self.tail = n as u64 >> 64 - 8*self.ntail; + self.ntail -= 8; + } + + self.length += 2; + + Ok(()) + } + + #[inline] + fn write_le_u32(&mut self, n: u32) -> IoResult<()> { + self.tail |= n as u64 << 8*self.ntail; + self.ntail += 4; + + if self.ntail >= 8 { + let m = self.tail; + + self.v3 ^= m; + compress!(self.v0, self.v1, self.v2, self.v3); + compress!(self.v0, self.v1, self.v2, self.v3); + self.v0 ^= m; + + self.tail = n as u64 >> 64 - 8*self.ntail; + self.ntail -= 8; + } + + self.length += 4; + + Ok(()) + } + + #[inline] + fn write_le_u64(&mut self, n: u64) -> IoResult<()> { + self.tail |= n << 8*self.ntail; + + let m = self.tail; + + self.v3 ^= m; + compress!(self.v0, self.v1, self.v2, self.v3); + compress!(self.v0, self.v1, self.v2, self.v3); + self.v0 ^= m; + + if self.ntail == 0 { + self.tail = 0; + } else { + self.tail = n >> 64 - 8*self.ntail; + } + + self.length += 8; + + Ok(()) + } } impl Clone for SipState { @@ -287,8 +364,10 @@ pub fn hash_with_keys>(k0: u64, k1: u64, value: &T) -> u64 { #[cfg(test)] mod tests { extern crate test; + use cast::transmute; use io::Writer; use iter::Iterator; + 
use mem::to_le64; use num::ToStrRadix; use option::{Some, None}; use str::{Str, OwnedStr}; @@ -523,6 +602,19 @@ mod tests { }) } + #[bench] + fn bench_long_str(bh: &mut BenchHarness) { + let s = "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor \ + incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud \ + exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute \ + irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla \ + pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui \ + officia deserunt mollit anim id est laborum."; + bh.iter(|| { + assert_eq!(hash(&s), 17717065544121360093); + }) + } + struct Compound { x: u8, y: u16, From aa618541b89e2b3185b7faef4c9ab7500bd77e08 Mon Sep 17 00:00:00 2001 From: Jonathan S Date: Fri, 28 Mar 2014 18:01:57 -0500 Subject: [PATCH 2/5] Reverted change to u8to64_le due to it being invalid on unaligned architectures --- src/libstd/hash/sip.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/libstd/hash/sip.rs b/src/libstd/hash/sip.rs index be36076f9d456..169b6ad98657a 100644 --- a/src/libstd/hash/sip.rs +++ b/src/libstd/hash/sip.rs @@ -55,7 +55,14 @@ pub struct SipState { macro_rules! u8to64_le ( ($buf:expr, $i:expr) => - (unsafe { to_le64(*transmute::<*u8, *u64>($buf.slice_from($i).as_ptr()) as i64) as u64 }) + ($buf[0+$i] as u64 | + $buf[1+$i] as u64 << 8 | + $buf[2+$i] as u64 << 16 | + $buf[3+$i] as u64 << 24 | + $buf[4+$i] as u64 << 32 | + $buf[5+$i] as u64 << 40 | + $buf[6+$i] as u64 << 48 | + $buf[7+$i] as u64 << 56) ) macro_rules! 
rotl ( From 5bfe692fb5addcbbf36ddf858cb8386ccc5d00cf Mon Sep 17 00:00:00 2001 From: Jonathan S Date: Fri, 28 Mar 2014 22:04:31 -0500 Subject: [PATCH 3/5] Switched to a macro for the write_le functions --- src/libstd/hash/sip.rs | 109 +++++++++++++---------------------------- 1 file changed, 33 insertions(+), 76 deletions(-) diff --git a/src/libstd/hash/sip.rs b/src/libstd/hash/sip.rs index 169b6ad98657a..1894eaceb1c45 100644 --- a/src/libstd/hash/sip.rs +++ b/src/libstd/hash/sip.rs @@ -24,13 +24,12 @@ * discouraged. */ -use cast::transmute; use clone::Clone; use container::Container; use default::Default; use io::{IoResult, Writer}; use iter::Iterator; -use mem::to_le64; +use mem::size_of_val; use result::Ok; use slice::ImmutableVector; @@ -82,6 +81,34 @@ macro_rules! compress ( }) ) +macro_rules! make_write_le ( + () => + ({ + self.tail |= n as u64 << 8*self.ntail; + self.ntail += size_of_val(&n); + + if self.ntail >= 8 { + let m = self.tail; + + self.v3 ^= m; + compress!(self.v0, self.v1, self.v2, self.v3); + compress!(self.v0, self.v1, self.v2, self.v3); + self.v0 ^= m; + + self.ntail -= 8; + if self.ntail == 0 { + self.tail = 0; + } else { + self.tail = n as u64 >> 64 - 8*self.ntail; + } + } + + self.length += size_of_val(&n); + + Ok(()) + }) +) + impl SipState { /// Create a `SipState` that is keyed off the provided keys. 
#[inline] @@ -211,90 +238,22 @@ impl Writer for SipState { #[inline] fn write_u8(&mut self, n: u8) -> IoResult<()> { - self.tail |= n as u64 << 8*self.ntail; - self.ntail += 1; - - if self.ntail == 8 { - let m = self.tail; - - self.v3 ^= m; - compress!(self.v0, self.v1, self.v2, self.v3); - compress!(self.v0, self.v1, self.v2, self.v3); - self.v0 ^= m; - - self.tail = 0; - self.ntail = 0; - } - - self.length += 1; - - Ok(()) + make_write_le!() } #[inline] fn write_le_u16(&mut self, n: u16) -> IoResult<()> { - self.tail |= n as u64 << 8*self.ntail; - self.ntail += 2; - - if self.ntail >= 8 { - let m = self.tail; - - self.v3 ^= m; - compress!(self.v0, self.v1, self.v2, self.v3); - compress!(self.v0, self.v1, self.v2, self.v3); - self.v0 ^= m; - - self.tail = n as u64 >> 64 - 8*self.ntail; - self.ntail -= 8; - } - - self.length += 2; - - Ok(()) + make_write_le!() } #[inline] fn write_le_u32(&mut self, n: u32) -> IoResult<()> { - self.tail |= n as u64 << 8*self.ntail; - self.ntail += 4; - - if self.ntail >= 8 { - let m = self.tail; - - self.v3 ^= m; - compress!(self.v0, self.v1, self.v2, self.v3); - compress!(self.v0, self.v1, self.v2, self.v3); - self.v0 ^= m; - - self.tail = n as u64 >> 64 - 8*self.ntail; - self.ntail -= 8; - } - - self.length += 4; - - Ok(()) + make_write_le!() } #[inline] fn write_le_u64(&mut self, n: u64) -> IoResult<()> { - self.tail |= n << 8*self.ntail; - - let m = self.tail; - - self.v3 ^= m; - compress!(self.v0, self.v1, self.v2, self.v3); - compress!(self.v0, self.v1, self.v2, self.v3); - self.v0 ^= m; - - if self.ntail == 0 { - self.tail = 0; - } else { - self.tail = n >> 64 - 8*self.ntail; - } - - self.length += 8; - - Ok(()) + make_write_le!() } } @@ -371,10 +330,8 @@ pub fn hash_with_keys>(k0: u64, k1: u64, value: &T) -> u64 { #[cfg(test)] mod tests { extern crate test; - use cast::transmute; use io::Writer; use iter::Iterator; - use mem::to_le64; use num::ToStrRadix; use option::{Some, None}; use str::{Str, OwnedStr}; From 
7a99a1bda97ece80053942252311716e438e90e3 Mon Sep 17 00:00:00 2001 From: Jonathan S Date: Sun, 30 Mar 2014 12:30:44 -0500 Subject: [PATCH 4/5] Added u64_from_le_bytes to io::extensions and used it in the SipHash implementation to get the same benefits as the previous try using unsafe code. --- src/libstd/hash/sip.rs | 11 ++----- src/libstd/io/extensions.rs | 60 +++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 8 deletions(-) diff --git a/src/libstd/hash/sip.rs b/src/libstd/hash/sip.rs index 1894eaceb1c45..fc72617c3d440 100644 --- a/src/libstd/hash/sip.rs +++ b/src/libstd/hash/sip.rs @@ -28,6 +28,7 @@ use clone::Clone; use container::Container; use default::Default; use io::{IoResult, Writer}; +use io::extensions::u64_from_le_bytes; use iter::Iterator; use mem::size_of_val; use result::Ok; @@ -54,14 +55,7 @@ pub struct SipState { macro_rules! u8to64_le ( ($buf:expr, $i:expr) => - ($buf[0+$i] as u64 | - $buf[1+$i] as u64 << 8 | - $buf[2+$i] as u64 << 16 | - $buf[3+$i] as u64 << 24 | - $buf[4+$i] as u64 << 32 | - $buf[5+$i] as u64 << 40 | - $buf[6+$i] as u64 << 48 | - $buf[7+$i] as u64 << 56) + (u64_from_le_bytes($buf, $i, 8)) ) macro_rules! rotl ( @@ -331,6 +325,7 @@ pub fn hash_with_keys>(k0: u64, k1: u64, value: &T) -> u64 { mod tests { extern crate test; use io::Writer; + use io::extensions::u64_from_le_bytes; use iter::Iterator; use num::ToStrRadix; use option::{Some, None}; diff --git a/src/libstd/io/extensions.rs b/src/libstd/io/extensions.rs index b9e933d0b14dd..bcfb5c687a512 100644 --- a/src/libstd/io/extensions.rs +++ b/src/libstd/io/extensions.rs @@ -170,6 +170,37 @@ pub fn u64_from_be_bytes(data: &[u8], start: uint, size: uint) -> u64 { } } +/// Extracts an 8-bit to 64-bit unsigned little-endian value from the given byte +/// buffer and returns it as a 64-bit value. +/// +/// Arguments: +/// +/// * `data`: The buffer in which to extract the value. +/// * `start`: The offset at which to extract the value. 
+/// * `size`: The size of the value in bytes to extract. This must be 8 or +/// less, or task failure occurs. If this is less than 8, then only +/// that many bytes are parsed. For example, if `size` is 4, then a +/// 32-bit value is parsed. +pub fn u64_from_le_bytes(data: &[u8], start: uint, size: uint) -> u64 { + use ptr::{copy_nonoverlapping_memory}; + use mem::from_le64; + use slice::MutableVector; + + assert!(size <= 8u); + + if data.len() - start < size { + fail!("index out of bounds"); + } + + let mut buf = [0u8, ..8]; + unsafe { + let ptr = data.as_ptr().offset(start as int); + let out = buf.as_mut_ptr(); + copy_nonoverlapping_memory(out, ptr, size); + from_le64(*(out as *i64)) as u64 + } +} + #[cfg(test)] mod test { use prelude::*; @@ -498,6 +529,35 @@ mod test { assert_eq!(u64_from_be_bytes(buf, 1, 7), 0x02030405060708); assert_eq!(u64_from_be_bytes(buf, 1, 8), 0x0203040506070809); } + + #[test] + fn test_u64_from_le_bytes() { + use super::u64_from_le_bytes; + + let buf = [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09]; + + // Aligned access + assert_eq!(u64_from_le_bytes(buf, 0, 0), 0); + assert_eq!(u64_from_le_bytes(buf, 0, 1), 0x01); + assert_eq!(u64_from_le_bytes(buf, 0, 2), 0x0201); + assert_eq!(u64_from_le_bytes(buf, 0, 3), 0x030201); + assert_eq!(u64_from_le_bytes(buf, 0, 4), 0x04030201); + assert_eq!(u64_from_le_bytes(buf, 0, 5), 0x0504030201); + assert_eq!(u64_from_le_bytes(buf, 0, 6), 0x060504030201); + assert_eq!(u64_from_le_bytes(buf, 0, 7), 0x07060504030201); + assert_eq!(u64_from_le_bytes(buf, 0, 8), 0x0807060504030201); + + // Unaligned access + assert_eq!(u64_from_le_bytes(buf, 1, 0), 0); + assert_eq!(u64_from_le_bytes(buf, 1, 1), 0x02); + assert_eq!(u64_from_le_bytes(buf, 1, 2), 0x0302); + assert_eq!(u64_from_le_bytes(buf, 1, 3), 0x040302); + assert_eq!(u64_from_le_bytes(buf, 1, 4), 0x05040302); + assert_eq!(u64_from_le_bytes(buf, 1, 5), 0x0605040302); + assert_eq!(u64_from_le_bytes(buf, 1, 6), 0x070605040302); + 
assert_eq!(u64_from_le_bytes(buf, 1, 7), 0x08070605040302); + assert_eq!(u64_from_le_bytes(buf, 1, 8), 0x0908070605040302); + } } #[cfg(test)] From 5dba2cc9aae690835dd56e1d0167cb0510ac4b69 Mon Sep 17 00:00:00 2001 From: Jonathan S Date: Sun, 30 Mar 2014 13:57:21 -0500 Subject: [PATCH 5/5] Switched wholly over to u64_from_le_bytes --- src/libstd/hash/sip.rs | 33 +++++++++------------------------ 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/src/libstd/hash/sip.rs b/src/libstd/hash/sip.rs index fc72617c3d440..a408089127c4f 100644 --- a/src/libstd/hash/sip.rs +++ b/src/libstd/hash/sip.rs @@ -53,11 +53,6 @@ pub struct SipState { // because they're needed in the following defs; // this design could be improved. -macro_rules! u8to64_le ( - ($buf:expr, $i:expr) => - (u64_from_le_bytes($buf, $i, 8)) -) - macro_rules! rotl ( ($x:expr, $b:expr) => (($x << $b) | ($x >> (64 - $b))) @@ -177,22 +172,12 @@ impl Writer for SipState { needed = 8 - self.ntail; if length < needed { - let mut t = 0; - while t < length { - self.tail |= msg[t] as u64 << 8*(self.ntail+t); - t += 1; - } + self.tail |= u64_from_le_bytes(msg, 0, length) << 8*self.ntail; self.ntail += length; return Ok(()); } - let mut t = 0; - while t < needed { - self.tail |= msg[t] as u64 << 8*(self.ntail+t); - t += 1; - } - - let m = self.tail; + let m = self.tail | u64_from_le_bytes(msg, 0, needed) << 8*self.ntail; self.v3 ^= m; compress!(self.v0, self.v1, self.v2, self.v3); @@ -210,7 +195,7 @@ impl Writer for SipState { let mut i = needed; while i < end { - let mi = u8to64_le!(msg, i); + let mi = u64_from_le_bytes(msg, i, 8); self.v3 ^= mi; compress!(self.v0, self.v1, self.v2, self.v3); @@ -220,16 +205,16 @@ impl Writer for SipState { i += 8; } - let mut t = 0u; - while t < left { - self.tail |= msg[i+t] as u64 << 8*t; - t += 1 - } + self.tail = u64_from_le_bytes(msg, i, left); self.ntail = left; Ok(()) } + // We override these functions because by default, they convert `n` to bytes (possibly with an + 
// expensive byte swap) then convert those bytes back into a `u64` (possibly with another byte swap which + // reverses the first). Additionally, in these cases, we know that at most one flush will be + // needed, and so can optimize appropriately. #[inline] fn write_u8(&mut self, n: u8) -> IoResult<()> { make_write_le!() @@ -455,7 +440,7 @@ mod tests { while t < 64 { debug!("siphash test {}", t); - let vec = u8to64_le!(vecs[t], 0); + let vec = u64_from_le_bytes(vecs[t], 0, 8); let out = hash_with_keys(k0, k1, &Bytes(buf.as_slice())); debug!("got {:?}, expected {:?}", out, vec); assert_eq!(vec, out);