[WIP] polyval: Initial implementation

Implements POLYVAL using Shay Gueron's techniques for efficient field multiplications using PCLMULQDQ. More information on these techniques here: https://blog.quarkslab.com/reversing-a-finite-field-multiplication-optimization.html
RustCrypto · Aug 15, 2019 · 8075ae1 · 8075ae1
1 parent 2fbadfb
commit 8075ae1
Show file tree

Hide file tree

Showing 10 changed files with 669 additions and 2 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -5,5 +5,5 @@ members = [
     "hmac",
     "pmac",
     "poly1305",
-    "polyval"
+    "polyval",
 ]
diff --git a/polyval/Cargo.toml b/polyval/Cargo.toml
@@ -10,6 +10,15 @@ keywords = ["aes-gcm-siv", "crypto", "ghash", "gcm", "universal-hashing"]
 categories = ["cryptography", "no-std"]
 
 [dependencies]
+byteorder = { version = "1", default-features = false }
+zeroize = { version = "0.9", optional = true, default-features = false }
+
+[dev-dependencies]
+crypto-mac = { version = "0.7", features = ["dev"] }
+hex-literal = "0.2"
+
+[features]
+nightly = []
 
 [badges]
 travis-ci = { repository = "RustCrypto/hashes" }
diff --git a/polyval/benches/polyval.rs b/polyval/benches/polyval.rs
@@ -0,0 +1,89 @@
+#![feature(test)]
+#[macro_use]
+extern crate crypto_mac;
+extern crate polyval;
+
+use crypto_mac::generic_array::{typenum::U16, GenericArray};
+use crypto_mac::MacResult;
+use polyval::{FieldElement, Polyval};
+use std::{cmp::min, convert::TryInto};
+
+bench!(PolyvalMac);
+
+/// POLYVAL isn't a traditional MAC and for that reason doesn't impl the
+/// `crypto_mac::Mac` trait.
+///
+/// This type is a wrapper that impls a pseudo-MAC to leverage the benchmark
+/// functionality.
+///
+/// This is just for benchmarking! Don't copy and paste this into your program
+/// unless you really know what you're doing!!!
+#[derive(Clone)]
+struct PolyvalMac {
+    /// Underlying POLYVAL state that complete blocks are fed into.
+    poly: Polyval,
+    /// Number of bytes currently held in `buffer` that have not yet been
+    /// passed to `poly` (always below 16 once `input` returns).
+    leftover: usize,
+    /// Staging area for a single block of input.
+    buffer: FieldElement,
+}
+
+impl Mac for PolyvalMac {
+    type OutputSize = U16;
+    type KeySize = U16;
+
+    /// Create a pseudo-MAC over POLYVAL keyed with `key`, starting with an
+    /// empty block buffer.
+    fn new(key: &GenericArray<u8, Self::KeySize>) -> PolyvalMac {
+        // `KeySize` is `U16`, so the conversion to the fixed-size key form
+        // is presumably infallible here; a failure would be a bug.
+        let poly = Polyval::new(key.as_slice().try_into().unwrap());
+
+        PolyvalMac {
+            poly,
+            leftover: 0,
+            buffer: FieldElement::default(),
+        }
+    }
+
+    /// Absorb `data`: every complete 16-byte block is passed to POLYVAL and
+    /// any trailing partial block is kept in `buffer` for the next call.
+    fn input(&mut self, data: &[u8]) {
+        let mut m = data;
+
+        // First top up a partially filled buffer left by a previous call.
+        if self.leftover > 0 {
+            let want = min(16 - self.leftover, m.len());
+
+            for (i, byte) in m.iter().cloned().enumerate().take(want) {
+                self.buffer[self.leftover + i] = byte;
+            }
+
+            m = &m[want..];
+            self.leftover += want;
+
+            // Still not a full block: wait for more input.
+            if self.leftover < 16 {
+                return;
+            }
+
+            self.block();
+            self.leftover = 0;
+        }
+
+        while m.len() >= 16 {
+            // `block()` hashes whatever is in `self.buffer`, so the next
+            // full block has to be loaded into it first; otherwise stale
+            // buffer contents would be hashed instead of the input.
+            self.buffer[..16].copy_from_slice(&m[..16]);
+            self.block();
+            m = &m[16..];
+        }
+
+        // Stash the remaining (< 16) bytes until more data arrives.
+        self.buffer[..m.len()].copy_from_slice(m);
+        self.leftover = m.len();
+    }
+
+    /// Not needed by the benchmark; calling this panics.
+    fn reset(&mut self) {
+        unimplemented!();
+    }
+
+    /// Return the current POLYVAL output as the MAC result.
+    ///
+    /// Bytes still buffered as a partial block are not hashed.
+    fn result(self) -> MacResult<Self::OutputSize> {
+        let mut mac = GenericArray::default();
+        mac.copy_from_slice(&self.poly.result());
+        MacResult::new(mac)
+    }
+}
+
+impl PolyvalMac {
+    /// Feed the block currently held in `buffer` to the POLYVAL state.
+    fn block(&mut self) {
+        // `poly` and `buffer` are distinct fields, so the buffer can be
+        // borrowed directly without taking a copy first.
+        self.poly.input(&self.buffer)
+    }
+}
diff --git a/polyval/src/field.rs b/polyval/src/field.rs
@@ -0,0 +1,147 @@
+//! Implementation of POLYVAL's finite field.
+//!
+//! From [RFC 8452 Section 3] which defines POLYVAL for use in AES-GCM-SIV:
+//!
+//! > "POLYVAL, like GHASH (the authenticator in AES-GCM; ...), operates in a
+//! > binary field of size 2^128.  The field is defined by the irreducible
+//! > polynomial x^128 + x^127 + x^126 + x^121 + 1."
+//!
+//! This implementation provides multiplication over GF(2^128) optimized using
+//! Shay Gueron's PCLMULQDQ-based techniques.
+//!
+//! For more information on how these techniques work, see:
+//! <https://blog.quarkslab.com/reversing-a-finite-field-multiplication-optimization.html>
+//!
+//! [RFC 8452 Section 3]: https://tools.ietf.org/html/rfc8452#section-3
+
+pub mod backend;
+pub mod clmul;
+
+use self::backend::Xmm;
+use super::FIELD_SIZE;
+use byteorder::{ByteOrder, LE};
+use core::ops::{BitXor, Mul};
+
+/// POLYVAL field element.
+///
+/// Thin wrapper around a backend register value `X` (any type implementing
+/// `Xmm`); the operations on this type are carried out through that backend.
+#[derive(Copy, Clone)]
+pub struct FieldElement<X: Xmm>(X);
+
+impl<X: Xmm> FieldElement<X> {
+    /// Decode a `FieldElement` from its bytestring form, read as two
+    /// little-endian 64-bit words (first word from the first 8 bytes).
+    pub fn from_bytes(bytes: [u8; FIELD_SIZE]) -> Self {
+        let (head, tail) = bytes.split_at(8);
+        [LE::read_u64(head), LE::read_u64(tail)].into()
+    }
+
+    /// Encode this `FieldElement` as a bytestring, writing its two 64-bit
+    /// words in little-endian order.
+    pub fn to_bytes(self) -> [u8; FIELD_SIZE] {
+        let words: [u64; 2] = self.0.into();
+        let mut out = [0u8; FIELD_SIZE];
+
+        for (chunk, word) in out.chunks_mut(8).zip(words.iter()) {
+            LE::write_u64(chunk, *word);
+        }
+
+        out
+    }
+}
+
+impl<X: Xmm> Mul for FieldElement<X> {
+    type Output = Self;
+
+    /// Multiplies two POLYVAL field elements over GF(2^128).
+    ///
+    /// [RFC 8452 Section 3] describes the operation as:
+    ///
+    /// > "The product of any two elements is calculated using standard
+    /// > (binary) polynomial multiplication followed by reduction modulo the
+    /// > irreducible polynomial."
+    ///
+    /// Four carry-less multiplications are combined into two registers, one
+    /// of which is passed through `reduce()` and folded into the other.
+    ///
+    /// [RFC 8452 Section 3]: https://tools.ietf.org/html/rfc8452#section-3
+    fn mul(self, rhs: Self) -> Self {
+        let (a, b) = (self.0, rhs.0);
+
+        // Partial products; the immediate presumably selects the operand
+        // halves as in PCLMULQDQ -- confirm against the `Clmul` trait.
+        let mut lo = a.clmul(b, 0x00);
+        let mut mid = a.clmul(b, 0x01);
+        let cross = a.clmul(b, 0x10);
+        let mut hi = a.clmul(b, 0x11);
+
+        // Merge the two cross terms, then distribute each half of the
+        // merged value onto the outer products.
+        mid ^= cross;
+        lo ^= mid.shift_right();
+        hi ^= mid.shift_left();
+
+        // Fold the reduced register into the result.
+        hi ^= lo.reduce();
+
+        FieldElement(hi)
+    }
+}
+
+impl<X: Xmm> From<X> for FieldElement<X> {
+    /// Wrap a raw backend register value as a field element.
+    fn from(element: X) -> Self {
+        Self(element)
+    }
+}
+
+impl<X: Xmm> From<[u64; 2]> for FieldElement<X> {
+    /// Build a field element from two 64-bit words via the backend's own
+    /// `From<[u64; 2]>` conversion.
+    fn from(array: [u64; 2]) -> Self {
+        Self(X::from(array))
+    }
+}
+
+impl<X: Xmm> From<FieldElement<X>> for [u64; 2] {
+    /// Unwrap a field element into the two 64-bit words of its backend
+    /// register value.
+    fn from(fe: FieldElement<X>) -> [u64; 2] {
+        let FieldElement(inner) = fe;
+        inner.into()
+    }
+}
+
+impl<X: Xmm> BitXor for FieldElement<X> {
+    type Output = Self;
+
+    /// XOR the underlying register values of the two elements.
+    fn bitxor(self, rhs: Self) -> Self::Output {
+        Self(self.0 ^ rhs.0)
+    }
+}
+
+#[cfg(test)]
+// `mulX_polyval` mirrors the function name used in the RFC, hence the lint
+// allowance.
+#[allow(non_snake_case)]
+mod tests {
+    use super::FieldElement;
+    use crate::{field::backend::soft::U64x2, FIELD_SIZE};
+
+    /// Field element over the software backend, used by every test here.
+    type FE = FieldElement<U64x2>;
+
+    //
+    // Field Operation Examples from RFC 8452 Section 7
+    // <https://tools.ietf.org/html/rfc8452#section-7>
+    //
+
+    const A: [u8; FIELD_SIZE] = hex!("66e94bd4ef8a2c3b884cfa59ca342b2e");
+    const B: [u8; FIELD_SIZE] = hex!("ff000000000000000000000000000000");
+    const A_MUL_B: [u8; FIELD_SIZE] = hex!("37856175e9dc9df26ebc6d6171aa0ae9");
+
+    /// Checks that `A * B` serializes to the expected product `A_MUL_B`.
+    #[test]
+    fn rfc_8452_test_vector() {
+        let a = FE::from_bytes(A);
+        let b = FE::from_bytes(B);
+        assert_eq!(&A_MUL_B, &(a * b).to_bytes());
+    }
+
+    //
+    // `mulX_POLYVAL` examples from RFC 8452 Appendix A
+    // <https://tools.ietf.org/html/rfc8452#appendix-A>
+    //
+
+    /// Bytestring multiplied into the input by `mulX_polyval`.
+    const X: [u8; FIELD_SIZE] = hex!("02000000000000000000000000000000");
+
+    /// Implementation of the `mulX_POLYVAL` function described in Appendix A.
+    fn mulX_polyval(input: [u8; FIELD_SIZE]) -> [u8; FIELD_SIZE] {
+        (FE::from_bytes(input) * FE::from_bytes(X)).to_bytes()
+    }
+
+    /// Checks `mulX_polyval` against two input/output pairs.
+    #[test]
+    fn rfc_8452_mulx_polyval_vectors() {
+        let one = hex!("01000000000000000000000000000000");
+        let one_result = mulX_polyval(one);
+        assert_eq!(&one_result, &hex!("02000000000000000000000000000000"));
+
+        let another = hex!("9c98c04df9387ded828175a92ba652d8");
+        let another_result = mulX_polyval(another);
+        assert_eq!(&another_result, &hex!("3931819bf271fada0503eb52574ca5f2"));
+    }
+}
diff --git a/polyval/src/field/backend.rs b/polyval/src/field/backend.rs
@@ -0,0 +1,58 @@
+//! Field arithmetic backends
+
+#[cfg(all(
+    target_feature = "pclmulqdq",
+    target_feature = "sse2",
+    target_feature = "sse4.1",
+    any(target_arch = "x86", target_arch = "x86_64")
+))]
+pub mod pclmulqdq;
+pub mod soft;
+
+use super::clmul::Clmul;
+use core::ops::{BitXor, BitXorAssign};
+
+/// Mask value to load into XMM register when performing Montgomery reduction.
+/// See: <https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf>
+///
+/// Read as `[low word, high word]` (the word order `Xmm::rotate_left` uses),
+/// the set bits are 0, 121, 126 and 127, i.e. the terms
+/// x^127 + x^126 + x^121 + 1 of the field polynomial without its x^128 term.
+const MASK: [u64; 2] = [0x1, 0xc200_0000_0000_0000];
+
+/// Trait representing the arithmetic operations we expect on the XMM registers
+///
+/// A register converts to and from `[u64; 2]`; the default methods below
+/// treat word 0 as the low 64 bits and word 1 as the high 64 bits.
+pub trait Xmm:
+    BitXor<Output = Self> + BitXorAssign + Clmul + Copy + From<[u64; 2]> + Into<[u64; 2]>
+{
+    /// Fast reduction modulo x^128 + x^127 + x^126 + x^121 + 1 (Gueron 2012)
+    /// Algorithm 4: "Montgomery reduction"
+    ///
+    /// Performs two rounds of: carry-less multiply by `MASK`, swap the
+    /// register halves, XOR the two together.
+    fn reduce(self) -> Self {
+        let mask = Self::from(MASK);
+        let a = mask.clmul(self, 0x01);
+        let b = self.rotate_left() ^ a;
+        let c = mask.clmul(b, 0x01);
+        b.rotate_left() ^ c
+    }
+
+    /// Rotate the contents of the register left by 64-bits
+    ///
+    /// Rotating a 128-bit value by 64 bits exchanges its two 64-bit words,
+    /// so both words are carried over in full.
+    fn rotate_left(self) -> Self {
+        let words: [u64; 2] = self.into();
+        [words[1], words[0]].into()
+    }
+
+    /// Shift the contents of the register right by 64-bits
+    ///
+    /// Concretely: word 0 is moved into word 1 and word 0 is zeroed. With
+    /// word 0 taken as the low half (as in `rotate_left`), that is `<< 64`
+    /// on the 128-bit value; the name refers to the array position.
+    fn shift_right(self) -> Self {
+        let words: [u64; 2] = self.into();
+        [0, words[0]].into()
+    }
+
+    /// Shift the contents of the register left by 64-bits
+    ///
+    /// Concretely: word 1 is moved into word 0 and word 1 is zeroed. With
+    /// word 0 taken as the low half (as in `rotate_left`), that is `>> 64`
+    /// on the 128-bit value; the name refers to the array position.
+    fn shift_left(self) -> Self {
+        let words: [u64; 2] = self.into();
+        [words[1], 0].into()
+    }
+}