polyval: Initial implementation

Implements POLYVAL using Shay Gueron's techniques for efficient field multiplications using PCLMULQDQ. More information on these techniques here: https://blog.quarkslab.com/reversing-a-finite-field-multiplication-optimization.html
RustCrypto · Aug 14, 2019 · 8470b83 · 8470b83
1 parent cf5c0f0
commit 8470b83
Show file tree

Hide file tree

Showing 3 changed files with 278 additions and 1 deletion.
diff --git a/polyval/Cargo.toml b/polyval/Cargo.toml
@@ -10,6 +10,11 @@ keywords = ["aes-gcm-siv", "crypto", "ghash", "gcm", "universal-hashing"]
 categories = ["cryptography", "no-std"]
 
 [dependencies]
+byteorder = { version = "1", default-features = false }
+zeroize = { version = "0.9", optional = true, default-features = false }
+
+[dev-dependencies]
+crypto-mac = { version = "0.7", features = ["dev"] }
 
 [badges]
 travis-ci = { repository = "RustCrypto/hashes" }
diff --git a/polyval/benches/polyval.rs b/polyval/benches/polyval.rs
@@ -0,0 +1,92 @@
+#![feature(test)]
+#[macro_use]
+extern crate crypto_mac;
+extern crate polyval;
+
+use crypto_mac::generic_array::{
+    typenum::U16,
+    GenericArray,
+};
+use crypto_mac::MacResult;
+use polyval::{Polyval, FieldElement};
+use std::{cmp::min, convert::TryInto};
+
+bench!(PolyvalMac);
+
+/// Benchmark-only adapter that presents POLYVAL through the
+/// `crypto_mac::Mac` interface.
+///
+/// POLYVAL is a universal hash rather than a conventional MAC, so it does
+/// not implement `crypto_mac::Mac` itself. This newtype exists solely so the
+/// `bench!` macro can drive it.
+///
+/// This is just for benchmarking! Don't copy and paste this into your program
+/// unless you really know what you're doing!!!
+#[derive(Clone)]
+struct PolyvalMac {
+    /// Underlying POLYVAL state
+    poly: Polyval,
+
+    /// Number of bytes currently waiting in `buffer` (always less than 16
+    /// between calls to `input`)
+    leftover: usize,
+
+    /// Staging area used to assemble one 16-byte block before it is hashed
+    buffer: FieldElement,
+}
+
+impl Mac for PolyvalMac {
+    type OutputSize = U16;
+    type KeySize = U16;
+
+    /// Create a new pseudo-MAC keyed with the 16-byte `key`, used as the
+    /// POLYVAL field element `H`.
+    fn new(key: &GenericArray<u8, Self::KeySize>) -> PolyvalMac {
+        // `KeySize` is `U16`, so the slice is exactly 16 bytes long and the
+        // conversion to `&[u8; 16]` cannot fail.
+        let poly = Polyval::new(key.as_slice().try_into().unwrap());
+
+        PolyvalMac {
+            poly,
+            leftover: 0,
+            buffer: FieldElement::default(),
+        }
+    }
+
+    /// Absorb `data`, hashing every complete 16-byte block and buffering any
+    /// trailing partial block until more input arrives.
+    fn input(&mut self, data: &[u8]) {
+        let mut m = data;
+
+        // Top up a partially filled buffer from an earlier call first.
+        if self.leftover > 0 {
+            let want = min(16 - self.leftover, m.len());
+            let (head, rest) = m.split_at(want);
+
+            self.buffer[self.leftover..self.leftover + want].copy_from_slice(head);
+            m = rest;
+            self.leftover += want;
+
+            if self.leftover < 16 {
+                return;
+            }
+
+            self.block();
+            self.leftover = 0;
+        }
+
+        // Hash every complete block. Each block must be staged in `buffer`
+        // before calling `block()`, which hashes whatever `buffer` holds.
+        let mut blocks = m.chunks_exact(16);
+
+        for chunk in &mut blocks {
+            self.buffer.copy_from_slice(chunk);
+            self.block();
+        }
+
+        // Keep the trailing partial block (possibly empty) for the next call.
+        let tail = blocks.remainder();
+        self.buffer[..tail.len()].copy_from_slice(tail);
+        self.leftover = tail.len();
+    }
+
+    /// Not supported by this benchmark-only wrapper.
+    fn reset(&mut self) {
+        unimplemented!();
+    }
+
+    /// Return the POLYVAL output over all complete blocks seen so far.
+    ///
+    /// Bytes still sitting in the staging buffer (a trailing partial block)
+    /// are not included in the result.
+    fn result(self) -> MacResult<Self::OutputSize> {
+        let mut mac = GenericArray::default();
+        mac.copy_from_slice(&self.poly.result());
+        MacResult::new(mac)
+    }
+}
+
+impl PolyvalMac {
+    /// Feed the 16-byte staging buffer to POLYVAL as a single block.
+    fn block(&mut self) {
+        // `poly` and `buffer` are distinct fields, so one can be borrowed
+        // mutably while the other is borrowed immutably.
+        self.poly.input(&self.buffer);
+    }
+}
diff --git a/polyval/src/lib.rs b/polyval/src/lib.rs
@@ -1 +1,181 @@
-//! POLYVAL
+//! **POLYVAL** is a GHASH-like universal hash over GF(2^128) useful for
+//! implementing [AES-GCM-SIV] or [AES-GCM/GMAC].
+//!
+//! From [RFC 8452 Section 3] which defines POLYVAL for use in AES-GCM-SIV:
+//!
+//! > "POLYVAL, like GHASH (the authenticator in AES-GCM; ...), operates in a
+//! > binary field of size 2^128.  The field is defined by the irreducible
+//! > polynomial x^128 + x^127 + x^126 + x^121 + 1."
+//!
+//! By multiplying (in the finite field sense) a sequence of 128-bit blocks of
+//! input data by a field element `H`, POLYVAL can be used to authenticate
+//! the message sequence as powers (in a finite field sense) of `H`.
+//!
+//! ## Relationship to GHASH
+//!
+//! POLYVAL can be thought of as the little endian equivalent of GHASH, which
+//! affords it a small performance advantage over GHASH when used on little
+//! endian architectures.
+//!
+//! It has also been designed so that it can be used to compute GHASH and with
+//! it GMAC, the Message Authentication Code (MAC) used by AES-GCM.
+//!
+//! From [RFC 8452 Appendix A]:
+//!
+//! > "GHASH and POLYVAL both operate in GF(2^128), although with different
+//! > irreducible polynomials: POLYVAL works modulo x^128 + x^127 + x^126 +
+//! > x^121 + 1 and GHASH works modulo x^128 + x^7 + x^2 + x + 1.  Note
+//! > that these irreducible polynomials are the 'reverse' of each other."
+//!
+//! [AES-GCM-SIV]: https://en.wikipedia.org/wiki/AES-GCM-SIV
+//! [AES-GCM/GMAC]: https://en.wikipedia.org/wiki/Galois/Counter_Mode
+//! [RFC 8452 Section 3]: https://tools.ietf.org/html/rfc8452#section-3
+//! [RFC 8452 Appendix A]: https://tools.ietf.org/html/rfc8452#appendix-A
+
+#![no_std]
+#![doc(html_logo_url = "https://raw.githubusercontent.com/RustCrypto/meta/master/logo_small.png")]
+#![deny(missing_docs)]
+
+extern crate byteorder;
+#[cfg(feature = "zeroize")]
+extern crate zeroize;
+
+use byteorder::{ByteOrder, LE};
+#[cfg(feature = "zeroize")]
+use zeroize::Zeroize;
+
+/// Size of a GF(2^128) field element in bytes (16 bytes, i.e. 128 bits).
+pub const FIELD_SIZE: usize = 16;
+
+/// Byte array representation of an individual 128-bit field element.
+///
+/// From [RFC 8452 Section 3]:
+///
+/// > "Polynomials in this field are converted to and from 128-bit strings
+/// > by taking the least significant bit of the first byte to be the
+/// > coefficient of x^0, the most significant bit of the first byte to be
+/// > the coefficient of x^7, and so on, until the most significant bit of
+/// > the last byte is the coefficient of x^127."
+///
+/// Within this crate the 16 bytes are converted to and from a `u128` using
+/// little-endian byte order.
+///
+/// [RFC 8452 Section 3]: https://tools.ietf.org/html/rfc8452#section-3
+pub type FieldElement = [u8; FIELD_SIZE];
+
+/// **POLYVAL**: GHASH-like universal hash over GF(2^128).
+///
+/// Holds the key `H` and the running hash `S`. Each call to `input` XORs one
+/// block into `S` and multiplies the sum by `H`; `result` returns `S`.
+#[repr(align(16))]
+#[derive(Clone)]
+// The upper-case field names mirror the `H` / `S` notation of RFC 8452.
+#[allow(non_snake_case)]
+pub struct Polyval {
+    /// GF(2^128) field element input blocks are multiplied by
+    H: u128,
+
+    /// Field element representing the computed universal hash
+    S: u128,
+}
+
+impl Polyval {
+    /// Create a POLYVAL instance keyed with the field element `h`.
+    ///
+    /// The bytes of `h` are read as a little-endian 128-bit integer and the
+    /// running hash starts at zero.
+    pub fn new(h: &FieldElement) -> Self {
+        let key = LE::read_u128(h);
+        Self { H: key, S: 0 }
+    }
+
+    /// Absorb one 16-byte block `x` into the running hash.
+    ///
+    /// The block is added to the current state and the sum is multiplied by
+    /// the key `H`.
+    pub fn input(&mut self, x: &FieldElement) {
+        let block = LE::read_u128(x);
+
+        // Addition in the field is XOR (RFC 8452 Section 3: "The sum of any
+        // two elements in the field is the result of XORing them.")
+        self.S = gfmul(self.S ^ block, self.H);
+    }
+
+    /// Consume the instance and return the computed field element `S`,
+    /// serialized in little-endian byte order.
+    pub fn result(self) -> FieldElement {
+        let mut tag = [0u8; FIELD_SIZE];
+        LE::write_u128(&mut tag, self.S);
+        tag
+    }
+}
+
+/// Wipe the key and the running hash from memory when a `Polyval` is dropped
+/// (only with the `zeroize` feature enabled).
+#[cfg(feature = "zeroize")]
+impl Drop for Polyval {
+    fn drop(&mut self) {
+        // `H` and `S` are the only fields of `Polyval`.
+        self.H.zeroize();
+        self.S.zeroize();
+    }
+}
+
+/// Computes multiplication over POLYVAL's finite field using Shay Gueron's
+/// PCLMULQDQ-based optimization techniques.
+///
+/// From [RFC 8452 Section 3]:
+///
+/// > "The product of any two elements is calculated using standard
+/// > (binary) polynomial multiplication followed by reduction modulo the
+/// > irreducible polynomial."
+///
+/// The 256-bit carryless product of `a` and `b` is assembled from four
+/// 64x64-bit partial products. Its low 128 bits are passed through `reduce`
+/// and the result is XORed into its high 128 bits.
+///
+/// For more information on the techniques used in this implementation, see:
+/// <https://blog.quarkslab.com/reversing-a-finite-field-multiplication-optimization.html>
+///
+/// [RFC 8452 Section 3]: https://tools.ietf.org/html/rfc8452#section-3
+fn gfmul(a: u128, b: u128) -> u128 {
+    let t1 = pclmulqdq(a, b, 0x00); // a.lo * b.lo
+    let t2 = pclmulqdq(a, b, 0x01); // a.hi * b.lo
+    let t3 = pclmulqdq(a, b, 0x10); // a.lo * b.hi
+    let t4 = pclmulqdq(a, b, 0x11); // a.hi * b.hi
+
+    // The cross terms sit 64 bits up in the 256-bit product, so they straddle
+    // the boundary between its low and high 128-bit halves.
+    let t5 = t2 ^ t3;
+
+    // Low half: the full low 64 bits of the cross terms move into the upper
+    // 64 bits (the shift itself discards the rest).
+    let t6 = t1 ^ (t5 << 64);
+
+    // High half: the upper 64 bits of the cross terms land in the lower 64
+    // bits.
+    t4 ^ (t5 >> 64) ^ reduce(t6)
+}
+
+/// Fast reduction modulo x^128 + x^127 + x^126 + x^121 + 1 (Gueron 2012)
+/// Algorithm 4: "Montgomery reduction"
+///
+/// Performs two folding steps. Each step carrylessly multiplies the low 64
+/// bits of the running value by the constant `0xc200_0000_0000_0000` (the
+/// x^127 + x^126 + x^121 terms of the polynomial, taken as its upper 64 bits)
+/// and XORs the product into the value with its 64-bit halves swapped.
+///
+/// <https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf>
+#[allow(clippy::let_and_return, clippy::many_single_char_names)] // Quiet clippy, we're mathing!
+fn reduce(x: u128) -> u128 {
+    // `pclmulqdq(.., 0x01)` reads the HIGH 64 bits of its first argument, so
+    // the constant has to live in the upper half of the `u128`.
+    let mask: u128 = 0xc200_0000_0000_0000 << 64;
+    let a = pclmulqdq(mask, x, 0x01);
+    let b = x.rotate_left(64) ^ a;
+    let c = pclmulqdq(mask, b, 0x01);
+    let d = b.rotate_left(64) ^ c;
+    d
+}
+
+/// Software reimplementation of the PCLMULQDQ CPU instruction
+///
+/// Carrylessly multiplies one 64-bit half of `a` by one 64-bit half of `b`,
+/// giving a 128-bit product. `imm` selects the halves:
+///
+/// | `imm`  | half of `a` | half of `b` |
+/// |--------|-------------|-------------|
+/// | `0x00` | low         | low         |
+/// | `0x01` | high        | low         |
+/// | `0x10` | low         | high        |
+/// | `0x11` | high        | high        |
+///
+/// # Panics
+///
+/// Panics if `imm` is not one of the four values above.
+// TODO(tarcieri): support for CLMUL intrinsics
+// Background: <https://github.com/rust-lang/stdarch/issues/318>
+fn pclmulqdq(a: u128, b: u128, imm: u8) -> u128 {
+    // The `as u64` casts deliberately truncate to the low 64 bits.
+    let (a_lo, a_hi) = (a as u64, (a >> 64) as u64);
+    let (b_lo, b_hi) = (b as u64, (b >> 64) as u64);
+
+    match imm {
+        0x00 => clmul(a_lo, b_lo),
+        0x01 => clmul(a_hi, b_lo),
+        0x10 => clmul(a_lo, b_hi),
+        0x11 => clmul(a_hi, b_hi),
+        _ => panic!("invalid immediate byte value: 0x{:02x}", imm),
+    }
+}
+
+/// Carryless multiplication
+///
+/// Multiplies `a` and `b` as polynomials over GF(2): for every set bit `i`
+/// of `b`, `a << i` is XORed (rather than added) into the 128-bit result.
+fn clmul(a: u64, b: u64) -> u128 {
+    let a = u128::from(a);
+    let mut product = 0u128;
+
+    for i in 0..64 {
+        // All-ones when bit `i` of `b` is set, all-zeros otherwise; the
+        // shifted operand is selected with a mask rather than a branch.
+        let select = 0u128.wrapping_sub(u128::from((b >> i) & 1));
+        product ^= (a << i) & select;
+    }
+
+    product
+}