diff --git a/polyval/Cargo.toml b/polyval/Cargo.toml index a0fc47d..75693ab 100644 --- a/polyval/Cargo.toml +++ b/polyval/Cargo.toml @@ -10,6 +10,12 @@ keywords = ["aes-gcm-siv", "crypto", "ghash", "gcm", "universal-hashing"] categories = ["cryptography", "no-std"] [dependencies] +byteorder = { version = "1", default-features = false } +zeroize = { version = "0.9", optional = true, default-features = false } + +[dev-dependencies] +crypto-mac = { version = "0.7", features = ["dev"] } +hex-literal = "0.2" [badges] travis-ci = { repository = "RustCrypto/hashes" } diff --git a/polyval/benches/polyval.rs b/polyval/benches/polyval.rs new file mode 100644 index 0000000..a3cba0a --- /dev/null +++ b/polyval/benches/polyval.rs @@ -0,0 +1,89 @@ +#![feature(test)] +#[macro_use] +extern crate crypto_mac; +extern crate polyval; + +use crypto_mac::generic_array::{typenum::U16, GenericArray}; +use crypto_mac::MacResult; +use polyval::{FieldElement, Polyval}; +use std::{cmp::min, convert::TryInto}; + +bench!(PolyvalMac); + +/// POLYVAL isn't a traditional MAC and for that reason doesn't impl the +/// `crypto_mac::Mac` trait. +/// +/// This type is a newtype that impls a pseudo-MAC to leverage the benchmark +/// functionality. +/// +/// This is just for benchmarking! Don't copy and paste this into your program +/// unless you really know what you're doing!!! 
+#[derive(Clone)]
+struct PolyvalMac {
+    poly: Polyval, // POLYVAL state that complete 16-byte blocks are fed into
+    leftover: usize, // number of valid bytes at the front of `buffer`
+    buffer: FieldElement, // staging area used to assemble one 16-byte block
+}
+
+impl Mac for PolyvalMac {
+    type OutputSize = U16;
+    type KeySize = U16;
+
+    fn new(key: &GenericArray<u8, Self::KeySize>) -> PolyvalMac {
+        let poly = Polyval::new(key.as_slice().try_into().unwrap()); // key is always 16 bytes
+
+        PolyvalMac {
+            poly,
+            leftover: 0,
+            buffer: FieldElement::default(),
+        }
+    }
+
+    fn input(&mut self, data: &[u8]) {
+        let mut m = data;
+
+        if self.leftover > 0 {
+            let want = min(16 - self.leftover, m.len());
+
+            // Top up the partially filled block with the first `want` bytes of input.
+            self.buffer[self.leftover..self.leftover + want].copy_from_slice(&m[..want]);
+
+            m = &m[want..];
+            self.leftover += want;
+
+            if self.leftover < 16 {
+                return;
+            }
+
+            self.block();
+            self.leftover = 0;
+        }
+
+        while m.len() >= 16 {
+            self.buffer.copy_from_slice(&m[..16]); // stage the block: `block()` hashes `buffer`
+            self.block();
+            m = &m[16..];
+        }
+
+        self.buffer[..m.len()].copy_from_slice(m); // keep the tail for the next call
+        self.leftover = m.len();
+    }
+
+    fn reset(&mut self) {
+        unimplemented!();
+    }
+
+    fn result(self) -> MacResult<Self::OutputSize> {
+        let mut mac = GenericArray::default();
+        mac.copy_from_slice(&self.poly.result()); // NOTE: a buffered partial block is not hashed
+        MacResult::new(mac)
+    }
+}
+
+impl PolyvalMac {
+    /// Input the current internal buffer into POLYVAL
+    fn block(&mut self) {
+        let elem = self.buffer;
+        self.poly.input(&elem)
+    }
+}
diff --git a/polyval/src/lib.rs b/polyval/src/lib.rs
index 1b3bed6..ad6f9cc 100644
--- a/polyval/src/lib.rs
+++ b/polyval/src/lib.rs
@@ -1 +1,187 @@
-//! POLYVAL
+//! **POLYVAL** is a GHASH-like universal hash over GF(2^128) useful for
+//! implementing [AES-GCM-SIV] or [AES-GCM/GMAC].
+//!
+//! From [RFC 8452 Section 3] which defines POLYVAL for use in AES-GCM-SIV:
+//!
+//! > "POLYVAL, like GHASH (the authenticator in AES-GCM; ...), operates in a
+//! > binary field of size 2^128. The field is defined by the irreducible
+//! > polynomial x^128 + x^127 + x^126 + x^121 + 1."
+//!
+//! By multiplying (in the finite field sense) a sequence of 128-bit blocks of
+//! input data by a field element `H`, POLYVAL can be used to authenticate
+//! 
the message sequence as powers (in a finite field sense) of `H`. +//! +//! ## Relationship to GHASH +//! +//! POLYVAL can be thought of as the little endian equivalent of GHASH, which +//! affords it a small performance advantage over GHASH when used on little +//! endian architectures. +//! +//! It has also been designed so it can also be used to compute GHASH and with +//! it GMAC, the Message Authentication Code (MAC) used by AES-GCM. +//! +//! From [RFC 8452 Appendix A]: +//! +//! > "GHASH and POLYVAL both operate in GF(2^128), although with different +//! > irreducible polynomials: POLYVAL works modulo x^128 + x^127 + x^126 + +//! > x^121 + 1 and GHASH works modulo x^128 + x^7 + x^2 + x + 1. Note +//! > that these irreducible polynomials are the 'reverse' of each other." +//! +//! [AES-GCM-SIV]: https://en.wikipedia.org/wiki/AES-GCM-SIV +//! [AES-GCM/GMAC]: https://en.wikipedia.org/wiki/Galois/Counter_Mode +//! [RFC 8452 Section 3]: https://tools.ietf.org/html/rfc8452#section-3 +//! [RFC 8452 Appendix A]: https://tools.ietf.org/html/rfc8452#appendix-A + +#![no_std] +#![doc(html_logo_url = "https://raw.githubusercontent.com/RustCrypto/meta/master/logo_small.png")] +#![deny(missing_docs)] + +extern crate byteorder; +#[cfg(feature = "zeroize")] +extern crate zeroize; + +use byteorder::{ByteOrder, LE}; +#[cfg(feature = "zeroize")] +use zeroize::Zeroize; + +/// Size of the GF(2^128) field modulus in bytes (16-bytes). +pub const FIELD_SIZE: usize = 16; + +/// Byte array representation of an individual 128-bit field element. +/// +/// From [RFC 8452 Section 3]: +/// +/// > "Polynomials in this field are converted to and from 128-bit strings +/// > by taking the least significant bit of the first byte to be the +/// > coefficient of x^0, the most significant bit of the first byte to be +/// > the coefficient of x^7, and so on, until the most significant bit of +/// > the last byte is the coefficient of x^127." 
+///
+/// [RFC 8452 Section 3]: https://tools.ietf.org/html/rfc8452#section-3
+pub type FieldElement = [u8; FIELD_SIZE];
+
+/// **POLYVAL**: GHASH-like universal hash over GF(2^128).
+#[repr(align(16))]
+#[derive(Clone)]
+#[allow(non_snake_case)]
+pub struct Polyval {
+    /// GF(2^128) field element input blocks are multiplied by
+    H: u128,
+
+    /// Field element representing the computed universal hash
+    S: u128,
+}
+
+impl Polyval {
+    /// Initialize POLYVAL with the given `H` field element; the state `S` starts at zero.
+    pub fn new(h: &FieldElement) -> Self {
+        Self {
+            H: LE::read_u128(h), // little-endian: bit 0 of byte 0 is the coefficient of x^0
+            S: 0,
+        }
+    }
+
+    /// Input a field element `X` to be authenticated into POLYVAL: `S = (S + X) * H`.
+    pub fn input(&mut self, x: &FieldElement) {
+        // "The sum of any two elements in the field is the result of XORing them."
+        // -- RFC 8452 Section 3
+        let x = self.S ^ LE::read_u128(x);
+        self.S = gfmul(x, self.H);
+    }
+
+    /// Builder-style `input`: absorbs `x` and hands the hasher back so calls can be chained.
+    pub fn chain(mut self, x: &FieldElement) -> Self {
+        self.input(x);
+        self
+    }
+
+    /// Get POLYVAL result (i.e. computed `S` field element) as little-endian bytes.
+    pub fn result(self) -> FieldElement {
+        let mut output = FieldElement::default();
+        LE::write_u128(&mut output, self.S);
+        output
+    }
+}
+
+#[cfg(feature = "zeroize")]
+impl Drop for Polyval {
+    // Wipe the key `H` and state `S` on drop; these are the only fields the struct has.
+    fn drop(&mut self) {
+        self.H.zeroize();
+        self.S.zeroize();
+    }
+}
+
+/// Computes POLYVAL field multiplication over GF(2^128) field using
+/// Shay Gueron's PCLMULQDQ-based optimization techniques.
+///
+/// From [RFC 8452 Section 3]:
+///
+/// > "The product of any two elements is calculated using standard
+/// > (binary) polynomial multiplication followed by reduction modulo the
+/// > irreducible polynomial."
+///
+/// The four 64x64-bit carry-less partial products are combined schoolbook-style and
+/// the low half is folded away by `reduce`, giving `a * b * x^-128` mod the polynomial.
+///
+/// [RFC 8452 Section 3]: https://tools.ietf.org/html/rfc8452#section-3
+fn gfmul(a: u128, b: u128) -> u128 {
+    let t1 = pclmulqdq(a, b, 0x00); // a_lo * b_lo
+    let t2 = pclmulqdq(a, b, 0x01); // a_hi * b_lo
+    let t3 = pclmulqdq(a, b, 0x10); // a_lo * b_hi
+    let t4 = pclmulqdq(a, b, 0x11); // a_hi * b_hi
+    let t5 = t2 ^ t3; // middle terms, weighted by x^64
+    let t6 = t1 ^ (t5 << 64); // low 128 bits of the 256-bit product (`<<` drops the overflow)
+    t4 ^ (t5 >> 64) ^ reduce(t6) // high 128 bits, plus the folded-in low half
+}
+
+/// Fast reduction modulo x^128 + x^127 + x^126 + x^121 + 1 (Gueron 2012)
+/// Algorithm 4: "Montgomery reduction"
+///
+/// `x` is the low half of a 256-bit product; the result is XORed into the high half.
+#[allow(clippy::let_and_return, clippy::many_single_char_names)] // Quiet clippy, we're mathing!
+fn reduce(x: u128) -> u128 {
+    let mask = 0xc200_0000_0000_0000_u128 << 64; // x^127 + x^126 + x^121, in the high quadword
+    let a = pclmulqdq(mask, x, 0x01); // imm 0x01: high quadword of `mask` times low quadword of `x`
+    let b = x.rotate_left(64) ^ a; // rotate_left(64) swaps the two quadwords
+    let c = pclmulqdq(mask, b, 0x01);
+    let d = b.rotate_left(64) ^ c;
+    d
+}
+
+/// Software reimplementation of the PCLMULQDQ CPU instruction (panics on an unsupported `imm`)
+// TODO(tarcieri): support for CLMUL intrinsics
+// Bit 0 of `imm` picks the quadword of `a` and bit 4 the quadword of `b` (set = high half).
+fn pclmulqdq(a: u128, b: u128, imm: u8) -> u128 {
+    // `as u64` truncates to the low quadword; shifting right by 64 first yields the high one.
+    let (a_lo, a_hi) = (a as u64, (a >> 64) as u64);
+    let (b_lo, b_hi) = (b as u64, (b >> 64) as u64);
+
+    match imm {
+        0x00 => clmul(a_lo, b_lo),
+        0x01 => clmul(a_hi, b_lo),
+        0x10 => clmul(a_lo, b_hi),
+        0x11 => clmul(a_hi, b_hi),
+        _ => panic!("invalid immediate byte value: 0x{:02x}", imm),
+    }
+}
+
+/// Carryless multiplication of two 64-bit polynomials over GF(2)
+///
+/// Bit `i` of each operand is the coefficient of x^i. The 128-bit product is the
+/// XOR of `a << i` for every set bit `i` of `b`, so nothing carries between bits.
+///
+/// Each term is selected with an all-ones/all-zeros mask rather than an `if`, so
+/// the loop does not branch on the bits of `b` (one operand is derived from `H`).
+/// NOTE(review): this is a best-effort measure, not a verified constant-time guarantee.
+fn clmul(a: u64, b: u64) -> u128 {
+    let a = u128::from(a);
+    let mut product = 0u128;
+
+    for i in 0..64 {
+        // All ones when bit `i` of `b` is set, all zeros otherwise.
+        let select = 0u128.wrapping_sub(u128::from((b >> i) & 1));
+        product ^= (a << i) & select;
+    }
+
+    product
+}
diff --git a/polyval/tests/lib.rs b/polyval/tests/lib.rs
new file mode 100644
index 0000000..a3b18cd
--- /dev/null
+++ b/polyval/tests/lib.rs
@@ -0,0 +1,23 @@
+#[macro_use]
+extern crate hex_literal;
+extern crate polyval;
+
+use polyval::{FieldElement, 
Polyval};
+
+//
+// Test vectors for POLYVAL from RFC 8452 Appendix A
+// https://tools.ietf.org/html/rfc8452#appendix-A
+//
+
+const H: FieldElement = hex!("25629347589242761d31f826ba4b757b");
+const X_1: FieldElement = hex!("4f4f95668c83dfb6401762bb2d01a262");
+const X_2: FieldElement = hex!("d1a24ddd2721d006bbe45f20d3c9f362");
+
+/// Expected value of POLYVAL(H, X_1, X_2)
+const POLYVAL_RESULT: FieldElement = hex!("f7a3b47b846119fae5b7866cf5e5b77e");
+
+#[test]
+fn rfc_8452_test_vector() {
+    let digest = [X_1, X_2].iter().fold(Polyval::new(&H), Polyval::chain).result();
+    assert_eq!(digest, POLYVAL_RESULT);
+}