Skip to content

Commit

Permalink
[WIP] polyval: Initial implementation
Browse files Browse the repository at this point in the history
Implements POLYVAL using Shay Gueron's techniques for efficient field
multiplications using PCLMULQDQ.

More information on these techniques here:

https://blog.quarkslab.com/reversing-a-finite-field-multiplication-optimization.html
  • Loading branch information
tarcieri committed Aug 15, 2019
1 parent 2fbadfb commit 8075ae1
Show file tree
Hide file tree
Showing 10 changed files with 669 additions and 2 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ members = [
"hmac",
"pmac",
"poly1305",
"polyval"
"polyval",
]
9 changes: 9 additions & 0 deletions polyval/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,15 @@ keywords = ["aes-gcm-siv", "crypto", "ghash", "gcm", "universal-hashing"]
categories = ["cryptography", "no-std"]

[dependencies]
byteorder = { version = "1", default-features = false }
zeroize = { version = "0.9", optional = true, default-features = false }

[dev-dependencies]
crypto-mac = { version = "0.7", features = ["dev"] }
hex-literal = "0.2"

[features]
nightly = []

[badges]
travis-ci = { repository = "RustCrypto/hashes" }
89 changes: 89 additions & 0 deletions polyval/benches/polyval.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#![feature(test)]
#[macro_use]
extern crate crypto_mac;
extern crate polyval;

use crypto_mac::generic_array::{typenum::U16, GenericArray};
use crypto_mac::MacResult;
use polyval::{FieldElement, Polyval};
use std::{cmp::min, convert::TryInto};

bench!(PolyvalMac);

/// POLYVAL isn't a traditional MAC and for that reason doesn't impl the
/// `crypto_mac::Mac` trait.
///
/// This type is a newtype that impls a pseudo-MAC to leverage the benchmark
/// functionality.
///
/// This is just for benchmarking! Don't copy and paste this into your program
/// unless you really know what you're doing!!!
#[derive(Clone)]
struct PolyvalMac {
/// Underlying POLYVAL state that receives each buffered block.
poly: Polyval,
/// Number of bytes currently held in `buffer`; stays below 16 between
/// calls to `input`.
leftover: usize,
/// Staging area for input that does not yet make up a full 16-byte block.
buffer: FieldElement,
}

impl Mac for PolyvalMac {
    type OutputSize = U16;
    type KeySize = U16;

    /// Build the benchmark pseudo-MAC from a POLYVAL key, starting with an
    /// empty block buffer.
    fn new(key: &GenericArray<u8, Self::KeySize>) -> PolyvalMac {
        // Panics if the key slice cannot be converted into the type
        // `Polyval::new` expects.
        let poly = Polyval::new(key.as_slice().try_into().unwrap());

        PolyvalMac {
            poly,
            leftover: 0,
            buffer: FieldElement::default(),
        }
    }

    /// Absorb `data`, feeding POLYVAL one 16-byte block at a time.
    ///
    /// Bytes that do not fill a whole block are kept in `buffer` (with their
    /// count in `leftover`) until a later call completes the block.
    fn input(&mut self, data: &[u8]) {
        let mut m = data;

        // First top up a partially filled buffer left by a previous call.
        if self.leftover > 0 {
            let want = min(16 - self.leftover, m.len());

            for (i, &byte) in m[..want].iter().enumerate() {
                self.buffer[self.leftover + i] = byte;
            }

            m = &m[want..];
            self.leftover += want;

            // Still not a full block: wait for more input.
            if self.leftover < 16 {
                return;
            }

            self.block();
            self.leftover = 0;
        }

        while m.len() >= 16 {
            // Load the next full block into the buffer before hashing it;
            // without this copy `block()` would re-hash stale buffer contents
            // instead of the input data.
            self.buffer[..16].copy_from_slice(&m[..16]);
            self.block();
            m = &m[16..];
        }

        // Stash the trailing partial block (possibly empty) for the next call.
        self.buffer[..m.len()].copy_from_slice(m);
        self.leftover = m.len();
    }

    /// Resetting is not needed by the benchmark and is left unimplemented.
    fn reset(&mut self) {
        unimplemented!();
    }

    /// Return the current POLYVAL output wrapped as a `MacResult`.
    ///
    /// NOTE: bytes still pending in `buffer` (`leftover > 0`) are not fed to
    /// POLYVAL here; only complete blocks contribute to the result.
    fn result(self) -> MacResult<Self::OutputSize> {
        let mut mac = GenericArray::default();
        mac.copy_from_slice(&self.poly.result());
        MacResult::new(mac)
    }
}

impl PolyvalMac {
    /// Feed the block currently held in `buffer` to the inner POLYVAL state.
    fn block(&mut self) {
        let pending = self.buffer;
        self.poly.input(&pending)
    }
}
147 changes: 147 additions & 0 deletions polyval/src/field.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
//! Implementation of POLYVAL's finite field.
//!
//! From [RFC 8452 Section 3] which defines POLYVAL for use in AES-GCM-SIV:
//!
//! > "POLYVAL, like GHASH (the authenticator in AES-GCM; ...), operates in a
//! > binary field of size 2^128. The field is defined by the irreducible
//! > polynomial x^128 + x^127 + x^126 + x^121 + 1."
//!
//! This implementation provides multiplication over GF(2^128) optimized using
//! Shay Gueron's PCLMULQDQ-based techniques.
//!
//! For more information on how these techniques work, see:
//! <https://blog.quarkslab.com/reversing-a-finite-field-multiplication-optimization.html>
//!
//! [RFC 8452 Section 3]: https://tools.ietf.org/html/rfc8452#section-3

pub mod backend;
pub mod clmul;

use self::backend::Xmm;
use super::FIELD_SIZE;
use byteorder::{ByteOrder, LE};
use core::ops::{BitXor, Mul};

/// POLYVAL field element.
///
/// Newtype over a backend register type `X` implementing [`Xmm`]; all field
/// arithmetic is delegated to the operations that trait provides.
#[derive(Copy, Clone)]
pub struct FieldElement<X: Xmm>(X);

impl<X: Xmm> FieldElement<X> {
/// Load a `FieldElement` from its bytestring representation.
pub fn from_bytes(bytes: [u8; FIELD_SIZE]) -> Self {
let mut u64x2 = [0u64; 2];
LE::read_u64_into(&bytes, &mut u64x2);
u64x2.into()
}

/// Serialize this `FieldElement` as a bytestring.
pub fn to_bytes(self) -> [u8; FIELD_SIZE] {
let u64x2: [u64; 2] = self.0.into();
let mut result = [0u8; FIELD_SIZE];
LE::write_u64_into(&u64x2, &mut result);
result
}
}

impl<X: Xmm> Mul for FieldElement<X> {
    type Output = Self;

    /// Computes POLYVAL multiplication over GF(2^128).
    ///
    /// From [RFC 8452 Section 3]:
    ///
    /// > "The product of any two elements is calculated using standard
    /// > (binary) polynomial multiplication followed by reduction modulo the
    /// > irreducible polynomial."
    ///
    /// The product is assembled from four carry-less multiplications whose
    /// immediates (`0x00`, `0x01`, `0x10`, `0x11`) presumably select the
    /// 64-bit halves of each operand as in PCLMULQDQ (confirm against the
    /// `Clmul` implementation). The two cross terms are folded into the
    /// outer terms, and the result is reduced with [`Xmm::reduce`].
    ///
    /// [RFC 8452 Section 3]: https://tools.ietf.org/html/rfc8452#section-3
    fn mul(self, rhs: Self) -> Self {
        let lhs = self.0;
        let rhs = rhs.0;

        let mut low = lhs.clmul(rhs, 0x00);
        let mut cross = lhs.clmul(rhs, 0x01);
        let cross_other = lhs.clmul(rhs, 0x10);
        let mut high = lhs.clmul(rhs, 0x11);

        // Combine both cross terms, then split the sum across the outer terms.
        cross ^= cross_other;
        low ^= cross.shift_right();
        high ^= cross.shift_left();

        // Fold the reduced term into the other to get the field element.
        high ^= low.reduce();

        FieldElement(high)
    }
}

/// Wrap a raw backend register as a field element.
impl<X: Xmm> From<X> for FieldElement<X> {
    fn from(register: X) -> Self {
        Self(register)
    }
}

/// Build a field element from two `u64` words via the backend's own
/// `From<[u64; 2]>` conversion.
impl<X: Xmm> From<[u64; 2]> for FieldElement<X> {
    fn from(words: [u64; 2]) -> Self {
        let register: X = words.into();
        Self(register)
    }
}

impl<X: Xmm> From<FieldElement<X>> for [u64; 2] {
fn from(fe: FieldElement<X>) -> [u64; 2] {
fe.0.into()
}
}

impl<X: Xmm> BitXor for FieldElement<X> {
type Output = Self;

fn bitxor(self, rhs: Self) -> Self::Output {
FieldElement(self.0 ^ rhs.0)
}
}

#[cfg(test)]
// `mulX_polyval` deliberately mirrors the RFC's `mulX_POLYVAL` spelling.
#[allow(non_snake_case)]
mod tests {
use super::FieldElement;
use crate::{field::backend::soft::U64x2, FIELD_SIZE};

// All tests run against the software backend's register type.
type FE = FieldElement<U64x2>;

//
// Field Operation Examples from RFC 8452 Section 7
// <https://tools.ietf.org/html/rfc8452#section-7>
//

const A: [u8; FIELD_SIZE] = hex!("66e94bd4ef8a2c3b884cfa59ca342b2e");
const B: [u8; FIELD_SIZE] = hex!("ff000000000000000000000000000000");
const A_MUL_B: [u8; FIELD_SIZE] = hex!("37856175e9dc9df26ebc6d6171aa0ae9");

/// Multiplying `A` by `B` must produce `A_MUL_B`.
#[test]
fn rfc_8452_test_vector() {
let a = FE::from_bytes(A);
let b = FE::from_bytes(B);
assert_eq!(&A_MUL_B, &(a * b).to_bytes());
}

//
// `mulX_POLYVAL` examples from RFC 8452 Appendix A
// <https://tools.ietf.org/html/rfc8452#appendix-A>
//

// Multiplier used by `mulX_polyval` below.
const X: [u8; FIELD_SIZE] = hex!("02000000000000000000000000000000");

/// Implementation of the `mulX_POLYVAL` function described in Appendix A.
fn mulX_polyval(input: [u8; FIELD_SIZE]) -> [u8; FIELD_SIZE] {
(FE::from_bytes(input) * FE::from_bytes(X)).to_bytes()
}

/// Checks `mulX_polyval` against two input/output pairs.
#[test]
fn rfc_8452_mulx_polyval_vectors() {
let one = hex!("01000000000000000000000000000000");
let one_result = mulX_polyval(one);
assert_eq!(&one_result, &hex!("02000000000000000000000000000000"));

let another = hex!("9c98c04df9387ded828175a92ba652d8");
let another_result = mulX_polyval(another);
assert_eq!(&another_result, &hex!("3931819bf271fada0503eb52574ca5f2"));
}
}
58 changes: 58 additions & 0 deletions polyval/src/field/backend.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
//! Field arithmetic backends

#[cfg(all(
target_feature = "pclmulqdq",
target_feature = "sse2",
target_feature = "sse4.1",
any(target_arch = "x86", target_arch = "x86_64")
))]
pub mod pclmulqdq;
pub mod soft;

use super::clmul::Clmul;
use core::ops::{BitXor, BitXorAssign};

/// Mask value to load into XMM register when performing Montgomery reduction.
/// See: <https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf>
///
/// Reading element 1 as the upper 64 bits (as `Xmm::rotate_left` does), the
/// set bits correspond to x^127 + x^126 + x^121 + 1, i.e. the field
/// polynomial x^128 + x^127 + x^126 + x^121 + 1 without its x^128 term.
const MASK: [u64; 2] = [0x1, 0xc200_0000_0000_0000];

/// Trait representing the arithmetic operations we expect on the XMM registers
///
/// A register converts to and from `[u64; 2]`; the default methods below
/// operate on that two-word representation.
pub trait Xmm:
    BitXor<Output = Self> + BitXorAssign + Clmul + Copy + From<[u64; 2]> + Into<[u64; 2]>
{
    /// Fast reduction modulo x^128 + x^127 + x^126 + x^121 + 1 (Gueron 2012)
    /// Algorithm 4: "Montgomery reduction"
    ///
    /// Runs two identical rounds: multiply by `MASK` with `clmul`, then XOR
    /// the product with the 64-bit rotation of the running value.
    fn reduce(self) -> Self {
        let mask = Self::from(MASK);
        let a = mask.clmul(self, 0x01);
        let b = self.rotate_left() ^ a;
        let c = mask.clmul(b, 0x01);
        b.rotate_left() ^ c
    }

    /// Rotate the contents of the register left by 64-bits
    ///
    /// Rotating a 128-bit value by 64 bits is exactly a swap of its two
    /// 64-bit words, so the words are exchanged directly. (The previous
    /// `u128`-based version masked the low word with `0xFFFF_FFFF`, which
    /// dropped its upper 32 bits.)
    fn rotate_left(self) -> Self {
        let [first, second]: [u64; 2] = self.into();
        Self::from([second, first])
    }

    /// Shift the contents of the register right by 64-bits
    ///
    /// In terms of the `[u64; 2]` representation: element 0 moves to
    /// element 1 and element 0 is cleared; the old element 1 is discarded.
    fn shift_right(self) -> Self {
        let [first, _]: [u64; 2] = self.into();
        Self::from([0, first])
    }

    /// Shift the contents of the register left by 64-bits
    ///
    /// In terms of the `[u64; 2]` representation: element 1 moves to
    /// element 0 and element 1 is cleared; the old element 0 is discarded.
    fn shift_left(self) -> Self {
        let [_, second]: [u64; 2] = self.into();
        Self::from([second, 0])
    }
}
Loading

0 comments on commit 8075ae1

Please sign in to comment.