-
Notifications
You must be signed in to change notification settings - Fork 37
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[WIP] polyval: Initial implementation
Implements POLYVAL using Shay Gueron's techniques for efficient field multiplications using PCLMULQDQ. More information on these techniques here: https://blog.quarkslab.com/reversing-a-finite-field-multiplication-optimization.html
- Loading branch information
Showing
10 changed files
with
620 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,5 +5,5 @@ members = [ | |
"hmac", | ||
"pmac", | ||
"poly1305", | ||
"polyval" | ||
"polyval", | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
#![feature(test)] | ||
#[macro_use] | ||
extern crate crypto_mac; | ||
extern crate polyval; | ||
|
||
use crypto_mac::generic_array::{typenum::U16, GenericArray}; | ||
use crypto_mac::MacResult; | ||
use polyval::{FieldElement, Polyval}; | ||
use std::{cmp::min, convert::TryInto}; | ||
|
||
bench!(PolyvalMac); | ||
|
||
/// POLYVAL isn't a traditional MAC and for that reason doesn't impl the | ||
/// `crypto_mac::Mac` trait. | ||
/// | ||
/// This type is a newtype that impls a pseudo-MAC to leverage the benchmark | ||
/// functionality. | ||
/// | ||
/// This is just for benchmarking! Don't copy and paste this into your program | ||
/// unless you really know what you're doing!!! | ||
#[derive(Clone)] | ||
struct PolyvalMac { | ||
poly: Polyval, | ||
leftover: usize, | ||
buffer: FieldElement, | ||
} | ||
|
||
impl Mac for PolyvalMac { | ||
type OutputSize = U16; | ||
type KeySize = U16; | ||
|
||
fn new(key: &GenericArray<u8, Self::KeySize>) -> PolyvalMac { | ||
let poly = Polyval::new(key.as_slice().try_into().unwrap()); | ||
|
||
PolyvalMac { | ||
poly, | ||
leftover: 0, | ||
buffer: FieldElement::default(), | ||
} | ||
} | ||
|
||
fn input(&mut self, data: &[u8]) { | ||
let mut m = data; | ||
|
||
if self.leftover > 0 { | ||
let want = min(16 - self.leftover, m.len()); | ||
|
||
for (i, byte) in m.iter().cloned().enumerate().take(want) { | ||
self.buffer[self.leftover + i] = byte; | ||
} | ||
|
||
m = &m[want..]; | ||
self.leftover += want; | ||
|
||
if self.leftover < 16 { | ||
return; | ||
} | ||
|
||
self.block(); | ||
self.leftover = 0; | ||
} | ||
|
||
while m.len() >= 16 { | ||
self.block(); | ||
m = &m[16..]; | ||
} | ||
|
||
self.buffer[..m.len()].copy_from_slice(m); | ||
self.leftover = m.len(); | ||
} | ||
|
||
fn reset(&mut self) { | ||
unimplemented!(); | ||
} | ||
|
||
fn result(self) -> MacResult<Self::OutputSize> { | ||
let mut mac = GenericArray::default(); | ||
mac.copy_from_slice(&self.poly.result()); | ||
MacResult::new(mac) | ||
} | ||
} | ||
|
||
impl PolyvalMac { | ||
/// Input the current internal buffer into POLYVAL | ||
fn block(&mut self) { | ||
let elem = self.buffer; | ||
self.poly.input(&elem) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
//! Field arithmetic backends | ||
|
||
#[cfg(all( | ||
target_feature = "pclmulqdq", | ||
target_feature = "sse2", | ||
target_feature = "sse4.1", | ||
any(target_arch = "x86", target_arch = "x86_64") | ||
))] | ||
pub mod pclmulqdq; | ||
pub mod soft; | ||
|
||
use super::clmul::Clmul; | ||
use core::{ | ||
mem, | ||
ops::{BitXor, BitXorAssign}, | ||
}; | ||
|
||
/// Constant loaded into an XMM register while performing Montgomery reduction.
///
/// Lane 0 has only bit 0 set; lane 1 has bits 63, 62 and 57 set.
/// NOTE(review): these presumably correspond to the `1`, `x^127`, `x^126` and
/// `x^121` terms of the reduction polynomial — confirm against the slides.
/// See: <https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf>
const MASK: [u64; 2] = [0x0000_0000_0000_0001, 0xc200_0000_0000_0000];
|
||
/// Trait representing the arithmetic operations we expect on the XMM registers | ||
pub trait Xmm: | ||
BitXor<Output = Self> + BitXorAssign + Clmul + Copy + From<[u64; 2]> + Into<[u64; 2]> | ||
{ | ||
/// Fast reduction modulo x^128 + x^127 + x^126 +x^121 + 1 (Gueron 2012) | ||
/// Algorithm 4: "Montgomery reduction" | ||
fn reduce(self) -> Self { | ||
let mask = Self::from(MASK); | ||
let a = mask.clmul(self, 0x01); | ||
let b = self.rotate_left() ^ a; | ||
let c = mask.clmul(b, 0x01); | ||
b.rotate_left() ^ c | ||
} | ||
|
||
/// Rotate the contents of the register left by 64-bits | ||
fn rotate_left(self) -> Self { | ||
let t1: [u64; 2] = self.into(); | ||
let t2: [u32; 4] = unsafe { mem::transmute(t1) }; | ||
let t3 = [t2[2], t2[3], t2[0], t2[1]]; | ||
let t4: [u64; 2] = unsafe { mem::transmute(t3) }; | ||
t4.into() | ||
|
||
} | ||
|
||
/// Shift the contents of the register right by 64-bits | ||
fn shift_right(self) -> Self { | ||
let mut u64x2: [u64; 2] = self.into(); | ||
u64x2[1] = u64x2[0]; | ||
u64x2[0] = 0; | ||
u64x2.into() | ||
} | ||
|
||
/// Shift the contents of the register left by 64-bits | ||
fn shift_left(self) -> Self { | ||
let mut u64x2: [u64; 2] = self.into(); | ||
u64x2[0] = u64x2[1]; | ||
u64x2[1] = 0; | ||
u64x2.into() | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
//! Support for the PCLMULQDQ CPU intrinsic on `x86` and `x86_64` target | ||
//! architectures. | ||
|
||
#[cfg(target_arch = "x86")] | ||
use core::arch::x86::*; | ||
#[cfg(target_arch = "x86_64")] | ||
use core::arch::x86_64::*; | ||
|
||
use super::Xmm; | ||
use crate::field::clmul::{self, Clmul}; | ||
use core::ops::{BitXor, BitXorAssign}; | ||
|
||
/// A pair of `u64` lanes held in a single `__m128i` register value.
#[derive(Copy, Clone)]
#[repr(align(16))]
pub struct U64x2(__m128i);
|
||
impl From<[u64; 2]> for U64x2 { | ||
fn from(array: [u64; 2]) -> U64x2 { | ||
unsafe { _mm_loadu_si128(array.as_ptr() as *const __m128i) }.into() | ||
} | ||
} | ||
|
||
impl From<U64x2> for [u64; 2] { | ||
fn from(u64x2: U64x2) -> [u64; 2] { | ||
let mut result = [0u64; 2]; | ||
|
||
unsafe { | ||
_mm_storeu_si128(result.as_mut_ptr() as *mut __m128i, u64x2.0); | ||
} | ||
|
||
result | ||
} | ||
} | ||
|
||
impl From<__m128i> for U64x2 { | ||
fn from(mm: __m128i) -> U64x2 { | ||
U64x2(mm) | ||
} | ||
} | ||
|
||
impl From<U64x2> for __m128i { | ||
fn from(u64x2: U64x2) -> __m128i { | ||
u64x2.0 | ||
} | ||
} | ||
|
||
impl BitXor for U64x2 { | ||
type Output = Self; | ||
|
||
fn bitxor(self, rhs: Self) -> Self::Output { | ||
U64x2(unsafe { xor(self.0, rhs.0) }) | ||
} | ||
} | ||
|
||
impl BitXorAssign for U64x2 { | ||
fn bitxor_assign(&mut self, rhs: Self) { | ||
// TODO(tarcieri): optimize | ||
self.0 = unsafe { xor(self.0, rhs.0) }; | ||
} | ||
} | ||
|
||
impl Clmul for U64x2 { | ||
fn clmul<I>(self, rhs: Self, imm: I) -> Self | ||
where | ||
I: Into<clmul::PseudoOp>, | ||
{ | ||
unsafe { vpclmulqdq(self.0, rhs.0, imm.into()) }.into() | ||
} | ||
} | ||
|
||
// TODO(tarcieri): optimized `rotate_left`, `shift_right`, `shift_left` | ||
impl Xmm for U64x2 {} | ||
|
||
/// Bitwise XOR of two 128-bit register values.
///
/// # Safety
///
/// The CPU must support the target features enabled on this function.
#[target_feature(enable = "sse2", enable = "sse4.1")]
unsafe fn xor(lhs: __m128i, rhs: __m128i) -> __m128i {
    _mm_xor_si128(lhs, rhs)
}
|
||
#[target_feature(enable = "pclmulqdq", enable = "sse2", enable = "sse4.1")] | ||
unsafe fn vpclmulqdq(a: __m128i, b: __m128i, op: clmul::PseudoOp) -> __m128i { | ||
match op { | ||
clmul::PseudoOp::PCLMULLQLQDQ => _mm_clmulepi64_si128(a, b, 0x00), | ||
clmul::PseudoOp::PCLMULHQLQDQ => _mm_clmulepi64_si128(a, b, 0x01), | ||
clmul::PseudoOp::PCLMULLQHQDQ => _mm_clmulepi64_si128(a, b, 0x10), | ||
clmul::PseudoOp::PCLMULHQHQDQ => _mm_clmulepi64_si128(a, b, 0x11), | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use crate::field::{
        backend::soft,
        clmul::{self, Clmul},
    };

    /// The hardware backend and the software emulation must agree on the
    /// same carry-less product.
    #[test]
    fn vclmul_emulation() {
        let lhs: [u64; 2] = [0x00000000ada5f29b, 0];
        let rhs: [u64; 2] = [0x000000002d978a49, 0];
        let op = clmul::PseudoOp::from(0x00);

        let hard = super::U64x2::from(lhs).clmul(rhs.into(), op);
        let soft = soft::U64x2::from(lhs).clmul(rhs.into(), op);

        let hard_result: [u64; 2] = hard.into();
        let soft_result: [u64; 2] = soft.into();

        assert_eq!(&hard_result, &soft_result);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
//! Software emulation support for CLMUL hardware intrinsics. | ||
//! | ||
//! WARNING: Not constant time! Should be made constant-time or disabled by default. | ||
|
||
use super::Xmm; | ||
use field::clmul::{self, Clmul}; | ||
use core::ops::{BitXor, BitXorAssign}; | ||
|
||
/// 2 x `u64` values | ||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct U64x2([u64; 2]);
|
||
impl From<[u64; 2]> for U64x2 { | ||
fn from(array: [u64; 2]) -> U64x2 { | ||
U64x2(array) | ||
} | ||
} | ||
|
||
impl From<U64x2> for [u64; 2] { | ||
fn from(u64x2: U64x2) -> [u64; 2] { | ||
u64x2.0 | ||
} | ||
} | ||
|
||
impl BitXor for U64x2 { | ||
type Output = Self; | ||
|
||
fn bitxor(self, rhs: Self) -> Self::Output { | ||
U64x2([self.0[0] ^ rhs.0[0], self.0[1] ^ rhs.0[1]]) | ||
} | ||
} | ||
|
||
impl BitXorAssign for U64x2 { | ||
fn bitxor_assign(&mut self, rhs: Self) { | ||
self.0[0] ^= rhs.0[0]; | ||
self.0[1] ^= rhs.0[1]; | ||
} | ||
} | ||
|
||
impl Clmul for U64x2 { | ||
fn clmul<I>(self, other: Self, imm: I) -> Self | ||
where | ||
I: Into<clmul::PseudoOp>, | ||
{ | ||
let (a, b) = match imm.into() { | ||
clmul::PseudoOp::PCLMULLQLQDQ => (self.0[0], other.0[0]), | ||
clmul::PseudoOp::PCLMULHQLQDQ => (self.0[1], other.0[0]), | ||
clmul::PseudoOp::PCLMULLQHQDQ => (self.0[0], other.0[1]), | ||
clmul::PseudoOp::PCLMULHQHQDQ => (self.0[1], other.0[1]), | ||
}; | ||
|
||
let mut result = [0u64; 2]; | ||
|
||
for i in 0..64 { | ||
if b & (1 << i) != 0 { | ||
result[1] ^= a; | ||
} | ||
|
||
result[0] >>= 1; | ||
|
||
if result[1] & 1 != 0 { | ||
result[0] ^= 1 << 63; | ||
} | ||
|
||
result[1] >>= 1; | ||
} | ||
|
||
result.into() | ||
} | ||
} | ||
|
||
impl Xmm for U64x2 {} |
Oops, something went wrong.