-
Notifications
You must be signed in to change notification settings - Fork 37
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implements POLYVAL using Shay Gueron's techniques for efficient field multiplications using PCLMULQDQ. More information on these techniques here: https://blog.quarkslab.com/reversing-a-finite-field-multiplication-optimization.html
- Loading branch information
Showing
11 changed files
with
689 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,5 +5,5 @@ members = [ | |
"hmac", | ||
"pmac", | ||
"poly1305", | ||
"polyval" | ||
"polyval", | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
#![feature(test)] | ||
#[macro_use] | ||
extern crate crypto_mac; | ||
extern crate polyval; | ||
|
||
use crypto_mac::generic_array::{typenum::U16, GenericArray}; | ||
use crypto_mac::MacResult; | ||
use polyval::{Block, Polyval}; | ||
use std::{cmp::min, convert::TryInto}; | ||
|
||
bench!(PolyvalMac); | ||
|
||
/// POLYVAL isn't a traditional MAC and for that reason doesn't impl the | ||
/// `crypto_mac::Mac` trait. | ||
/// | ||
/// This type is a newtype that impls a pseudo-MAC to leverage the benchmark | ||
/// functionality. | ||
/// | ||
/// This is just for benchmarking! Don't copy and paste this into your program | ||
/// unless you really know what you're doing!!! | ||
#[derive(Clone)] | ||
struct PolyvalMac { | ||
poly: Polyval, | ||
leftover: usize, | ||
buffer: Block, | ||
} | ||
|
||
impl Mac for PolyvalMac { | ||
type OutputSize = U16; | ||
type KeySize = U16; | ||
|
||
fn new(key: &GenericArray<u8, Self::KeySize>) -> PolyvalMac { | ||
let poly = Polyval::new(key.as_slice().try_into().unwrap()); | ||
|
||
PolyvalMac { | ||
poly, | ||
leftover: 0, | ||
buffer: Block::default(), | ||
} | ||
} | ||
|
||
fn input(&mut self, data: &[u8]) { | ||
let mut m = data; | ||
|
||
if self.leftover > 0 { | ||
let want = min(16 - self.leftover, m.len()); | ||
|
||
for (i, byte) in m.iter().cloned().enumerate().take(want) { | ||
self.buffer[self.leftover + i] = byte; | ||
} | ||
|
||
m = &m[want..]; | ||
self.leftover += want; | ||
|
||
if self.leftover < 16 { | ||
return; | ||
} | ||
|
||
self.block(); | ||
self.leftover = 0; | ||
} | ||
|
||
while m.len() >= 16 { | ||
self.block(); | ||
m = &m[16..]; | ||
} | ||
|
||
self.buffer[..m.len()].copy_from_slice(m); | ||
self.leftover = m.len(); | ||
} | ||
|
||
fn reset(&mut self) { | ||
unimplemented!(); | ||
} | ||
|
||
fn result(self) -> MacResult<Self::OutputSize> { | ||
self.poly.result() | ||
} | ||
} | ||
|
||
impl PolyvalMac { | ||
/// Input the current internal buffer into POLYVAL | ||
fn block(&mut self) { | ||
let elem = self.buffer; | ||
self.poly.input(elem) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
//! Field arithmetic backends | ||
|
||
#[cfg(all( | ||
target_feature = "pclmulqdq", | ||
target_feature = "sse2", | ||
target_feature = "sse4.1", | ||
any(target_arch = "x86", target_arch = "x86_64") | ||
))] | ||
mod pclmulqdq; | ||
|
||
#[cfg(feature = "insecure-soft")] | ||
mod soft; | ||
|
||
use super::clmul::Clmul; | ||
use core::ops::BitXor; | ||
use Block; | ||
|
||
#[cfg(not(any( | ||
all( | ||
target_feature = "pclmulqdq", | ||
target_feature = "sse2", | ||
target_feature = "sse4.1", | ||
any(target_arch = "x86", target_arch = "x86_64") | ||
), | ||
feature = "insecure-soft" | ||
)))] | ||
compile_error!( | ||
"no backends available! On x86/x86-64 platforms, enable intrinsics with \ | ||
RUSTFLAGS=\"-Ctarget-cpu=sandybridge -Ctarget-feature=+sse2,+sse4.1\" or \ | ||
enable **INSECURE** portable emulation with the `insecure-soft` feature" | ||
); | ||
|
||
#[cfg(all( | ||
target_feature = "pclmulqdq", | ||
target_feature = "sse2", | ||
target_feature = "sse4.1", | ||
any(target_arch = "x86", target_arch = "x86_64") | ||
))] | ||
pub(crate) use self::pclmulqdq::M128i; | ||
|
||
#[cfg(all( | ||
not(all( | ||
target_feature = "pclmulqdq", | ||
target_feature = "sse2", | ||
target_feature = "sse4.1", | ||
any(target_arch = "x86", target_arch = "x86_64") | ||
)), | ||
feature = "insecure-soft" | ||
))] | ||
pub(crate) use self::soft::U64x2 as M128i; | ||
|
||
/// Trait representing the arithmetic operations we expect on the XMM registers | ||
pub trait Xmm: | ||
BitXor<Output = Self> + Clmul + Copy + From<Block> + Into<Block> + From<u128> | ||
{ | ||
/// Swap the hi and low 64-bit halves of the register | ||
fn shuffle(self) -> Self; | ||
|
||
/// Shift the contents of the register left by 64-bits | ||
fn shl64(self) -> Self; | ||
|
||
/// Shift the contents of the register right by 64-bits | ||
fn shr64(self) -> Self; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
//! Support for the PCLMULQDQ CPU intrinsic on `x86` and `x86_64` target | ||
//! architectures. | ||
|
||
// The code below uses `loadu`/`storeu` to support unaligned loads/stores | ||
#![allow(clippy::cast_ptr_alignment)] | ||
|
||
#[cfg(target_arch = "x86")] | ||
use core::arch::x86::*; | ||
#[cfg(target_arch = "x86_64")] | ||
use core::arch::x86_64::*; | ||
|
||
use super::Xmm; | ||
use core::ops::BitXor; | ||
use field::clmul::{self, Clmul}; | ||
use Block; | ||
|
||
/// Wrapper for `__m128i` - a 128-bit XMM register (SSE2) | ||
#[repr(align(16))] | ||
#[derive(Copy, Clone)] | ||
pub struct M128i(__m128i); | ||
|
||
impl From<Block> for M128i { | ||
fn from(bytes: Block) -> M128i { | ||
M128i(unsafe { _mm_loadu_si128(bytes.as_ptr() as *const __m128i) }) | ||
} | ||
} | ||
|
||
impl From<M128i> for Block { | ||
fn from(xmm: M128i) -> Block { | ||
let mut result = Block::default(); | ||
|
||
unsafe { | ||
_mm_storeu_si128(result.as_mut_ptr() as *mut __m128i, xmm.0); | ||
} | ||
|
||
result | ||
} | ||
} | ||
|
||
impl From<u128> for M128i { | ||
fn from(x: u128) -> M128i { | ||
M128i(unsafe { _mm_loadu_si128(&x as *const u128 as *const __m128i) }) | ||
} | ||
} | ||
|
||
impl BitXor for M128i { | ||
type Output = Self; | ||
|
||
fn bitxor(self, rhs: Self) -> Self::Output { | ||
M128i(unsafe { xor(self.0, rhs.0) }) | ||
} | ||
} | ||
|
||
impl Clmul for M128i { | ||
fn clmul<I>(self, rhs: Self, imm: I) -> Self | ||
where | ||
I: Into<clmul::PseudoOp>, | ||
{ | ||
M128i(unsafe { pclmulqdq(self.0, rhs.0, imm.into()) }) | ||
} | ||
} | ||
|
||
impl Xmm for M128i { | ||
fn shuffle(self) -> Self { | ||
M128i(unsafe { shufpd1(self.0) }) | ||
} | ||
|
||
fn shl64(self) -> Self { | ||
M128i(unsafe { pslldq8(self.0) }) | ||
} | ||
|
||
fn shr64(self) -> Self { | ||
M128i(unsafe { psrldq8(self.0) }) | ||
} | ||
} | ||
|
||
#[target_feature(enable = "sse2", enable = "sse4.1")] | ||
unsafe fn xor(a: __m128i, b: __m128i) -> __m128i { | ||
_mm_xor_si128(a, b) | ||
} | ||
|
||
#[target_feature(enable = "sse2", enable = "sse4.1")] | ||
unsafe fn shufpd1(a: __m128i) -> __m128i { | ||
let a = _mm_castsi128_pd(a); | ||
_mm_castpd_si128(_mm_shuffle_pd(a, a, 1)) | ||
} | ||
|
||
#[target_feature(enable = "sse2", enable = "sse4.1")] | ||
unsafe fn pslldq8(a: __m128i) -> __m128i { | ||
_mm_bslli_si128(a, 8) | ||
} | ||
|
||
#[target_feature(enable = "sse2", enable = "sse4.1")] | ||
unsafe fn psrldq8(a: __m128i) -> __m128i { | ||
_mm_bsrli_si128(a, 8) | ||
} | ||
|
||
// TODO(tarcieri): _mm256_clmulepi64_epi128 (vpclmulqdq) | ||
#[target_feature(enable = "pclmulqdq", enable = "sse2", enable = "sse4.1")] | ||
unsafe fn pclmulqdq(a: __m128i, b: __m128i, op: clmul::PseudoOp) -> __m128i { | ||
match op { | ||
clmul::PseudoOp::PCLMULLQLQDQ => _mm_clmulepi64_si128(a, b, 0x00), | ||
clmul::PseudoOp::PCLMULHQLQDQ => _mm_clmulepi64_si128(a, b, 0x01), | ||
clmul::PseudoOp::PCLMULLQHQDQ => _mm_clmulepi64_si128(a, b, 0x10), | ||
clmul::PseudoOp::PCLMULHQHQDQ => _mm_clmulepi64_si128(a, b, 0x11), | ||
} | ||
} |
Oops, something went wrong.