Skip to content

Commit

Permalink
polyval: Initial implementation
Browse files Browse the repository at this point in the history
Implements POLYVAL using Shay Gueron's techniques for efficient field
multiplications using PCLMULQDQ.

More information on these techniques here:

https://blog.quarkslab.com/reversing-a-finite-field-multiplication-optimization.html
  • Loading branch information
tarcieri committed Aug 26, 2019
1 parent 0778fa9 commit 1182f2b
Show file tree
Hide file tree
Showing 11 changed files with 689 additions and 13 deletions.
29 changes: 19 additions & 10 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,31 +5,40 @@ sudo: required
matrix:
include:
- rust: 1.27.0
script: cargo test --verbose --all
script: cargo test --verbose --all --exclude=polyval
- rust: stable
script: cargo test --verbose --all
script: cargo test --verbose --all --exclude=polyval
- rust: nightly
script: cargo test --verbose --all
script: cargo test --verbose --all --exclude=polyval

- env: TARGET=i686-unknown-linux-gnu
rust: stable
- env: TARGET=powerpc-unknown-linux-gnu
rust: stable
- env: TARGET=powerpc64-unknown-linux-gnu
rust: stable

# tests if crates truly can be built without std
- env: TARGET=thumbv7em-none-eabi
rust: nightly
script: xargo build --verbose --target $TARGET
install:
- cargo install xargo || true
- rustup target install armv7-unknown-linux-gnueabihf
- rustup component add rust-src
rust: stable
install: rustup target install $TARGET
script:
- cargo build --verbose --all --exclude=polyval --tests

# polyval presently needs either RUSTFLAGS or non-default features
- name: polyval
rust:
- 1.27.0
- stable
script:
- cargo test --package=polyval --all-features
- RUSTFLAGS="-Ctarget-cpu=sandybridge -Ctarget-feature=+sse2,+sse4.1" cargo test --package polyval --tests
- RUSTFLAGS="-Ctarget-cpu=sandybridge -Ctarget-feature=+sse2,+sse4.1" cargo test --package polyval --all-features

install:
- cargo install cross || true

script:
- cross test --verbose --all --target $TARGET
- cross test --verbose --all --exclude=polyval --target $TARGET

cache: cargo
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ members = [
"hmac",
"pmac",
"poly1305",
"polyval"
"polyval",
]
17 changes: 16 additions & 1 deletion polyval/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,28 @@ name = "polyval"
version = "0.0.0"
authors = ["RustCrypto Developers"]
license = "MIT/Apache-2.0"
description = "GHASH-like universal hash over GF(2^128)"
description = """
POLYVAL is a GHASH-like universal hash over GF(2^128) useful for constructing
a Message Authentication Code (MAC)
"""
documentation = "https://docs.rs/polyval"
repository = "https://github.com/RustCrypto/MACs"
keywords = ["aes-gcm-siv", "crypto", "ghash", "gcm", "universal-hashing"]
categories = ["cryptography", "no-std"]

[dependencies]
byteorder = { version = "1", optional = true, default-features = false }
crypto-mac = "0.7"

[dev-dependencies]
crypto-mac = { version = "0.7", features = ["dev"] }
hex-literal = "0.1"

[features]
insecure-soft = ["byteorder"]

[badges]
travis-ci = { repository = "RustCrypto/hashes" }

[package.metadata.docs.rs]
all-features = true
87 changes: 87 additions & 0 deletions polyval/benches/polyval.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#![feature(test)]
#[macro_use]
extern crate crypto_mac;
extern crate polyval;

use crypto_mac::generic_array::{typenum::U16, GenericArray};
use crypto_mac::MacResult;
use polyval::{Block, Polyval};
use std::{cmp::min, convert::TryInto};

bench!(PolyvalMac);

/// POLYVAL isn't a traditional MAC and for that reason doesn't impl the
/// `crypto_mac::Mac` trait.
///
/// This type is a newtype that impls a pseudo-MAC to leverage the benchmark
/// functionality.
///
/// This is just for benchmarking! Don't copy and paste this into your program
/// unless you really know what you're doing!!!
#[derive(Clone)]
struct PolyvalMac {
poly: Polyval,
leftover: usize,
buffer: Block,
}

impl Mac for PolyvalMac {
type OutputSize = U16;
type KeySize = U16;

fn new(key: &GenericArray<u8, Self::KeySize>) -> PolyvalMac {
let poly = Polyval::new(key.as_slice().try_into().unwrap());

PolyvalMac {
poly,
leftover: 0,
buffer: Block::default(),
}
}

fn input(&mut self, data: &[u8]) {
let mut m = data;

if self.leftover > 0 {
let want = min(16 - self.leftover, m.len());

for (i, byte) in m.iter().cloned().enumerate().take(want) {
self.buffer[self.leftover + i] = byte;
}

m = &m[want..];
self.leftover += want;

if self.leftover < 16 {
return;
}

self.block();
self.leftover = 0;
}

while m.len() >= 16 {
self.block();
m = &m[16..];
}

self.buffer[..m.len()].copy_from_slice(m);
self.leftover = m.len();
}

fn reset(&mut self) {
unimplemented!();
}

fn result(self) -> MacResult<Self::OutputSize> {
self.poly.result()
}
}

impl PolyvalMac {
/// Input the current internal buffer into POLYVAL
fn block(&mut self) {
let elem = self.buffer;
self.poly.input(elem)
}
}
64 changes: 64 additions & 0 deletions polyval/src/field/backend/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
//! Field arithmetic backends

#[cfg(all(
target_feature = "pclmulqdq",
target_feature = "sse2",
target_feature = "sse4.1",
any(target_arch = "x86", target_arch = "x86_64")
))]
mod pclmulqdq;

#[cfg(feature = "insecure-soft")]
mod soft;

use super::clmul::Clmul;
use core::ops::BitXor;
use Block;

#[cfg(not(any(
all(
target_feature = "pclmulqdq",
target_feature = "sse2",
target_feature = "sse4.1",
any(target_arch = "x86", target_arch = "x86_64")
),
feature = "insecure-soft"
)))]
compile_error!(
"no backends available! On x86/x86-64 platforms, enable intrinsics with \
RUSTFLAGS=\"-Ctarget-cpu=sandybridge -Ctarget-feature=+sse2,+sse4.1\" or \
enable **INSECURE** portable emulation with the `insecure-soft` feature"
);

#[cfg(all(
target_feature = "pclmulqdq",
target_feature = "sse2",
target_feature = "sse4.1",
any(target_arch = "x86", target_arch = "x86_64")
))]
pub(crate) use self::pclmulqdq::M128i;

#[cfg(all(
not(all(
target_feature = "pclmulqdq",
target_feature = "sse2",
target_feature = "sse4.1",
any(target_arch = "x86", target_arch = "x86_64")
)),
feature = "insecure-soft"
))]
pub(crate) use self::soft::U64x2 as M128i;

/// Trait representing the arithmetic operations we expect on the XMM registers
pub trait Xmm:
BitXor<Output = Self> + Clmul + Copy + From<Block> + Into<Block> + From<u128>
{
/// Swap the hi and low 64-bit halves of the register
fn shuffle(self) -> Self;

/// Shift the contents of the register left by 64-bits
fn shl64(self) -> Self;

/// Shift the contents of the register right by 64-bits
fn shr64(self) -> Self;
}
107 changes: 107 additions & 0 deletions polyval/src/field/backend/pclmulqdq.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
//! Support for the PCLMULQDQ CPU intrinsic on `x86` and `x86_64` target
//! architectures.

// The code below uses `loadu`/`storeu` to support unaligned loads/stores
#![allow(clippy::cast_ptr_alignment)]

#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

use super::Xmm;
use core::ops::BitXor;
use field::clmul::{self, Clmul};
use Block;

/// Wrapper for `__m128i` - a 128-bit XMM register (SSE2)
#[repr(align(16))]
#[derive(Copy, Clone)]
pub struct M128i(__m128i);

impl From<Block> for M128i {
fn from(bytes: Block) -> M128i {
M128i(unsafe { _mm_loadu_si128(bytes.as_ptr() as *const __m128i) })
}
}

impl From<M128i> for Block {
fn from(xmm: M128i) -> Block {
let mut result = Block::default();

unsafe {
_mm_storeu_si128(result.as_mut_ptr() as *mut __m128i, xmm.0);
}

result
}
}

impl From<u128> for M128i {
fn from(x: u128) -> M128i {
M128i(unsafe { _mm_loadu_si128(&x as *const u128 as *const __m128i) })
}
}

impl BitXor for M128i {
type Output = Self;

fn bitxor(self, rhs: Self) -> Self::Output {
M128i(unsafe { xor(self.0, rhs.0) })
}
}

impl Clmul for M128i {
fn clmul<I>(self, rhs: Self, imm: I) -> Self
where
I: Into<clmul::PseudoOp>,
{
M128i(unsafe { pclmulqdq(self.0, rhs.0, imm.into()) })
}
}

impl Xmm for M128i {
fn shuffle(self) -> Self {
M128i(unsafe { shufpd1(self.0) })
}

fn shl64(self) -> Self {
M128i(unsafe { pslldq8(self.0) })
}

fn shr64(self) -> Self {
M128i(unsafe { psrldq8(self.0) })
}
}

#[target_feature(enable = "sse2", enable = "sse4.1")]
unsafe fn xor(a: __m128i, b: __m128i) -> __m128i {
_mm_xor_si128(a, b)
}

#[target_feature(enable = "sse2", enable = "sse4.1")]
unsafe fn shufpd1(a: __m128i) -> __m128i {
let a = _mm_castsi128_pd(a);
_mm_castpd_si128(_mm_shuffle_pd(a, a, 1))
}

#[target_feature(enable = "sse2", enable = "sse4.1")]
unsafe fn pslldq8(a: __m128i) -> __m128i {
_mm_bslli_si128(a, 8)
}

#[target_feature(enable = "sse2", enable = "sse4.1")]
unsafe fn psrldq8(a: __m128i) -> __m128i {
_mm_bsrli_si128(a, 8)
}

// TODO(tarcieri): _mm256_clmulepi64_epi128 (vpclmulqdq)
#[target_feature(enable = "pclmulqdq", enable = "sse2", enable = "sse4.1")]
unsafe fn pclmulqdq(a: __m128i, b: __m128i, op: clmul::PseudoOp) -> __m128i {
match op {
clmul::PseudoOp::PCLMULLQLQDQ => _mm_clmulepi64_si128(a, b, 0x00),
clmul::PseudoOp::PCLMULHQLQDQ => _mm_clmulepi64_si128(a, b, 0x01),
clmul::PseudoOp::PCLMULLQHQDQ => _mm_clmulepi64_si128(a, b, 0x10),
clmul::PseudoOp::PCLMULHQHQDQ => _mm_clmulepi64_si128(a, b, 0x11),
}
}
Loading

0 comments on commit 1182f2b

Please sign in to comment.