From 16999f5730e92ae00e5b915bb28b39d8b50b69bf Mon Sep 17 00:00:00 2001
From: Tony Arcieri <bascule@gmail.com>
Date: Tue, 13 Aug 2019 16:32:42 -0700
Subject: [PATCH] [WIP] polyval: Initial implementation

Implements POLYVAL using Shay Gueron's techniques for efficient field
multiplications using PCLMULQDQ.

More information on these techniques here:

https://blog.quarkslab.com/reversing-a-finite-field-multiplication-optimization.html
---
 Cargo.toml                             |   2 +-
 polyval/Cargo.toml                     |   9 +++
 polyval/benches/polyval.rs             |  89 ++++++++++++++++++++
 polyval/src/field/backend/mod.rs       |  61 ++++++++++++++
 polyval/src/field/backend/pclmulqdq.rs | 108 +++++++++++++++++++++++++
 polyval/src/field/backend/soft.rs      |  72 +++++++++++++++++
 polyval/src/field/clmul.rs             |  55 +++++++++++++
 polyval/src/field/mod.rs               |  99 +++++++++++++++++++++++
 polyval/src/lib.rs                     | 104 +++++++++++++++++++++++-
 polyval/tests/lib.rs                   |  23 ++++++
 10 files changed, 620 insertions(+), 2 deletions(-)
 create mode 100644 polyval/benches/polyval.rs
 create mode 100644 polyval/src/field/backend/mod.rs
 create mode 100644 polyval/src/field/backend/pclmulqdq.rs
 create mode 100644 polyval/src/field/backend/soft.rs
 create mode 100644 polyval/src/field/clmul.rs
 create mode 100644 polyval/src/field/mod.rs
 create mode 100644 polyval/tests/lib.rs

diff --git a/Cargo.toml b/Cargo.toml
index 5c5a885..4fc543c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,5 +5,5 @@ members = [
     "hmac",
     "pmac",
     "poly1305",
-    "polyval"
+    "polyval",
 ]
diff --git a/polyval/Cargo.toml b/polyval/Cargo.toml
index a0fc47d..5c5f9f2 100644
--- a/polyval/Cargo.toml
+++ b/polyval/Cargo.toml
@@ -10,6 +10,15 @@ keywords = ["aes-gcm-siv", "crypto", "ghash", "gcm", "universal-hashing"]
 categories = ["cryptography", "no-std"]
 
 [dependencies]
+byteorder = { version = "1", default-features = false }
+zeroize = { version = "0.9", optional = true, default-features = false }
+
+[dev-dependencies]
+crypto-mac = { version = "0.7", features = ["dev"] }
+hex-literal = "0.1"
+
+[features]
+nightly = []
 
 [badges]
 travis-ci = { repository = "RustCrypto/hashes" }
diff --git a/polyval/benches/polyval.rs b/polyval/benches/polyval.rs
new file mode 100644
index 0000000..a3cba0a
--- /dev/null
+++ b/polyval/benches/polyval.rs
@@ -0,0 +1,89 @@
+#![feature(test)]
+#[macro_use]
+extern crate crypto_mac;
+extern crate polyval;
+
+use crypto_mac::generic_array::{typenum::U16, GenericArray};
+use crypto_mac::MacResult;
+use polyval::{Polyval, FIELD_SIZE};
+use std::{cmp::min, convert::TryInto};
+
+bench!(PolyvalMac);
+
+/// POLYVAL isn't a traditional MAC and for that reason doesn't impl the
+/// `crypto_mac::Mac` trait.
+///
+/// This type is a newtype that impls a pseudo-MAC to leverage the benchmark
+/// functionality.
+///
+/// This is just for benchmarking! Don't copy and paste this into your program
+/// unless you really know what you're doing!!!
+#[derive(Clone)]
+struct PolyvalMac {
+    poly: Polyval,
+    leftover: usize,
+    buffer: [u8; FIELD_SIZE],
+}
+
+impl Mac for PolyvalMac {
+    type OutputSize = U16;
+    type KeySize = U16;
+
+    fn new(key: &GenericArray<u8, Self::KeySize>) -> PolyvalMac {
+        let poly = Polyval::new(key.as_slice().try_into().unwrap());
+
+        PolyvalMac {
+            poly,
+            leftover: 0,
+            buffer: [0u8; FIELD_SIZE],
+        }
+    }
+
+    /// Buffers `data`, feeding each complete `FIELD_SIZE`-byte block to POLYVAL.
+    fn input(&mut self, data: &[u8]) {
+        let mut m = data;
+
+        if self.leftover > 0 {
+            let want = min(FIELD_SIZE - self.leftover, m.len());
+            self.buffer[self.leftover..self.leftover + want].copy_from_slice(&m[..want]);
+
+            m = &m[want..];
+            self.leftover += want;
+
+            if self.leftover < FIELD_SIZE {
+                return;
+            }
+
+            self.block();
+            self.leftover = 0;
+        }
+
+        while m.len() >= FIELD_SIZE {
+            // Load each full block first: `block()` only hashes `self.buffer`.
+            self.buffer.copy_from_slice(&m[..FIELD_SIZE]);
+            self.block();
+            m = &m[FIELD_SIZE..];
+        }
+
+        self.buffer[..m.len()].copy_from_slice(m);
+        self.leftover = m.len();
+    }
+
+    fn reset(&mut self) {
+        unimplemented!();
+    }
+
+    fn result(self) -> MacResult<Self::OutputSize> {
+        let mut mac = GenericArray::default();
+        mac.copy_from_slice(&self.poly.result());
+        MacResult::new(mac)
+    }
+}
+
+impl PolyvalMac {
+    /// Input the current internal buffer into POLYVAL
+    fn block(&mut self) {
+        let elem = self.buffer;
+        self.poly.input(elem)
+    }
+}
diff --git a/polyval/src/field/backend/mod.rs b/polyval/src/field/backend/mod.rs
new file mode 100644
index 0000000..d7519d7
--- /dev/null
+++ b/polyval/src/field/backend/mod.rs
@@ -0,0 +1,61 @@
+//! Field arithmetic backends
+
+#[cfg(all(
+    target_feature = "pclmulqdq",
+    target_feature = "sse2",
+    target_feature = "sse4.1",
+    any(target_arch = "x86", target_arch = "x86_64")
+))]
+pub mod pclmulqdq;
+pub mod soft;
+
+use super::clmul::Clmul;
+use core::ops::{BitXor, BitXorAssign};
+
+/// Mask value to load into XMM register when performing Montgomery reduction.
+/// See: <https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf>
+const MASK: [u64; 2] = [0x1, 0xc200_0000_0000_0000];
+
+/// Trait representing the arithmetic operations we expect on the XMM registers
+pub trait Xmm:
+    BitXor<Output = Self> + BitXorAssign + Clmul + Copy + From<[u64; 2]> + Into<[u64; 2]>
+{
+    /// Fast reduction modulo x^128 + x^127 + x^126 + x^121 + 1 (Gueron 2012)
+    /// Algorithm 4: "Montgomery reduction"
+    fn reduce(self) -> Self {
+        let mask = Self::from(MASK);
+        let a = mask.clmul(self, 0x01);
+        let b = self.rotate_left() ^ a;
+        let c = mask.clmul(b, 0x01);
+        b.rotate_left() ^ c
+    }
+
+    /// Rotate the contents of the register left by 64-bits
+    ///
+    /// Equivalent to exchanging the two `u64` lanes, so no `unsafe` is needed.
+    fn rotate_left(self) -> Self {
+        let mut u64x2: [u64; 2] = self.into();
+        u64x2.swap(0, 1);
+        u64x2.into()
+    }
+
+    /// Shift the contents of the register right by 64-bits
+    ///
+    /// In `[u64; 2]` terms: lane 0 moves into lane 1 and lane 0 is zeroed.
+    fn shift_right(self) -> Self {
+        let mut u64x2: [u64; 2] = self.into();
+        u64x2[1] = u64x2[0];
+        u64x2[0] = 0;
+        u64x2.into()
+    }
+
+    /// Shift the contents of the register left by 64-bits
+    ///
+    /// In `[u64; 2]` terms: lane 1 moves into lane 0 and lane 1 is zeroed.
+    fn shift_left(self) -> Self {
+        let mut u64x2: [u64; 2] = self.into();
+        u64x2[0] = u64x2[1];
+        u64x2[1] = 0;
+        u64x2.into()
+    }
+}
diff --git a/polyval/src/field/backend/pclmulqdq.rs b/polyval/src/field/backend/pclmulqdq.rs
new file mode 100644
index 0000000..cf30580
--- /dev/null
+++ b/polyval/src/field/backend/pclmulqdq.rs
@@ -0,0 +1,108 @@
+//! Support for the PCLMULQDQ CPU instruction on `x86` and `x86_64` target
+//! architectures.
+
+#[cfg(target_arch = "x86")]
+use core::arch::x86::*;
+#[cfg(target_arch = "x86_64")]
+use core::arch::x86_64::*;
+
+use super::Xmm;
+use crate::field::clmul::{self, Clmul};
+use core::ops::{BitXor, BitXorAssign};
+
+/// 2 x `u64` values loaded into a `__m128i` register
+#[repr(align(16))]
+#[derive(Copy, Clone)]
+pub struct U64x2(__m128i);
+
+impl From<[u64; 2]> for U64x2 {
+    fn from(array: [u64; 2]) -> U64x2 {
+        unsafe { _mm_loadu_si128(array.as_ptr() as *const __m128i) }.into()
+    }
+}
+
+impl From<U64x2> for [u64; 2] {
+    fn from(u64x2: U64x2) -> [u64; 2] {
+        let mut result = [0u64; 2];
+
+        unsafe {
+            _mm_storeu_si128(result.as_mut_ptr() as *mut __m128i, u64x2.0);
+        }
+
+        result
+    }
+}
+
+impl From<__m128i> for U64x2 {
+    fn from(mm: __m128i) -> U64x2 {
+        U64x2(mm)
+    }
+}
+
+impl From<U64x2> for __m128i {
+    fn from(u64x2: U64x2) -> __m128i {
+        u64x2.0
+    }
+}
+
+impl BitXor for U64x2 {
+    type Output = Self;
+
+    fn bitxor(self, rhs: Self) -> Self::Output {
+        U64x2(unsafe { xor(self.0, rhs.0) })
+    }
+}
+
+impl BitXorAssign for U64x2 {
+    fn bitxor_assign(&mut self, rhs: Self) {
+        // TODO(tarcieri): optimize
+        self.0 = unsafe { xor(self.0, rhs.0) };
+    }
+}
+
+impl Clmul for U64x2 {
+    fn clmul<I>(self, rhs: Self, imm: I) -> Self
+    where
+        I: Into<clmul::PseudoOp>,
+    {
+        unsafe { vpclmulqdq(self.0, rhs.0, imm.into()) }.into()
+    }
+}
+
+// TODO(tarcieri): optimized `rotate_left`, `shift_right`, `shift_left`
+impl Xmm for U64x2 {}
+
+#[target_feature(enable = "sse2", enable = "sse4.1")]
+unsafe fn xor(a: __m128i, b: __m128i) -> __m128i {
+    _mm_xor_si128(a, b)
+}
+
+#[target_feature(enable = "pclmulqdq", enable = "sse2", enable = "sse4.1")]
+unsafe fn vpclmulqdq(a: __m128i, b: __m128i, op: clmul::PseudoOp) -> __m128i {
+    match op {
+        clmul::PseudoOp::PCLMULLQLQDQ => _mm_clmulepi64_si128(a, b, 0x00),
+        clmul::PseudoOp::PCLMULHQLQDQ => _mm_clmulepi64_si128(a, b, 0x01),
+        clmul::PseudoOp::PCLMULLQHQDQ => _mm_clmulepi64_si128(a, b, 0x10),
+        clmul::PseudoOp::PCLMULHQHQDQ => _mm_clmulepi64_si128(a, b, 0x11),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::field::{
+        backend::soft,
+        clmul::{self, Clmul},
+    };
+
+    #[test]
+    fn vclmul_emulation() {
+        let a: [u64; 2] = [0x00000000ada5f29b, 0];
+        let b: [u64; 2] = [0x000000002d978a49, 0];
+        let op = clmul::PseudoOp::from(0x00);
+
+        let hard_result: [u64; 2] = super::U64x2::from(a).clmul(b.into(), op).into();
+        let soft_result: [u64; 2] = soft::U64x2::from(a).clmul(b.into(), op).into();
+
+        assert_eq!(&hard_result, &soft_result);
+    }
+}
diff --git a/polyval/src/field/backend/soft.rs b/polyval/src/field/backend/soft.rs
new file mode 100644
index 0000000..0c5df4e
--- /dev/null
+++ b/polyval/src/field/backend/soft.rs
@@ -0,0 +1,72 @@
+//! Software emulation support for CLMUL hardware intrinsics.
+//!
+//! WARNING: Not constant time! Should be made constant-time or disabled by default.
+
+use super::Xmm;
+use field::clmul::{self, Clmul};
+use core::ops::{BitXor, BitXorAssign};
+
+/// 2 x `u64` values
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub struct U64x2([u64; 2]);
+
+impl From<[u64; 2]> for U64x2 {
+    fn from(array: [u64; 2]) -> U64x2 {
+        U64x2(array)
+    }
+}
+
+impl From<U64x2> for [u64; 2] {
+    fn from(u64x2: U64x2) -> [u64; 2] {
+        u64x2.0
+    }
+}
+
+impl BitXor for U64x2 {
+    type Output = Self;
+
+    fn bitxor(self, rhs: Self) -> Self::Output {
+        U64x2([self.0[0] ^ rhs.0[0], self.0[1] ^ rhs.0[1]])
+    }
+}
+
+impl BitXorAssign for U64x2 {
+    fn bitxor_assign(&mut self, rhs: Self) {
+        self.0[0] ^= rhs.0[0];
+        self.0[1] ^= rhs.0[1];
+    }
+}
+
+impl Clmul for U64x2 {
+    /// Carry-less multiply of the two 64-bit lanes selected by `imm`,
+    /// returning the 128-bit product as `[low, high]`.
+    fn clmul<I>(self, other: Self, imm: I) -> Self
+    where
+        I: Into<clmul::PseudoOp>,
+    {
+        let (a, b) = match imm.into() {
+            clmul::PseudoOp::PCLMULLQLQDQ => (self.0[0], other.0[0]),
+            clmul::PseudoOp::PCLMULHQLQDQ => (self.0[1], other.0[0]),
+            clmul::PseudoOp::PCLMULLQHQDQ => (self.0[0], other.0[1]),
+            clmul::PseudoOp::PCLMULHQHQDQ => (self.0[1], other.0[1]),
+        };
+
+        let mut result = [0u64; 2];
+
+        for i in 0..64 {
+            // All-ones when bit `i` of `b` is set, all-zeros otherwise: selects
+            // `a` without branching on operand bits in the source.
+            let mask = 0u64.wrapping_sub((b >> i) & 1);
+            result[1] ^= a & mask;
+
+            // Shift the 128-bit accumulator right by one bit, carrying the
+            // lowest bit of the high word into the top of the low word.
+            result[0] = (result[0] >> 1) | (result[1] << 63);
+            result[1] >>= 1;
+        }
+
+        result.into()
+    }
+}
+
+impl Xmm for U64x2 {}
diff --git a/polyval/src/field/clmul.rs b/polyval/src/field/clmul.rs
new file mode 100644
index 0000000..7b9e5a7
--- /dev/null
+++ b/polyval/src/field/clmul.rs
@@ -0,0 +1,55 @@
+//! Carry-less multiplication support.
+//!
+//! Modern `x86` and `x86_64` CPUs support hardware instructions for
+//! carry-less multiplication which are necessary for efficient implementations
+//! of GHASH and POLYVAL.
+
+/// Carry-less multiplication trait - allows field arithmetic to be generic
+/// across both the `pclmulqdq` and `soft` backends
+pub trait Clmul: Copy {
+    /// Performs carry-less multiplication of the 64-bit halves of `self` and
+    /// `other` selected by `imm` (see `PseudoOp`).
+    fn clmul<I: Into<PseudoOp>>(self, other: Self, imm: I) -> Self;
+}
+
+/// Pseudo-Op: selected by bits 4 and 0 of the immediate byte (`imm8`).
+///
+/// PCLMULQDQ performs carry-less multiplication of two quadwords which are
+/// selected from both operands according to the value of `imm8`.
+///
+/// Bits 4 and 0 of `imm8` are used to select which 64-bit half of each operand
+/// to use. Each of the possibilities has a named CLMUL Pseudo-Op, which is
+/// represented by this enum.
+#[repr(u8)]
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub enum PseudoOp {
+    /// Low-Low: `clmul(a[0..8], b[0..8])`
+    PCLMULLQLQDQ = 0x00,
+
+    /// High-Low: `clmul(a[8..16], b[0..8])`
+    PCLMULHQLQDQ = 0x01,
+
+    /// Low-High: `clmul(a[0..8], b[8..16])`
+    PCLMULLQHQDQ = 0x10,
+
+    /// High-High: `clmul(a[8..16], b[8..16])`
+    PCLMULHQHQDQ = 0x11,
+}
+
+impl From<u8> for PseudoOp {
+    fn from(imm8: u8) -> PseudoOp {
+        match imm8 {
+            0x00 => PseudoOp::PCLMULLQLQDQ,
+            0x01 => PseudoOp::PCLMULHQLQDQ,
+            0x10 => PseudoOp::PCLMULLQHQDQ,
+            0x11 => PseudoOp::PCLMULHQHQDQ,
+            _ => panic!("invalid imm8 value: 0x{:02x}", imm8),
+        }
+    }
+}
+
+impl From<PseudoOp> for u8 {
+    fn from(op: PseudoOp) -> u8 {
+        op as u8
+    }
+}
diff --git a/polyval/src/field/mod.rs b/polyval/src/field/mod.rs
new file mode 100644
index 0000000..d08686c
--- /dev/null
+++ b/polyval/src/field/mod.rs
@@ -0,0 +1,99 @@
+//! Implementation of POLYVAL's finite field.
+//!
+//! From [RFC 8452 Section 3] which defines POLYVAL for use in AES-GCM-SIV:
+//!
+//! > "POLYVAL, like GHASH (the authenticator in AES-GCM; ...), operates in a
+//! > binary field of size 2^128.  The field is defined by the irreducible
+//! > polynomial x^128 + x^127 + x^126 + x^121 + 1."
+//!
+//! This implementation provides multiplication over GF(2^128) optimized using
+//! Shay Gueron's PCLMULQDQ-based techniques.
+//!
+//! For more information on how these techniques work, see:
+//! <https://blog.quarkslab.com/reversing-a-finite-field-multiplication-optimization.html>
+//!
+//! [RFC 8452 Section 3]: https://tools.ietf.org/html/rfc8452#section-3
+
+pub mod backend;
+pub mod clmul;
+
+use self::backend::Xmm;
+use super::FIELD_SIZE;
+use byteorder::{ByteOrder, LE};
+use core::ops::{BitXor, Mul};
+
+/// POLYVAL field element.
+#[derive(Copy, Clone)]
+pub struct FieldElement<X: Xmm>(X);
+
+impl<X: Xmm> FieldElement<X> {
+    /// Load a `FieldElement` from its bytestring representation.
+    pub fn from_bytes(bytes: [u8; FIELD_SIZE]) -> Self {
+        let mut u64x2 = [0u64; 2];
+        LE::read_u64_into(&bytes, &mut u64x2);
+        u64x2.into()
+    }
+
+    /// Serialize this `FieldElement` as a bytestring.
+    pub fn to_bytes(self) -> [u8; FIELD_SIZE] {
+        let u64x2: [u64; 2] = self.0.into();
+        let mut result = [0u8; FIELD_SIZE];
+        LE::write_u64_into(&u64x2, &mut result);
+        result
+    }
+}
+
+impl<X: Xmm> Mul for FieldElement<X> {
+    type Output = Self;
+
+    /// Computes POLYVAL multiplication over GF(2^128).
+    ///
+    /// From [RFC 8452 Section 3]:
+    ///
+    /// > "The product of any two elements is calculated using standard
+    /// > (binary) polynomial multiplication followed by reduction modulo the
+    /// > irreducible polynomial."
+    ///
+    /// [RFC 8452 Section 3]: https://tools.ietf.org/html/rfc8452#section-3
+    fn mul(self, rhs: Self) -> Self {
+        let mut t1 = self.0.clmul(rhs.0, 0x00);
+        let mut t2 = self.0.clmul(rhs.0, 0x01);
+        let mut t3 = self.0.clmul(rhs.0, 0x10);
+        let mut t4 = self.0.clmul(rhs.0, 0x11);
+
+        t2 ^= t3;
+        t3 = t2.shift_right();
+        t2 = t2.shift_left();
+        t1 ^= t3;
+        t4 ^= t2;
+        t4 ^= t1.reduce();
+
+        FieldElement(t4)
+    }
+}
+
+impl<X: Xmm> From<X> for FieldElement<X> {
+    fn from(element: X) -> FieldElement<X> {
+        FieldElement(element)
+    }
+}
+
+impl<X: Xmm> From<[u64; 2]> for FieldElement<X> {
+    fn from(array: [u64; 2]) -> FieldElement<X> {
+        FieldElement(array.into())
+    }
+}
+
+impl<X: Xmm> From<FieldElement<X>> for [u64; 2] {
+    fn from(fe: FieldElement<X>) -> [u64; 2] {
+        fe.0.into()
+    }
+}
+
+impl<X: Xmm> BitXor for FieldElement<X> {
+    type Output = Self;
+
+    fn bitxor(self, rhs: Self) -> Self::Output {
+        FieldElement(self.0 ^ rhs.0)
+    }
+}
diff --git a/polyval/src/lib.rs b/polyval/src/lib.rs
index 1b3bed6..3fd3eb6 100644
--- a/polyval/src/lib.rs
+++ b/polyval/src/lib.rs
@@ -1 +1,103 @@
-//! POLYVAL
+//! **POLYVAL** is a GHASH-like universal hash over GF(2^128) useful for
+//! implementing [AES-GCM-SIV] or [AES-GCM/GMAC].
+//!
+//! From [RFC 8452 Section 3] which defines POLYVAL for use in AES-GCM-SIV:
+//!
+//! > "POLYVAL, like GHASH (the authenticator in AES-GCM; ...), operates in a
+//! > binary field of size 2^128.  The field is defined by the irreducible
+//! > polynomial x^128 + x^127 + x^126 + x^121 + 1."
+//!
+//! By multiplying (in the finite field sense) a sequence of 128-bit blocks of
+//! input data by a field element `H`, POLYVAL can be used to authenticate
+//! the message sequence as powers (in a finite field sense) of `H`.
+//!
+//! ## Relationship to GHASH
+//!
+//! POLYVAL can be thought of as the little endian equivalent of GHASH, which
+//! affords it a small performance advantage over GHASH when used on little
+//! endian architectures.
+//!
+//! It has also been designed so it can also be used to compute GHASH and with
+//! it GMAC, the Message Authentication Code (MAC) used by AES-GCM.
+//!
+//! From [RFC 8452 Appendix A]:
+//!
+//! > "GHASH and POLYVAL both operate in GF(2^128), although with different
+//! > irreducible polynomials: POLYVAL works modulo x^128 + x^127 + x^126 +
+//! > x^121 + 1 and GHASH works modulo x^128 + x^7 + x^2 + x + 1.  Note
+//! > that these irreducible polynomials are the 'reverse' of each other."
+//!
+//! [AES-GCM-SIV]: https://en.wikipedia.org/wiki/AES-GCM-SIV
+//! [AES-GCM/GMAC]: https://en.wikipedia.org/wiki/Galois/Counter_Mode
+//! [RFC 8452 Section 3]: https://tools.ietf.org/html/rfc8452#section-3
+//! [RFC 8452 Appendix A]: https://tools.ietf.org/html/rfc8452#appendix-A
+
+#![no_std]
+#![doc(html_logo_url = "https://raw.githubusercontent.com/RustCrypto/meta/master/logo_small.png")]
+#![deny(missing_docs)]
+
+extern crate byteorder;
+#[cfg(feature = "zeroize")]
+extern crate zeroize;
+
+pub mod field;
+
+use self::field::FieldElement;
+#[cfg(feature = "zeroize")]
+use zeroize::Zeroize;
+
+// TODO(tarcieri): selectable backends
+use self::field::backend::soft::U64x2;
+
+/// Size of the GF(2^128) field modulus in bytes (16-bytes).
+pub const FIELD_SIZE: usize = 16;
+
+/// **POLYVAL**: GHASH-like universal hash over GF(2^128).
+#[repr(align(16))]
+#[derive(Clone)]
+#[allow(non_snake_case)]
+pub struct Polyval {
+    /// GF(2^128) field element input blocks are multiplied by
+    H: FieldElement<U64x2>,
+
+    /// Field element representing the computed universal hash
+    S: FieldElement<U64x2>,
+}
+
+impl Polyval {
+    /// Initialize POLYVAL with the given `H` field element
+    pub fn new(h: [u8; FIELD_SIZE]) -> Self {
+        Self {
+            H: FieldElement::from_bytes(h),
+            S: [0u64; 2].into(),
+        }
+    }
+
+    /// Input a field element `X` to be authenticated into POLYVAL.
+    pub fn input(&mut self, x: [u8; FIELD_SIZE]) {
+        // "The sum of any two elements in the field is the result of XORing them."
+        // -- RFC 8452 Section 3
+        let sum = self.S ^ FieldElement::from_bytes(x);
+        self.S = sum * self.H;
+    }
+
+    /// Process input blocks in a chained manner
+    pub fn chain(mut self, x: [u8; FIELD_SIZE]) -> Self {
+        self.input(x);
+        self
+    }
+
+    /// Get POLYVAL result (i.e. computed `S` field element)
+    pub fn result(self) -> [u8; FIELD_SIZE] {
+        self.S.to_bytes()
+    }
+}
+
+#[cfg(feature = "zeroize")]
+impl Drop for Polyval {
+    fn drop(&mut self) {
+        // NOTE(review): relies on `FieldElement: Zeroize`; no such impl is visible in this patch.
+        self.H.zeroize();
+        self.S.zeroize();
+    }
+}
diff --git a/polyval/tests/lib.rs b/polyval/tests/lib.rs
new file mode 100644
index 0000000..34254b9
--- /dev/null
+++ b/polyval/tests/lib.rs
@@ -0,0 +1,23 @@
+#[macro_use]
+extern crate hex_literal;
+extern crate polyval;
+
+use polyval::{Polyval, FIELD_SIZE};
+
+//
+// Test vectors for POLYVAL from RFC 8452 Appendix A
+// <https://tools.ietf.org/html/rfc8452#appendix-A>
+//
+
+const H: [u8; FIELD_SIZE] = hex!("25629347589242761d31f826ba4b757b");
+const X_1: [u8; FIELD_SIZE] = hex!("4f4f95668c83dfb6401762bb2d01a262");
+const X_2: [u8; FIELD_SIZE] = hex!("d1a24ddd2721d006bbe45f20d3c9f362");
+
+/// POLYVAL(H, X_1, X_2)
+const POLYVAL_RESULT: [u8; FIELD_SIZE] = hex!("f7a3b47b846119fae5b7866cf5e5b77e");
+
+#[test]
+fn rfc_8452_test_vector() {
+    let result = Polyval::new(H).chain(X_1).chain(X_2).result();
+    assert_eq!(&result, &POLYVAL_RESULT);
+}