diff --git a/core/crypto/_aes/ct64/ct64.odin b/core/crypto/_aes/ct64/ct64.odin index b2d5b72bc64..af2b42c1eff 100644 --- a/core/crypto/_aes/ct64/ct64.odin +++ b/core/crypto/_aes/ct64/ct64.odin @@ -210,11 +210,8 @@ orthogonalize :: proc "contextless" (q: ^[8]u64) { } @(require_results) -interleave_in :: proc "contextless" (w: []u32) -> (q0, q1: u64) #no_bounds_check { - if len(w) < 4 { - panic_contextless("aes/ct64: invalid input size") - } - x0, x1, x2, x3 := u64(w[0]), u64(w[1]), u64(w[2]), u64(w[3]) +interleave_in :: proc "contextless" (w0, w1, w2, w3: u32) -> (q0, q1: u64) #no_bounds_check { + x0, x1, x2, x3 := u64(w0), u64(w1), u64(w2), u64(w3) x0 |= (x0 << 16) x1 |= (x1 << 16) x2 |= (x2 << 16) diff --git a/core/crypto/_aes/ct64/ct64_keysched.odin b/core/crypto/_aes/ct64/ct64_keysched.odin index 0cb01aa08e3..591bf53e6c6 100644 --- a/core/crypto/_aes/ct64/ct64_keysched.odin +++ b/core/crypto/_aes/ct64/ct64_keysched.odin @@ -77,7 +77,7 @@ keysched :: proc(comp_skey: []u64, key: []byte) -> int { q: [8]u64 = --- for i, j := 0, 0; i < nkf; i, j = i + 4, j + 2 { - q[0], q[4] = interleave_in(skey[i:]) + q[0], q[4] = interleave_in(skey[i], skey[i+1], skey[i+2], skey[i+3]) q[1] = q[0] q[2] = q[0] q[3] = q[0] @@ -122,57 +122,3 @@ skey_expand :: proc "contextless" (skey, comp_skey: []u64, num_rounds: int) { skey[v + 3] = (x3 << 4) - x3 } } - -orthogonalize_roundkey :: proc "contextless" (qq: []u64, key: []byte) { - if len(qq) < 8 || len(key) != 16 { - panic_contextless("aes/ct64: invalid round key size") - } - - skey: [4]u32 = --- - skey[0] = endian.unchecked_get_u32le(key[0:]) - skey[1] = endian.unchecked_get_u32le(key[4:]) - skey[2] = endian.unchecked_get_u32le(key[8:]) - skey[3] = endian.unchecked_get_u32le(key[12:]) - - q: [8]u64 = --- - q[0], q[4] = interleave_in(skey[:]) - q[1] = q[0] - q[2] = q[0] - q[3] = q[0] - q[5] = q[4] - q[6] = q[4] - q[7] = q[4] - orthogonalize(&q) - - comp_skey: [2]u64 = --- - comp_skey[0] = - (q[0] & 0x1111111111111111) | - (q[1] & 
0x2222222222222222) | - (q[2] & 0x4444444444444444) | - (q[3] & 0x8888888888888888) - comp_skey[1] = - (q[4] & 0x1111111111111111) | - (q[5] & 0x2222222222222222) | - (q[6] & 0x4444444444444444) | - (q[7] & 0x8888888888888888) - - for x, u in comp_skey { - x0 := x - x1, x2, x3 := x0, x0, x0 - x0 &= 0x1111111111111111 - x1 &= 0x2222222222222222 - x2 &= 0x4444444444444444 - x3 &= 0x8888888888888888 - x1 >>= 1 - x2 >>= 2 - x3 >>= 3 - qq[u * 4 + 0] = (x0 << 4) - x0 - qq[u * 4 + 1] = (x1 << 4) - x1 - qq[u * 4 + 2] = (x2 << 4) - x2 - qq[u * 4 + 3] = (x3 << 4) - x3 - } - - mem.zero_explicit(&skey, size_of(skey)) - mem.zero_explicit(&q, size_of(q)) - mem.zero_explicit(&comp_skey, size_of(comp_skey)) -} diff --git a/core/crypto/_aes/ct64/helpers.odin b/core/crypto/_aes/ct64/helpers.odin index 0ca9c3f4e3d..b26817cb31a 100644 --- a/core/crypto/_aes/ct64/helpers.odin +++ b/core/crypto/_aes/ct64/helpers.odin @@ -3,17 +3,39 @@ package aes_ct64 import "core:crypto/_aes" import "core:encoding/endian" +@(require_results) +load_interleaved :: proc "contextless" (src: []byte) -> (u64, u64) { + w0 := endian.unchecked_get_u32le(src[0:]) + w1 := endian.unchecked_get_u32le(src[4:]) + w2 := endian.unchecked_get_u32le(src[8:]) + w3 := endian.unchecked_get_u32le(src[12:]) + return interleave_in(w0, w1, w2, w3) +} + +store_interleaved :: proc "contextless" (dst: []byte, a0, a1: u64) { + w0, w1, w2, w3 := interleave_out(a0, a1) + endian.unchecked_put_u32le(dst[0:], w0) + endian.unchecked_put_u32le(dst[4:], w1) + endian.unchecked_put_u32le(dst[8:], w2) + endian.unchecked_put_u32le(dst[12:], w3) +} + +@(require_results) +xor_interleaved :: #force_inline proc "contextless" (a0, a1, b0, b1: u64) -> (u64, u64) { + return a0 ~ b0, a1 ~ b1 +} + +@(require_results) +and_interleaved :: #force_inline proc "contextless" (a0, a1, b0, b1: u64) -> (u64, u64) { + return a0 & b0, a1 & b1 +} + load_blockx1 :: proc "contextless" (q: ^[8]u64, src: []byte) { if len(src) != _aes.BLOCK_SIZE { 
panic_contextless("aes/ct64: invalid block size") } - w: [4]u32 = --- - w[0] = endian.unchecked_get_u32le(src[0:]) - w[1] = endian.unchecked_get_u32le(src[4:]) - w[2] = endian.unchecked_get_u32le(src[8:]) - w[3] = endian.unchecked_get_u32le(src[12:]) - q[0], q[4] = interleave_in(w[:]) + q[0], q[4] = #force_inline load_interleaved(src) orthogonalize(q) } @@ -23,11 +45,7 @@ store_blockx1 :: proc "contextless" (dst: []byte, q: ^[8]u64) { } orthogonalize(q) - w0, w1, w2, w3 := interleave_out(q[0], q[4]) - endian.unchecked_put_u32le(dst[0:], w0) - endian.unchecked_put_u32le(dst[4:], w1) - endian.unchecked_put_u32le(dst[8:], w2) - endian.unchecked_put_u32le(dst[12:], w3) + #force_inline store_interleaved(dst, q[0], q[4]) } load_blocks :: proc "contextless" (q: ^[8]u64, src: [][]byte) { @@ -35,17 +53,11 @@ load_blocks :: proc "contextless" (q: ^[8]u64, src: [][]byte) { panic_contextless("aes/ct64: invalid block(s) size") } - w: [4]u32 = --- for s, i in src { if len(s) != _aes.BLOCK_SIZE { panic_contextless("aes/ct64: invalid block size") } - - w[0] = endian.unchecked_get_u32le(s[0:]) - w[1] = endian.unchecked_get_u32le(s[4:]) - w[2] = endian.unchecked_get_u32le(s[8:]) - w[3] = endian.unchecked_get_u32le(s[12:]) - q[i], q[i + 4] = interleave_in(w[:]) + q[i], q[i + 4] = #force_inline load_interleaved(s) } orthogonalize(q) } @@ -64,11 +76,6 @@ store_blocks :: proc "contextless" (dst: [][]byte, q: ^[8]u64) { if len(d) != _aes.BLOCK_SIZE { panic_contextless("aes/ct64: invalid block size") } - - w0, w1, w2, w3 := interleave_out(q[i], q[i + 4]) - endian.unchecked_put_u32le(d[0:], w0) - endian.unchecked_put_u32le(d[4:], w1) - endian.unchecked_put_u32le(d[8:], w2) - endian.unchecked_put_u32le(d[12:], w3) + #force_inline store_interleaved(d, q[i], q[i + 4]) } } diff --git a/core/crypto/aead/low_level.odin b/core/crypto/aead/low_level.odin index 38a0c84ba9c..a7ecef801d4 100644 --- a/core/crypto/aead/low_level.odin +++ b/core/crypto/aead/low_level.odin @@ -1,5 +1,6 @@ package aead 
+import "core:crypto/aegis" import "core:crypto/aes" import "core:crypto/chacha20" import "core:crypto/chacha20poly1305" @@ -15,7 +16,7 @@ Implementation :: union { // MAX_TAG_SIZE is the maximum size tag that can be returned by any of the // Algorithms supported via this package. -MAX_TAG_SIZE :: 16 +MAX_TAG_SIZE :: 32 // Algorithm is the algorithm identifier associated with a given Context. Algorithm :: enum { @@ -25,9 +26,13 @@ Algorithm :: enum { AES_GCM_256, CHACHA20POLY1305, XCHACHA20POLY1305, + AEGIS_128L, + AEGIS_128L_256, // AEGIS-128L (256-bit tag) + AEGIS_256, + AEGIS_256_256, // AEGIS-256 (256-bit tag) } -// ALGORITM_NAMES is the Agorithm to algorithm name string. +// ALGORITHM_NAMES is the Algorithm to algorithm name string. ALGORITHM_NAMES := [Algorithm]string { .Invalid = "Invalid", .AES_GCM_128 = "AES-GCM-128", @@ -35,6 +40,10 @@ ALGORITHM_NAMES := [Algorithm]string { .AES_GCM_256 = "AES-GCM-256", .CHACHA20POLY1305 = "chacha20poly1305", .XCHACHA20POLY1305 = "xchacha20poly1305", + .AEGIS_128L = "AEGIS-128L", + .AEGIS_128L_256 = "AEGIS-128L-256", + .AEGIS_256 = "AEGIS-256", + .AEGIS_256_256 = "AEGIS-256-256", } // TAG_SIZES is the Algorithm to tag size in bytes. @@ -45,6 +54,10 @@ TAG_SIZES := [Algorithm]int { .AES_GCM_256 = aes.GCM_TAG_SIZE, .CHACHA20POLY1305 = chacha20poly1305.TAG_SIZE, .XCHACHA20POLY1305 = chacha20poly1305.TAG_SIZE, + .AEGIS_128L = aegis.TAG_SIZE_128, + .AEGIS_128L_256 = aegis.TAG_SIZE_256, + .AEGIS_256 = aegis.TAG_SIZE_128, + .AEGIS_256_256 = aegis.TAG_SIZE_256, } // KEY_SIZES is the Algorithm to key size in bytes. @@ -55,6 +68,10 @@ KEY_SIZES := [Algorithm]int { .AES_GCM_256 = aes.KEY_SIZE_256, .CHACHA20POLY1305 = chacha20poly1305.KEY_SIZE, .XCHACHA20POLY1305 = chacha20poly1305.KEY_SIZE, + .AEGIS_128L = aegis.KEY_SIZE_128L, + .AEGIS_128L_256 = aegis.KEY_SIZE_128L, + .AEGIS_256 = aegis.KEY_SIZE_256, + .AEGIS_256_256 = aegis.KEY_SIZE_256, } // IV_SIZES is the Algorithm to initialization vector size in bytes. 
@@ -67,6 +84,10 @@ IV_SIZES := [Algorithm]int { .AES_GCM_256 = aes.GCM_IV_SIZE, .CHACHA20POLY1305 = chacha20poly1305.IV_SIZE, .XCHACHA20POLY1305 = chacha20poly1305.XIV_SIZE, + .AEGIS_128L = aegis.IV_SIZE_128L, + .AEGIS_128L_256 = aegis.IV_SIZE_128L, + .AEGIS_256 = aegis.IV_SIZE_256, + .AEGIS_256_256 = aegis.IV_SIZE_256, } // Context is a concrete instantiation of a specific AEAD algorithm. @@ -75,6 +96,7 @@ Context :: struct { _impl: union { aes.Context_GCM, chacha20poly1305.Context, + aegis.Context, }, } @@ -86,6 +108,10 @@ _IMPL_IDS := [Algorithm]typeid { .AES_GCM_256 = typeid_of(aes.Context_GCM), .CHACHA20POLY1305 = typeid_of(chacha20poly1305.Context), .XCHACHA20POLY1305 = typeid_of(chacha20poly1305.Context), + .AEGIS_128L = typeid_of(aegis.Context), + .AEGIS_128L_256 = typeid_of(aegis.Context), + .AEGIS_256 = typeid_of(aegis.Context), + .AEGIS_256_256 = typeid_of(aegis.Context), } // init initializes a Context with a specific AEAD Algorithm. @@ -113,6 +139,9 @@ init :: proc(ctx: ^Context, algorithm: Algorithm, key: []byte, impl: Implementat case .XCHACHA20POLY1305: impl_ := impl != nil ? impl.(chacha20.Implementation) : chacha20.DEFAULT_IMPLEMENTATION chacha20poly1305.init_xchacha(&ctx._impl.(chacha20poly1305.Context), key, impl_) + case .AEGIS_128L, .AEGIS_128L_256, .AEGIS_256, .AEGIS_256_256: + impl_ := impl != nil ? impl.(aes.Implementation) : aes.DEFAULT_IMPLEMENTATION + aegis.init(&ctx._impl.(aegis.Context), key, impl_) case .Invalid: panic("crypto/aead: uninitialized algorithm") case: @@ -127,11 +156,17 @@ init :: proc(ctx: ^Context, algorithm: Algorithm, key: []byte, impl: Implementat // // dst and plaintext MUST alias exactly or not at all. 
seal_ctx :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) { + if len(tag) != TAG_SIZES[ctx._algo] { + panic("crypto/aead: invalid tag size") + } + switch &impl in ctx._impl { case aes.Context_GCM: aes.seal_gcm(&impl, dst, tag, iv, aad, plaintext) case chacha20poly1305.Context: chacha20poly1305.seal(&impl, dst, tag, iv, aad, plaintext) + case aegis.Context: + aegis.seal(&impl, dst, tag, iv, aad, plaintext) case: panic("crypto/aead: uninitialized algorithm") } @@ -145,11 +180,17 @@ seal_ctx :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) { // dst and plaintext MUST alias exactly or not at all. @(require_results) open_ctx :: proc(ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool { + if len(tag) != TAG_SIZES[ctx._algo] { + panic("crypto/aead: invalid tag size") + } + switch &impl in ctx._impl { case aes.Context_GCM: return aes.open_gcm(&impl, dst, iv, aad, ciphertext, tag) case chacha20poly1305.Context: return chacha20poly1305.open(&impl, dst, iv, aad, ciphertext, tag) + case aegis.Context: + return aegis.open(&impl, dst, iv, aad, ciphertext, tag) case: panic("crypto/aead: uninitialized algorithm") } @@ -163,6 +204,8 @@ reset :: proc(ctx: ^Context) { aes.reset_gcm(&impl) case chacha20poly1305.Context: chacha20poly1305.reset(&impl) + case aegis.Context: + aegis.reset(&impl) case: // Calling reset repeatedly is fine. } diff --git a/core/crypto/aegis/aegis.odin b/core/crypto/aegis/aegis.odin new file mode 100644 index 00000000000..96872aad1e7 --- /dev/null +++ b/core/crypto/aegis/aegis.odin @@ -0,0 +1,222 @@ +/* +package aegis implements the AEGIS-128L and AEGIS-256 Authenticated +Encryption with Associated Data algorithms. + +See: +- [[ https://www.ietf.org/archive/id/draft-irtf-cfrg-aegis-aead-12.txt ]] +*/ +package aegis + +import "core:bytes" +import "core:crypto" +import "core:crypto/aes" +import "core:mem" + +// KEY_SIZE_128L is the AEGIS-128L key size in bytes. 
+KEY_SIZE_128L :: 16 +// KEY_SIZE_256 is the AEGIS-256 key size in bytes. +KEY_SIZE_256 :: 32 +// IV_SIZE_128L is the AEGIS-128L IV size in bytes. +IV_SIZE_128L :: 16 +// IV_SIZE_256 is the AEGIS-256 IV size in bytes. +IV_SIZE_256 :: 32 +// TAG_SIZE_128 is the AEGIS-128L or AEGIS-256 128-bit tag size in bytes. +TAG_SIZE_128 :: 16 +// TAG_SIZE_256 is the AEGIS-128L or AEGIS-256 256-bit tag size in bytes. +TAG_SIZE_256 :: 32 + +@(private) +_RATE_128L :: 32 +@(private) +_RATE_256 :: 16 +@(private) +_RATE_MAX :: _RATE_128L + +@(private, rodata) +_C0 := [16]byte{ + 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d, + 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62, +} + +@(private, rodata) +_C1 := [16]byte { + 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1, + 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd, +} + +// Context is a keyed AEGIS-128L or AEGIS-256 instance. +Context :: struct { + _key: [KEY_SIZE_256]byte, + _key_len: int, + _impl: aes.Implementation, + _is_initialized: bool, +} + +@(private) +_validate_common_slice_sizes :: proc (ctx: ^Context, tag, iv, aad, text: []byte) { + switch len(tag) { + case TAG_SIZE_128, TAG_SIZE_256: + case: + panic("crypto/aegis: invalid tag size") + } + + iv_ok: bool + switch ctx._key_len { + case KEY_SIZE_128L: + iv_ok = len(iv) == IV_SIZE_128L + case KEY_SIZE_256: + iv_ok = len(iv) == IV_SIZE_256 + } + if !iv_ok { + panic("crypto/aegis: invalid IV size") + } + + #assert(size_of(int) == 8 || size_of(int) <= 4) + // As A_MAX and P_MAX are both defined to be 2^61 - 1 bytes, and + // the maximum length of a slice is bound by `size_of(int)`, where + // `int` is register sized, there is no need to check AAD/text + // lengths. +} + +// init initializes a Context with the provided key, for AEGIS-128L or AEGIS-256. 
+init :: proc(ctx: ^Context, key: []byte, impl := aes.DEFAULT_IMPLEMENTATION) { + switch len(key) { + case KEY_SIZE_128L, KEY_SIZE_256: + case: + panic("crypto/aegis: invalid key size") + } + + copy(ctx._key[:], key) + ctx._key_len = len(key) + ctx._impl = impl + if ctx._impl == .Hardware && !is_hardware_accelerated() { + ctx._impl = .Portable + } + ctx._is_initialized = true +} + +// seal encrypts the plaintext and authenticates the aad and ciphertext, +// with the provided Context and iv, stores the output in dst and tag. +// +// dst and plaintext MUST alias exactly or not at all. +seal :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) { + assert(ctx._is_initialized) + + _validate_common_slice_sizes(ctx, tag, iv, aad, plaintext) + if len(dst) != len(plaintext) { + panic("crypto/aegis: invalid destination ciphertext size") + } + if bytes.alias_inexactly(dst, plaintext) { + panic("crypto/aegis: dst and plaintext alias inexactly") + } + + switch ctx._impl { + case .Hardware: + st: State_HW + defer reset_state_hw(&st) + + init_hw(ctx, &st, iv) + + aad_len, pt_len := len(aad), len(plaintext) + if aad_len > 0 { + absorb_hw(&st, aad) + } + + if pt_len > 0 { + enc_hw(&st, dst, plaintext) + } + + finalize_hw(&st, tag, aad_len, pt_len) + case .Portable: + st: State_SW + defer reset_state_sw(&st) + + init_sw(ctx, &st, iv) + + aad_len, pt_len := len(aad), len(plaintext) + if aad_len > 0 { + absorb_sw(&st, aad) + } + + if pt_len > 0 { + enc_sw(&st, dst, plaintext) + } + + finalize_sw(&st, tag, aad_len, pt_len) + case: + panic("core/crypto/aegis: not implemented") + } +} + +// open authenticates the aad and ciphertext, and decrypts the ciphertext, +// with the provided Context, iv, and tag, and stores the output in dst, +// returning true iff the authentication was successful. If authentication +// fails, the destination buffer will be zeroed. +// +// dst and ciphertext MUST alias exactly or not at all. 
+@(require_results) +open :: proc(ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool { + assert(ctx._is_initialized) + + _validate_common_slice_sizes(ctx, tag, iv, aad, ciphertext) + if len(dst) != len(ciphertext) { + panic("crypto/aegis: invalid destination plaintext size") + } + if bytes.alias_inexactly(dst, ciphertext) { + panic("crypto/aegis: dst and ciphertext alias inexactly") + } + + tmp: [TAG_SIZE_256]byte + derived_tag := tmp[:len(tag)] + aad_len, ct_len := len(aad), len(ciphertext) + + switch ctx._impl { + case .Hardware: + st: State_HW + defer reset_state_hw(&st) + + init_hw(ctx, &st, iv) + + if aad_len > 0 { + absorb_hw(&st, aad) + } + + if ct_len > 0 { + dec_hw(&st, dst, ciphertext) + } + + finalize_hw(&st, derived_tag, aad_len, ct_len) + case .Portable: + st: State_SW + defer reset_state_sw(&st) + + init_sw(ctx, &st, iv) + + if aad_len > 0 { + absorb_sw(&st, aad) + } + + if ct_len > 0 { + dec_sw(&st, dst, ciphertext) + } + + finalize_sw(&st, derived_tag, aad_len, ct_len) + case: + panic("core/crypto/aegis: not implemented") + } + + if crypto.compare_constant_time(tag, derived_tag) != 1 { + mem.zero_explicit(raw_data(dst), ct_len) + return false + } + + return true +} + +// reset sanitizes the Context. The Context must be +// re-initialized to be used again. +reset :: proc "contextless" (ctx: ^Context) { + mem.zero_explicit(&ctx._key, len(ctx._key)) + ctx._key_len = 0 + ctx._is_initialized = false +} diff --git a/core/crypto/aegis/aegis_impl_ct64.odin b/core/crypto/aegis/aegis_impl_ct64.odin new file mode 100644 index 00000000000..596f1622655 --- /dev/null +++ b/core/crypto/aegis/aegis_impl_ct64.odin @@ -0,0 +1,446 @@ +package aegis + +import aes "core:crypto/_aes/ct64" +import "core:encoding/endian" +import "core:mem" + +// This uses the bitsliced 64-bit general purpose register SWAR AES +// round function. 
The intermediate state is stored in interleaved +// but NOT orthogonalized form, as leaving things in the orthogonalized +// format would overly complicate the update implementation. +// +// The update function leverages the parallelism (4 blocks at once). + +@(private) +State_SW :: struct { + s0_0, s0_1: u64, + s1_0, s1_1: u64, + s2_0, s2_1: u64, + s3_0, s3_1: u64, + s4_0, s4_1: u64, + s5_0, s5_1: u64, + s6_0, s6_1: u64, + s7_0, s7_1: u64, + q_k, q_b: [8]u64, + rate: int, +} + +@(private) +init_sw :: proc "contextless" (ctx: ^Context, st: ^State_SW, iv: []byte) { + switch ctx._key_len { + case KEY_SIZE_128L: + key_0, key_1 := aes.load_interleaved(ctx._key[:16]) + iv_0, iv_1 := aes.load_interleaved(iv) + + st.s0_0, st.s0_1 = aes.xor_interleaved(key_0, key_1, iv_0, iv_1) + st.s1_0, st.s1_1 = aes.load_interleaved(_C1[:]) + st.s2_0, st.s2_1 = aes.load_interleaved(_C0[:]) + st.s3_0, st.s3_1 = st.s1_0, st.s1_1 + st.s4_0, st.s4_1 = st.s0_0, st.s0_1 + st.s5_0, st.s5_1 = aes.xor_interleaved(key_0, key_1, st.s2_0, st.s2_1) + st.s6_0, st.s6_1 = aes.xor_interleaved(key_0, key_1, st.s1_0, st.s1_1) + st.s7_0, st.s7_1 = st.s5_0, st.s5_1 + st.rate = _RATE_128L + + for _ in 0 ..< 10 { + update_sw_128l(st, iv_0, iv_1, key_0, key_1) + } + case KEY_SIZE_256: + k0_0, k0_1 := aes.load_interleaved(ctx._key[:16]) + k1_0, k1_1 := aes.load_interleaved(ctx._key[16:]) + n0_0, n0_1 := aes.load_interleaved(iv[:16]) + n1_0, n1_1 := aes.load_interleaved(iv[16:]) + + st.s0_0, st.s0_1 = aes.xor_interleaved(k0_0, k0_1, n0_0, n0_1) + st.s1_0, st.s1_1 = aes.xor_interleaved(k1_0, k1_1, n1_0, n1_1) + st.s2_0, st.s2_1 = aes.load_interleaved(_C1[:]) + st.s3_0, st.s3_1 = aes.load_interleaved(_C0[:]) + st.s4_0, st.s4_1 = aes.xor_interleaved(k0_0, k0_1, st.s3_0, st.s3_1) + st.s5_0, st.s5_1 = aes.xor_interleaved(k1_0, k1_1, st.s2_0, st.s2_1) + st.rate = _RATE_256 + + u0_0, u0_1, u1_0, u1_1 := st.s0_0, st.s0_1, st.s1_0, st.s1_1 + for _ in 0 ..< 4 { + update_sw_256(st, k0_0, k0_1) + update_sw_256(st, k1_0, k1_1) 
+ update_sw_256(st, u0_0, u0_1) + update_sw_256(st, u1_0, u1_1) + } + } +} + +@(private = "file") +update_sw_128l :: proc "contextless" (st: ^State_SW, m0_0, m0_1, m1_0, m1_1: u64) { + st.q_k[0], st.q_k[4] = aes.xor_interleaved(st.s0_0, st.s0_1, m0_0, m0_1) + st.q_k[1], st.q_k[5] = st.s1_0, st.s1_1 + st.q_k[2], st.q_k[6] = st.s2_0, st.s2_1 + st.q_k[3], st.q_k[7] = st.s3_0, st.s3_1 + aes.orthogonalize(&st.q_k) + + st.q_b[0], st.q_b[4] = st.s7_0, st.s7_1 + st.q_b[1], st.q_b[5] = st.s0_0, st.s0_1 + st.q_b[2], st.q_b[6] = st.s1_0, st.s1_1 + st.q_b[3], st.q_b[7] = st.s2_0, st.s2_1 + aes.orthogonalize(&st.q_b) + + aes.sub_bytes(&st.q_b) + aes.shift_rows(&st.q_b) + aes.mix_columns(&st.q_b) + aes.add_round_key(&st.q_b, st.q_k[:]) + aes.orthogonalize(&st.q_b) + + st.s0_0, st.s0_1 = st.q_b[0], st.q_b[4] + st.s1_0, st.s1_1 = st.q_b[1], st.q_b[5] + st.s2_0, st.s2_1 = st.q_b[2], st.q_b[6] + s3_0, s3_1 := st.q_b[3], st.q_b[7] + + st.q_k[0], st.q_k[4] = aes.xor_interleaved(st.s4_0, st.s4_1, m1_0, m1_1) + st.q_k[1], st.q_k[5] = st.s5_0, st.s5_1 + st.q_k[2], st.q_k[6] = st.s6_0, st.s6_1 + st.q_k[3], st.q_k[7] = st.s7_0, st.s7_1 + aes.orthogonalize(&st.q_k) + + st.q_b[0], st.q_b[4] = st.s3_0, st.s3_1 + st.q_b[1], st.q_b[5] = st.s4_0, st.s4_1 + st.q_b[2], st.q_b[6] = st.s5_0, st.s5_1 + st.q_b[3], st.q_b[7] = st.s6_0, st.s6_1 + aes.orthogonalize(&st.q_b) + + aes.sub_bytes(&st.q_b) + aes.shift_rows(&st.q_b) + aes.mix_columns(&st.q_b) + aes.add_round_key(&st.q_b, st.q_k[:]) + aes.orthogonalize(&st.q_b) + + st.s3_0, st.s3_1 = s3_0, s3_1 + st.s4_0, st.s4_1 = st.q_b[0], st.q_b[4] + st.s5_0, st.s5_1 = st.q_b[1], st.q_b[5] + st.s6_0, st.s6_1 = st.q_b[2], st.q_b[6] + st.s7_0, st.s7_1 = st.q_b[3], st.q_b[7] +} + +@(private = "file") +update_sw_256 :: proc "contextless" (st: ^State_SW, m_0, m_1: u64) { + st.q_k[0], st.q_k[4] = aes.xor_interleaved(st.s0_0, st.s0_1, m_0, m_1) + st.q_k[1], st.q_k[5] = st.s1_0, st.s1_1 + st.q_k[2], st.q_k[6] = st.s2_0, st.s2_1 + st.q_k[3], st.q_k[7] = st.s3_0, 
st.s3_1 + aes.orthogonalize(&st.q_k) + + st.q_b[0], st.q_b[4] = st.s5_0, st.s5_1 + st.q_b[1], st.q_b[5] = st.s0_0, st.s0_1 + st.q_b[2], st.q_b[6] = st.s1_0, st.s1_1 + st.q_b[3], st.q_b[7] = st.s2_0, st.s2_1 + aes.orthogonalize(&st.q_b) + + aes.sub_bytes(&st.q_b) + aes.shift_rows(&st.q_b) + aes.mix_columns(&st.q_b) + aes.add_round_key(&st.q_b, st.q_k[:]) + aes.orthogonalize(&st.q_b) + + st.s0_0, st.s0_1 = st.q_b[0], st.q_b[4] + st.s1_0, st.s1_1 = st.q_b[1], st.q_b[5] + st.s2_0, st.s2_1 = st.q_b[2], st.q_b[6] + s3_0, s3_1 := st.q_b[3], st.q_b[7] + + st.q_k[0], st.q_k[4] = st.s4_0, st.s4_1 + st.q_k[1], st.q_k[5] = st.s5_0, st.s5_1 + aes.orthogonalize(&st.q_k) + + st.q_b[0], st.q_b[4] = st.s3_0, st.s3_1 + st.q_b[1], st.q_b[5] = st.s4_0, st.s4_1 + aes.orthogonalize(&st.q_b) + + aes.sub_bytes(&st.q_b) + aes.shift_rows(&st.q_b) + aes.mix_columns(&st.q_b) + aes.add_round_key(&st.q_b, st.q_k[:]) + aes.orthogonalize(&st.q_b) + + st.s3_0, st.s3_1 = s3_0, s3_1 + st.s4_0, st.s4_1 = st.q_b[0], st.q_b[4] + st.s5_0, st.s5_1 = st.q_b[1], st.q_b[5] +} + +@(private = "file") +absorb_sw_128l :: #force_inline proc "contextless" (st: ^State_SW, ai: []byte) #no_bounds_check { + t0_0, t0_1 := aes.load_interleaved(ai[:16]) + t1_0, t1_1 := aes.load_interleaved(ai[16:]) + update_sw_128l(st, t0_0, t0_1, t1_0, t1_1) +} + +@(private = "file") +absorb_sw_256 :: #force_inline proc "contextless" (st: ^State_SW, ai: []byte) { + m_0, m_1 := aes.load_interleaved(ai) + update_sw_256(st, m_0, m_1) +} + +@(private) +absorb_sw :: proc "contextless" (st: ^State_SW, aad: []byte) #no_bounds_check { + ai, l := aad, len(aad) + + switch st.rate { + case _RATE_128L: + for l >= _RATE_128L { + absorb_sw_128l(st, ai) + ai = ai[_RATE_128L:] + l -= _RATE_128L + } + case _RATE_256: + for l >= _RATE_256 { + absorb_sw_256(st, ai) + + ai = ai[_RATE_256:] + l -= _RATE_256 + } + } + + // Pad out the remainder with `0`s till it is rate sized. + if l > 0 { + tmp: [_RATE_MAX]byte // AAD is not confidential. 
+ copy(tmp[:], ai) + switch st.rate { + case _RATE_128L: + absorb_sw_128l(st, tmp[:]) + case _RATE_256: + absorb_sw_256(st, tmp[:]) + } + } +} + +@(private = "file", require_results) +z_sw_128l :: proc "contextless" (st: ^State_SW) -> (u64, u64, u64, u64) { + z0_0, z0_1 := aes.and_interleaved(st.s2_0, st.s2_1, st.s3_0, st.s3_1) + z0_0, z0_1 = aes.xor_interleaved(st.s1_0, st.s1_1, z0_0, z0_1) + z0_0, z0_1 = aes.xor_interleaved(st.s6_0, st.s6_1, z0_0, z0_1) + + z1_0, z1_1 := aes.and_interleaved(st.s6_0, st.s6_1, st.s7_0, st.s7_1) + z1_0, z1_1 = aes.xor_interleaved(st.s5_0, st.s5_1, z1_0, z1_1) + z1_0, z1_1 = aes.xor_interleaved(st.s2_0, st.s2_1, z1_0, z1_1) + + return z0_0, z0_1, z1_0, z1_1 +} + +@(private = "file", require_results) +z_sw_256 :: proc "contextless" (st: ^State_SW) -> (u64, u64) { + z_0, z_1 := aes.and_interleaved(st.s2_0, st.s2_1, st.s3_0, st.s3_1) + z_0, z_1 = aes.xor_interleaved(st.s5_0, st.s5_1, z_0, z_1) + z_0, z_1 = aes.xor_interleaved(st.s4_0, st.s4_1, z_0, z_1) + return aes.xor_interleaved(st.s1_0, st.s1_1, z_0, z_1) +} + +@(private = "file") +enc_sw_128l :: #force_inline proc "contextless" (st: ^State_SW, ci, xi: []byte) #no_bounds_check { + z0_0, z0_1, z1_0, z1_1 := z_sw_128l(st) + + t0_0, t0_1 := aes.load_interleaved(xi[:16]) + t1_0, t1_1 := aes.load_interleaved(xi[16:]) + update_sw_128l(st, t0_0, t0_1, t1_0, t1_1) + + out0_0, out0_1 := aes.xor_interleaved(t0_0, t0_1, z0_0, z0_1) + out1_0, out1_1 := aes.xor_interleaved(t1_0, t1_1, z1_0, z1_1) + aes.store_interleaved(ci[:16], out0_0, out0_1) + aes.store_interleaved(ci[16:], out1_0, out1_1) +} + +@(private = "file") +enc_sw_256 :: #force_inline proc "contextless" (st: ^State_SW, ci, xi: []byte) #no_bounds_check { + z_0, z_1 := z_sw_256(st) + + xi_0, xi_1 := aes.load_interleaved(xi) + update_sw_256(st, xi_0, xi_1) + + ci_0, ci_1 := aes.xor_interleaved(xi_0, xi_1, z_0, z_1) + aes.store_interleaved(ci, ci_0, ci_1) +} + +@(private) +enc_sw :: proc "contextless" (st: ^State_SW, dst, src: []byte) 
#no_bounds_check { + ci, xi, l := dst, src, len(src) + + switch st.rate { + case _RATE_128L: + for l >= _RATE_128L { + enc_sw_128l(st, ci, xi) + ci = ci[_RATE_128L:] + xi = xi[_RATE_128L:] + l -= _RATE_128L + } + case _RATE_256: + for l >= _RATE_256 { + enc_sw_256(st, ci, xi) + ci = ci[_RATE_256:] + xi = xi[_RATE_256:] + l -= _RATE_256 + } + } + + // Pad out the remainder with `0`s till it is rate sized. + if l > 0 { + tmp: [_RATE_MAX]byte // Ciphertext is not confidential. + copy(tmp[:], xi) + switch st.rate { + case _RATE_128L: + enc_sw_128l(st, tmp[:], tmp[:]) + case _RATE_256: + enc_sw_256(st, tmp[:], tmp[:]) + } + copy(ci, tmp[:l]) + } +} + +@(private = "file") +dec_sw_128l :: #force_inline proc "contextless" (st: ^State_SW, xi, ci: []byte) #no_bounds_check { + z0_0, z0_1, z1_0, z1_1 := z_sw_128l(st) + + t0_0, t0_1 := aes.load_interleaved(ci[:16]) + t1_0, t1_1 := aes.load_interleaved(ci[16:]) + out0_0, out0_1 := aes.xor_interleaved(t0_0, t0_1, z0_0, z0_1) + out1_0, out1_1 := aes.xor_interleaved(t1_0, t1_1, z1_0, z1_1) + + update_sw_128l(st, out0_0, out0_1, out1_0, out1_1) + aes.store_interleaved(xi[:16], out0_0, out0_1) + aes.store_interleaved(xi[16:], out1_0, out1_1) +} + +@(private = "file") +dec_sw_256 :: #force_inline proc "contextless" (st: ^State_SW, xi, ci: []byte) #no_bounds_check { + z_0, z_1 := z_sw_256(st) + + ci_0, ci_1 := aes.load_interleaved(ci) + xi_0, xi_1 := aes.xor_interleaved(ci_0, ci_1, z_0, z_1) + + update_sw_256(st, xi_0, xi_1) + aes.store_interleaved(xi, xi_0, xi_1) +} + +@(private = "file") +dec_partial_sw_128l :: proc "contextless" (st: ^State_SW, xn, cn: []byte) #no_bounds_check { + tmp: [_RATE_128L]byte + defer mem.zero_explicit(&tmp, size_of(tmp)) + + z0_0, z0_1, z1_0, z1_1 := z_sw_128l(st) + copy(tmp[:], cn) + + t0_0, t0_1 := aes.load_interleaved(tmp[:16]) + t1_0, t1_1 := aes.load_interleaved(tmp[16:]) + out0_0, out0_1 := aes.xor_interleaved(t0_0, t0_1, z0_0, z0_1) + out1_0, out1_1 := aes.xor_interleaved(t1_0, t1_1, z1_0, z1_1) + + 
aes.store_interleaved(tmp[:16], out0_0, out0_1) + aes.store_interleaved(tmp[16:], out1_0, out1_1) + copy(xn, tmp[:]) + + for off := len(xn); off < _RATE_128L; off += 1 { + tmp[off] = 0 + } + out0_0, out0_1 = aes.load_interleaved(tmp[:16]) + out1_0, out1_1 = aes.load_interleaved(tmp[16:]) + update_sw_128l(st, out0_0, out0_1, out1_0, out1_1) +} + +@(private = "file") +dec_partial_sw_256 :: proc "contextless" (st: ^State_SW, xn, cn: []byte) #no_bounds_check { + tmp: [_RATE_256]byte + defer mem.zero_explicit(&tmp, size_of(tmp)) + + z_0, z_1 := z_sw_256(st) + copy(tmp[:], cn) + + cn_0, cn_1 := aes.load_interleaved(tmp[:]) + xn_0, xn_1 := aes.xor_interleaved(cn_0, cn_1, z_0, z_1) + + aes.store_interleaved(tmp[:], xn_0, xn_1) + copy(xn, tmp[:]) + + for off := len(xn); off < _RATE_256; off += 1 { + tmp[off] = 0 + } + xn_0, xn_1 = aes.load_interleaved(tmp[:]) + update_sw_256(st, xn_0, xn_1) +} + +@(private) +dec_sw :: proc "contextless" (st: ^State_SW, dst, src: []byte) #no_bounds_check { + xi, ci, l := dst, src, len(src) + + switch st.rate { + case _RATE_128L: + for l >= _RATE_128L { + dec_sw_128l(st, xi, ci) + xi = xi[_RATE_128L:] + ci = ci[_RATE_128L:] + l -= _RATE_128L + } + case _RATE_256: + for l >= _RATE_256 { + dec_sw_256(st, xi, ci) + xi = xi[_RATE_256:] + ci = ci[_RATE_256:] + l -= _RATE_256 + } + } + + // Process the remainder. 
+ if l > 0 { + switch st.rate { + case _RATE_128L: + dec_partial_sw_128l(st, xi, ci) + case _RATE_256: + dec_partial_sw_256(st, xi, ci) + } + } +} + +@(private) +finalize_sw :: proc "contextless" (st: ^State_SW, tag: []byte, ad_len, msg_len: int) { + tmp: [16]byte + endian.unchecked_put_u64le(tmp[0:], u64(ad_len) * 8) + endian.unchecked_put_u64le(tmp[8:], u64(msg_len) * 8) + + t_0, t_1 := aes.load_interleaved(tmp[:]) + + t0_0, t0_1, t1_0, t1_1: u64 = ---, ---, ---, --- + switch st.rate { + case _RATE_128L: + t_0, t_1 = aes.xor_interleaved(st.s2_0, st.s2_1, t_0, t_1) + for _ in 0 ..< 7 { + update_sw_128l(st, t_0, t_1, t_0, t_1) + } + + t0_0, t0_1 = aes.xor_interleaved(st.s0_0, st.s0_1, st.s1_0, st.s1_1) + t0_0, t0_1 = aes.xor_interleaved(t0_0, t0_1, st.s2_0, st.s2_1) + t0_0, t0_1 = aes.xor_interleaved(t0_0, t0_1, st.s3_0, st.s3_1) + + t1_0, t1_1 = aes.xor_interleaved(st.s4_0, st.s4_1, st.s5_0, st.s5_1) + t1_0, t1_1 = aes.xor_interleaved(t1_0, t1_1, st.s6_0, st.s6_1) + if len(tag) == TAG_SIZE_256 { + t1_0, t1_1 = aes.xor_interleaved(t1_0, t1_1, st.s7_0, st.s7_1) + } + case _RATE_256: + t_0, t_1 = aes.xor_interleaved(st.s3_0, st.s3_1, t_0, t_1) + for _ in 0 ..< 7 { + update_sw_256(st, t_0, t_1) + } + + t0_0, t0_1 = aes.xor_interleaved(st.s0_0, st.s0_1, st.s1_0, st.s1_1) + t0_0, t0_1 = aes.xor_interleaved(t0_0, t0_1, st.s2_0, st.s2_1) + + t1_0, t1_1 = aes.xor_interleaved(st.s3_0, st.s3_1, st.s4_0, st.s4_1) + t1_0, t1_1 = aes.xor_interleaved(t1_0, t1_1, st.s5_0, st.s5_1) + } + switch len(tag) { + case TAG_SIZE_128: + t0_0, t0_1 = aes.xor_interleaved(t0_0, t0_1, t1_0, t1_1) + aes.store_interleaved(tag, t0_0, t0_1) + case TAG_SIZE_256: + aes.store_interleaved(tag[:16], t0_0, t0_1) + aes.store_interleaved(tag[16:], t1_0, t1_1) + } +} + +@(private) +reset_state_sw :: proc "contextless" (st: ^State_SW) { + mem.zero_explicit(st, size_of(st^)) +} diff --git a/core/crypto/aegis/aegis_impl_hw_gen.odin b/core/crypto/aegis/aegis_impl_hw_gen.odin new file mode 100644 index 
00000000000..5ec2f3d6e5d --- /dev/null +++ b/core/crypto/aegis/aegis_impl_hw_gen.odin @@ -0,0 +1,44 @@ +#+build !amd64 +package aegis + +@(private = "file") +ERR_HW_NOT_SUPPORTED :: "crypto/aegis: hardware implementation unsupported" + +@(private) +State_HW :: struct {} + +// is_hardware_accelerated returns true iff hardware accelerated AEGIS +// is supported. +is_hardware_accelerated :: proc "contextless" () -> bool { + return false +} + +@(private) +init_hw :: proc "contextless" (ctx: ^Context, st: ^State_HW, iv: []byte) { + panic_contextless(ERR_HW_NOT_SUPPORTED) +} + +@(private) +absorb_hw :: proc "contextless" (st: ^State_HW, aad: []byte) { + panic_contextless(ERR_HW_NOT_SUPPORTED) +} + +@(private) +enc_hw :: proc "contextless" (st: ^State_HW, dst, src: []byte) { + panic_contextless(ERR_HW_NOT_SUPPORTED) +} + +@(private) +dec_hw :: proc "contextless" (st: ^State_HW, dst, src: []byte) { + panic_contextless(ERR_HW_NOT_SUPPORTED) +} + +@(private) +finalize_hw :: proc "contextless" (st: ^State_HW, tag: []byte, ad_len, msg_len: int) { + panic_contextless(ERR_HW_NOT_SUPPORTED) +} + +@(private) +reset_state_hw :: proc "contextless" (st: ^State_HW) { + panic_contextless(ERR_HW_NOT_SUPPORTED) +} diff --git a/core/crypto/aegis/aegis_impl_hw_intel.odin b/core/crypto/aegis/aegis_impl_hw_intel.odin new file mode 100644 index 00000000000..5334f325862 --- /dev/null +++ b/core/crypto/aegis/aegis_impl_hw_intel.odin @@ -0,0 +1,389 @@ +#+build amd64 +package aegis + +import "base:intrinsics" +import "core:crypto/aes" +import "core:encoding/endian" +import "core:mem" +import "core:simd/x86" + +@(private) +State_HW :: struct { + s0: x86.__m128i, + s1: x86.__m128i, + s2: x86.__m128i, + s3: x86.__m128i, + s4: x86.__m128i, + s5: x86.__m128i, + s6: x86.__m128i, + s7: x86.__m128i, + rate: int, +} + +// is_hardware_accelerated returns true iff hardware accelerated AEGIS +// is supported. 
+is_hardware_accelerated :: proc "contextless" () -> bool {
+	// Delegates entirely to the aes package's hardware check.
+	return aes.is_hardware_accelerated()
+}
+
+// init_hw fills st from the key stored in ctx and the caller's iv, then runs
+// the initialization rounds. ctx._key_len selects the variant:
+//   - KEY_SIZE_128L: one 16-byte key block and one 16-byte iv block are
+//     loaded, all eight lanes s0..s7 are set, st.rate becomes _RATE_128L,
+//     and update_hw_128l(st, iv, key) is applied 10 times.
+//   - KEY_SIZE_256: two 16-byte blocks of each are loaded (offsets 0 and
+//     16), lanes s0..s5 are set, st.rate becomes _RATE_256, and 4 rounds of
+//     update_hw_256 with k0, k1, k0^n0, k1^n1 are applied.
+// Any other ctx._key_len leaves st untouched. No explicit length validation
+// is done here; iv must hold at least 16 (128L) or 32 (256) bytes.
+// NOTE(review): presumably validated by the caller -- confirm.
+@(private, enable_target_feature = "sse2,aes")
+init_hw :: proc "contextless" (ctx: ^Context, st: ^State_HW, iv: []byte) {
+	switch ctx._key_len {
+	case KEY_SIZE_128L:
+		key := intrinsics.unaligned_load((^x86.__m128i)(&ctx._key[0]))
+		// Shadows the `iv` slice parameter with its first 128-bit block.
+		iv := intrinsics.unaligned_load((^x86.__m128i)(raw_data(iv)))
+
+		st.s0 = x86._mm_xor_si128(key, iv)
+		st.s1 = intrinsics.unaligned_load((^x86.__m128i)(&_C1[0]))
+		st.s2 = intrinsics.unaligned_load((^x86.__m128i)(&_C0[0]))
+		st.s3 = st.s1
+		st.s4 = st.s0
+		st.s5 = x86._mm_xor_si128(key, st.s2) // key ^ C0
+		st.s6 = x86._mm_xor_si128(key, st.s1) // key ^ C1
+		st.s7 = st.s5
+		st.rate = _RATE_128L
+
+		for _ in 0 ..< 10 {
+			update_hw_128l(st, iv, key)
+		}
+	case KEY_SIZE_256:
+		k0 := intrinsics.unaligned_load((^x86.__m128i)(&ctx._key[0]))
+		k1 := intrinsics.unaligned_load((^x86.__m128i)(&ctx._key[16]))
+		n0 := intrinsics.unaligned_load((^x86.__m128i)(&iv[0]))
+		n1 := intrinsics.unaligned_load((^x86.__m128i)(&iv[16]))
+
+		st.s0 = x86._mm_xor_si128(k0, n0)
+		st.s1 = x86._mm_xor_si128(k1, n1)
+		st.s2 = intrinsics.unaligned_load((^x86.__m128i)(&_C1[0]))
+		st.s3 = intrinsics.unaligned_load((^x86.__m128i)(&_C0[0]))
+		st.s4 = x86._mm_xor_si128(k0, st.s3) // k0 ^ C0
+		st.s5 = x86._mm_xor_si128(k1, st.s2) // k1 ^ C1
+		st.rate = _RATE_256
+
+		// Capture k0 ^ n0 and k1 ^ n1 before the loop overwrites s0/s1.
+		u0, u1 := st.s0, st.s1
+		for _ in 0 ..< 4 {
+			update_hw_256(st, k0)
+			update_hw_256(st, k1)
+			update_hw_256(st, u0)
+			update_hw_256(st, u1)
+		}
+	}
+}
+
+// update_hw_128l applies one state update to all eight lanes. Each new lane
+// is `_mm_aesenc_si128(previous lane, current lane)`, with lane 0 wrapping
+// around to s7; m0 is XORed into the s0 operand and m1 into the s4 operand.
+// Every result is computed from the old state before any lane is stored.
+@(private = "file", enable_target_feature = "sse2,aes")
+update_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, m0, m1: x86.__m128i) {
+	s0_ := x86._mm_aesenc_si128(st.s7, x86._mm_xor_si128(st.s0, m0))
+	s1_ := x86._mm_aesenc_si128(st.s0, st.s1)
+	s2_ := x86._mm_aesenc_si128(st.s1, st.s2)
+	s3_ := x86._mm_aesenc_si128(st.s2, st.s3)
+	s4_ := x86._mm_aesenc_si128(st.s3, x86._mm_xor_si128(st.s4, m1))
+	s5_ := x86._mm_aesenc_si128(st.s4, st.s5)
+	s6_ := x86._mm_aesenc_si128(st.s5, st.s6)
+	s7_ := x86._mm_aesenc_si128(st.s6, st.s7)
+	st.s0, st.s1, st.s2, st.s3, st.s4, st.s5, st.s6, st.s7 = s0_, s1_, s2_, s3_, s4_, s5_, s6_, s7_
+}
+
+// update_hw_256 applies one state update to lanes s0..s5, following the same
+// pattern as update_hw_128l: lane 0 wraps around to s5 and m is XORed into
+// the s0 operand. s6 and s7 are not touched.
+@(private = "file", enable_target_feature = "sse2,aes")
+update_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, m: x86.__m128i) {
+	s0_ := x86._mm_aesenc_si128(st.s5, x86._mm_xor_si128(st.s0, m))
+	s1_ := x86._mm_aesenc_si128(st.s0, st.s1)
+	s2_ := x86._mm_aesenc_si128(st.s1, st.s2)
+	s3_ := x86._mm_aesenc_si128(st.s2, st.s3)
+	s4_ := x86._mm_aesenc_si128(st.s3, st.s4)
+	s5_ := x86._mm_aesenc_si128(st.s4, st.s5)
+	st.s0, st.s1, st.s2, st.s3, st.s4, st.s5 = s0_, s1_, s2_, s3_, s4_, s5_
+}
+
+// absorb_hw_128l feeds the first 32 bytes of ai (two 128-bit blocks at
+// offsets 0 and 16) into the state. ai must hold at least 32 bytes.
+@(private = "file", enable_target_feature = "sse2,aes")
+absorb_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, ai: []byte) {
+	t0 := intrinsics.unaligned_load((^x86.__m128i)(&ai[0]))
+	t1 := intrinsics.unaligned_load((^x86.__m128i)(&ai[16]))
+	update_hw_128l(st, t0, t1)
+}
+
+// absorb_hw_256 feeds the first 16 bytes of ai into the state. ai must hold
+// at least 16 bytes.
+@(private = "file", enable_target_feature = "sse2,aes")
+absorb_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, ai: []byte) {
+	m := intrinsics.unaligned_load((^x86.__m128i)(&ai[0]))
+	update_hw_256(st, m)
+}
+
+// absorb_hw feeds aad into the state in rate-sized chunks, dispatching on
+// st.rate. A trailing partial chunk is copied into a zero-initialized
+// buffer, so it is absorbed with `0` padding. Empty aad is a no-op.
+@(private, enable_target_feature = "sse2,aes")
+absorb_hw :: proc "contextless" (st: ^State_HW, aad: []byte) #no_bounds_check {
+	ai, l := aad, len(aad)
+
+	switch st.rate {
+	case _RATE_128L:
+		for l >= _RATE_128L {
+			absorb_hw_128l(st, ai)
+			ai = ai[_RATE_128L:]
+			l -= _RATE_128L
+		}
+	case _RATE_256:
+		for l >= _RATE_256 {
+			absorb_hw_256(st, ai)
+
+			ai = ai[_RATE_256:]
+			l -= _RATE_256
+		}
+	}
+
+	// Pad out the remainder with `0`s till it is rate sized.
+	if l > 0 {
+		tmp: [_RATE_MAX]byte // AAD is not confidential.
+		copy(tmp[:], ai)
+		switch st.rate {
+		case _RATE_128L:
+			absorb_hw_128l(st, tmp[:])
+		case _RATE_256:
+			absorb_hw_256(st, tmp[:])
+		}
+	}
+}
+
+// z_hw_128l returns the two 128-bit values that the 128L enc/dec routines
+// XOR with the data:
+//   z0 = s6 ^ s1 ^ (s2 & s3)
+//   z1 = s2 ^ s5 ^ (s6 & s7)
+// It only reads st.
+@(private = "file", enable_target_feature = "sse2", require_results)
+z_hw_128l :: #force_inline proc "contextless" (st: ^State_HW) -> (x86.__m128i, x86.__m128i) {
+	z0 := x86._mm_xor_si128(
+		st.s6,
+		x86._mm_xor_si128(
+			st.s1,
+			x86._mm_and_si128(st.s2, st.s3),
+		),
+	)
+	z1 := x86._mm_xor_si128(
+		st.s2,
+		x86._mm_xor_si128(
+			st.s5,
+			x86._mm_and_si128(st.s6, st.s7),
+		),
+	)
+	return z0, z1
+}
+
+// z_hw_256 returns the 128-bit value that the 256 enc/dec routines XOR with
+// the data: z = s1 ^ s4 ^ s5 ^ (s2 & s3). It only reads st.
+@(private = "file", enable_target_feature = "sse2", require_results)
+z_hw_256 :: #force_inline proc "contextless" (st: ^State_HW) -> x86.__m128i {
+	return x86._mm_xor_si128(
+		st.s1,
+		x86._mm_xor_si128(
+			st.s4,
+			x86._mm_xor_si128(
+				st.s5,
+				x86._mm_and_si128(st.s2, st.s3),
+			),
+		),
+	)
+}
+
+// enc_hw_128l encrypts one 32-byte block: it reads the plaintext from xi,
+// updates the state with it, and writes plaintext ^ z to ci, where z is
+// taken from the state before the update. Both loads happen before either
+// store, so ci and xi may be the same buffer (enc_hw relies on this).
+@(private = "file", enable_target_feature = "sse2,aes")
+enc_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, ci, xi: []byte) #no_bounds_check {
+	z0, z1 := z_hw_128l(st)
+
+	t0 := intrinsics.unaligned_load((^x86.__m128i)(&xi[0]))
+	t1 := intrinsics.unaligned_load((^x86.__m128i)(&xi[16]))
+	update_hw_128l(st, t0, t1)
+
+	out0 := x86._mm_xor_si128(t0, z0)
+	out1 := x86._mm_xor_si128(t1, z1)
+	intrinsics.unaligned_store((^x86.__m128i)(&ci[0]), out0)
+	intrinsics.unaligned_store((^x86.__m128i)(&ci[16]), out1)
+}
+
+// enc_hw_256 encrypts one 16-byte block: it reads the plaintext from xi,
+// updates the state with it, and writes plaintext ^ z to ci, where z is
+// taken from the state before the update. The load precedes the store, so
+// ci and xi may be the same buffer.
+@(private = "file", enable_target_feature = "sse2,aes")
+enc_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, ci, xi: []byte) #no_bounds_check {
+	z := z_hw_256(st)
+
+	xi_ := intrinsics.unaligned_load((^x86.__m128i)(raw_data(xi)))
+	update_hw_256(st, xi_)
+
+	ci_ := x86._mm_xor_si128(xi_, z)
+	intrinsics.unaligned_store((^x86.__m128i)(raw_data(ci)), ci_)
+}
+
+// enc_hw encrypts src into dst in rate-sized chunks, dispatching on st.rate.
+// A trailing partial chunk is zero-padded in a scratch buffer, encrypted in
+// place there, and only its first l bytes are copied out. Bounds checks are
+// disabled, so dst must hold at least len(src) bytes.
+@(private, enable_target_feature = "sse2,aes")
+enc_hw :: proc "contextless" (st: ^State_HW, dst, src: []byte) #no_bounds_check {
+	ci, xi, l := dst, src, len(src)
+
+	switch st.rate {
+	case _RATE_128L:
+		for l >= _RATE_128L {
+			enc_hw_128l(st, ci, xi)
+			ci = ci[_RATE_128L:]
+			xi = xi[_RATE_128L:]
+			l -= _RATE_128L
+		}
+	case _RATE_256:
+		for l >= _RATE_256 {
+			enc_hw_256(st, ci, xi)
+			ci = ci[_RATE_256:]
+			xi = xi[_RATE_256:]
+			l -= _RATE_256
+		}
+	}
+
+	// Pad out the remainder with `0`s till it is rate sized.
+	if l > 0 {
+		// NOTE(review): tmp holds the plaintext tail until the in-place call
+		// below overwrites it; afterwards the padding bytes of the encrypted
+		// chunk equal the corresponding z bytes, and tmp is not wiped --
+		// confirm this is acceptable.
+		tmp: [_RATE_MAX]byte // Ciphertext is not confidential.
+		copy(tmp[:], xi)
+		switch st.rate {
+		case _RATE_128L:
+			enc_hw_128l(st, tmp[:], tmp[:])
+		case _RATE_256:
+			enc_hw_256(st, tmp[:], tmp[:])
+		}
+		copy(ci, tmp[:l])
+	}
+}
+
+// dec_hw_128l decrypts one 32-byte block: it reads the ciphertext from ci,
+// XORs it with z, updates the state with the recovered plaintext, and
+// writes that plaintext to xi.
+@(private = "file", enable_target_feature = "sse2,aes")
+dec_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, xi, ci: []byte) #no_bounds_check {
+	z0, z1 := z_hw_128l(st)
+
+	t0 := intrinsics.unaligned_load((^x86.__m128i)(&ci[0]))
+	t1 := intrinsics.unaligned_load((^x86.__m128i)(&ci[16]))
+	out0 := x86._mm_xor_si128(t0, z0)
+	out1 := x86._mm_xor_si128(t1, z1)
+
+	update_hw_128l(st, out0, out1)
+	intrinsics.unaligned_store((^x86.__m128i)(&xi[0]), out0)
+	intrinsics.unaligned_store((^x86.__m128i)(&xi[16]), out1)
+}
+
+// dec_hw_256 decrypts one 16-byte block: it reads the ciphertext from ci,
+// XORs it with z, updates the state with the recovered plaintext, and
+// writes that plaintext to xi.
+@(private = "file", enable_target_feature = "sse2,aes")
+dec_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, xi, ci: []byte) #no_bounds_check {
+	z := z_hw_256(st)
+
+	ci_ := intrinsics.unaligned_load((^x86.__m128i)(raw_data(ci)))
+	xi_ := x86._mm_xor_si128(ci_, z)
+
+	update_hw_256(st, xi_)
+	intrinsics.unaligned_store((^x86.__m128i)(raw_data(xi)), xi_)
+}
+
+// dec_partial_hw_128l decrypts a final chunk shorter than the rate. The
+// ciphertext cn is zero-padded in tmp and XORed with z, and len(xn) bytes of
+// the result are copied to xn. The bytes past len(xn) are then cleared, so
+// the state is updated with the plaintext padded with `0`s rather than with
+// z-derived bytes. tmp is wiped on return.
+@(private = "file", enable_target_feature = "sse2,aes")
+dec_partial_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, xn, cn: []byte) #no_bounds_check {
+	tmp: [_RATE_128L]byte
+	defer mem.zero_explicit(&tmp, size_of(tmp))
+
+	z0, z1 := z_hw_128l(st)
+	copy(tmp[:], cn)
+
+	t0 := intrinsics.unaligned_load((^x86.__m128i)(&tmp[0]))
+	t1 := intrinsics.unaligned_load((^x86.__m128i)(&tmp[16]))
+	out0 := x86._mm_xor_si128(t0, z0)
+	out1 := x86._mm_xor_si128(t1, z1)
+
+	intrinsics.unaligned_store((^x86.__m128i)(&tmp[0]), out0)
+	intrinsics.unaligned_store((^x86.__m128i)(&tmp[16]), out1)
+	copy(xn, tmp[:])
+
+	// Clear everything past the real plaintext before it is absorbed.
+	for off := len(xn); off < _RATE_128L; off += 1 {
+		tmp[off] = 0
+	}
+	out0 = intrinsics.unaligned_load((^x86.__m128i)(&tmp[0])) // v0
+	out1 = intrinsics.unaligned_load((^x86.__m128i)(&tmp[16])) // v1
+	update_hw_128l(st, out0, out1)
+}
+
+// dec_partial_hw_256 is the single-block counterpart of
+// dec_partial_hw_128l: it decrypts a final chunk shorter than the rate via
+// a zero-padded scratch buffer, copies len(xn) plaintext bytes to xn, clears
+// the bytes past them, and updates the state with the zero-padded plaintext.
+// tmp is wiped on return.
+@(private = "file", enable_target_feature = "sse2,aes")
+dec_partial_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, xn, cn: []byte) #no_bounds_check {
+	tmp: [_RATE_256]byte
+	defer mem.zero_explicit(&tmp, size_of(tmp))
+
+	z := z_hw_256(st)
+	copy(tmp[:], cn)
+
+	cn_ := intrinsics.unaligned_load((^x86.__m128i)(&tmp[0]))
+	xn_ := x86._mm_xor_si128(cn_, z)
+
+	intrinsics.unaligned_store((^x86.__m128i)(&tmp[0]), xn_)
+	copy(xn, tmp[:])
+
+	// Clear everything past the real plaintext before it is absorbed.
+	for off := len(xn); off < _RATE_256; off += 1 {
+		tmp[off] = 0
+	}
+	xn_ = intrinsics.unaligned_load((^x86.__m128i)(&tmp[0]))
+	update_hw_256(st, xn_)
+}
+
+// dec_hw decrypts src into dst in rate-sized chunks, dispatching on st.rate,
+// and hands any trailing partial chunk to the matching dec_partial_hw_*
+// routine. Bounds checks are disabled, so dst must hold at least len(src)
+// bytes. It does not verify a tag.
+@(private, enable_target_feature = "sse2,aes")
+dec_hw :: proc "contextless" (st: ^State_HW, dst, src: []byte) #no_bounds_check {
+	xi, ci, l := dst, src, len(src)
+
+	switch st.rate {
+	case _RATE_128L:
+		for l >= _RATE_128L {
+			dec_hw_128l(st, xi, ci)
+			xi = xi[_RATE_128L:]
+			ci = ci[_RATE_128L:]
+			l -= _RATE_128L
+		}
+	case _RATE_256:
+		for l >= _RATE_256 {
+			dec_hw_256(st, xi, ci)
+			xi = xi[_RATE_256:]
+			ci = ci[_RATE_256:]
+			l -= _RATE_256
+		}
+	}
+
+	// Process the remainder.
+	if l > 0 {
+		switch st.rate {
+		case _RATE_128L:
+			dec_partial_hw_128l(st, xi, ci)
+		case _RATE_256:
+			dec_partial_hw_256(st, xi, ci)
+		}
+	}
+}
+
+// finalize_hw derives the authentication tag and writes it to tag.
+// ad_len * 8 and msg_len * 8 are encoded as two little-endian u64 values
+// (presumably byte lengths converted to bit lengths -- confirm). That block
+// is XORed with s2 (128L) or s3 (256) and used as the message for 7 state
+// updates. The lanes are then folded into two halves t0 and t1.
+// len(tag) selects the output: TAG_SIZE_128 stores t0 ^ t1 (16 bytes);
+// TAG_SIZE_256 stores t0 followed by t1 (32 bytes). Any other tag length
+// writes nothing.
+@(private, enable_target_feature = "sse2,aes")
+finalize_hw :: proc "contextless" (st: ^State_HW, tag: []byte, ad_len, msg_len: int) {
+	tmp: [16]byte
+	endian.unchecked_put_u64le(tmp[0:], u64(ad_len) * 8)
+	endian.unchecked_put_u64le(tmp[8:], u64(msg_len) * 8)
+
+	t := intrinsics.unaligned_load((^x86.__m128i)(&tmp[0]))
+
+	// Left uninitialized; both rate cases below assign them.
+	// NOTE(review): a rate other than the two handled values would leave
+	// them undefined -- presumably impossible after init_hw, confirm.
+	t0, t1: x86.__m128i = ---, ---
+	switch st.rate {
+	case _RATE_128L:
+		t = x86._mm_xor_si128(st.s2, t)
+		for _ in 0 ..< 7 {
+			update_hw_128l(st, t, t)
+		}
+
+		t0 = x86._mm_xor_si128(st.s0, st.s1)
+		t0 = x86._mm_xor_si128(t0, st.s2)
+		t0 = x86._mm_xor_si128(t0, st.s3)
+
+		t1 = x86._mm_xor_si128(st.s4, st.s5)
+		t1 = x86._mm_xor_si128(t1, st.s6)
+		// s7 is folded in only for the 32-byte tag.
+		if len(tag) == TAG_SIZE_256 {
+			t1 = x86._mm_xor_si128(t1, st.s7)
+		}
+	case _RATE_256:
+		t = x86._mm_xor_si128(st.s3, t)
+		for _ in 0 ..< 7 {
+			update_hw_256(st, t)
+		}
+
+		t0 = x86._mm_xor_si128(st.s0, st.s1)
+		t0 = x86._mm_xor_si128(t0, st.s2)
+
+		t1 = x86._mm_xor_si128(st.s3, st.s4)
+		t1 = x86._mm_xor_si128(t1, st.s5)
+	}
+	switch len(tag) {
+	case TAG_SIZE_128:
+		t0 = x86._mm_xor_si128(t0, t1)
+		intrinsics.unaligned_store((^x86.__m128i)(&tag[0]), t0)
+	case TAG_SIZE_256:
+		intrinsics.unaligned_store((^x86.__m128i)(&tag[0]), t0)
+		intrinsics.unaligned_store((^x86.__m128i)(&tag[16]), t1)
+	}
+}
+
+// reset_state_hw wipes the whole state, including `rate`, with
+// mem.zero_explicit.
+@(private)
+reset_state_hw :: proc "contextless" (st: ^State_HW) {
+	mem.zero_explicit(st, size_of(st^))
+}
diff --git a/examples/all/all_main.odin b/examples/all/all_main.odin index 482958a5f16..c540dbb3198 100644 --- a/examples/all/all_main.odin +++ b/examples/all/all_main.odin @@ -26,6 +26,7 @@ import topological_sort "core:container/topological_sort" import crypto "core:crypto" import aead "core:crypto/aead" +import aegis "core:crypto/aegis" import aes "core:crypto/aes" import blake2b "core:crypto/blake2b" import blake2s "core:crypto/blake2s" @@ -169,6
+170,7 @@ _ :: topological_sort _ :: crypto _ :: crypto_hash _ :: aead +_ :: aegis _ :: aes _ :: blake2b _ :: blake2s diff --git a/tests/benchmark/crypto/benchmark_crypto.odin b/tests/benchmark/crypto/benchmark_crypto.odin index 72e6d09328d..dfa491917de 100644 --- a/tests/benchmark/crypto/benchmark_crypto.odin +++ b/tests/benchmark/crypto/benchmark_crypto.odin @@ -8,6 +8,7 @@ import "core:strings" import "core:testing" import "core:time" +import "core:crypto/aegis" import "core:crypto/aes" import "core:crypto/chacha20" import "core:crypto/chacha20poly1305" @@ -164,6 +165,43 @@ benchmark_crypto :: proc(t: ^testing.T) { testing.expect(t, err == nil, name) benchmark_print(&str, name, options) } + { + name := "AEGIS-256 64 bytes" + options := &time.Benchmark_Options { + rounds = 1_000, + bytes = 64, + setup = _setup_sized_buf, + bench = _benchmark_aegis_256, + teardown = _teardown_sized_buf, + } + + key := [aegis.KEY_SIZE_256]byte { + 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, + 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, + 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, + 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, + } + ctx: aegis.Context + aegis.init(&ctx, key[:]) + + context.user_ptr = &ctx + + err := time.benchmark(options, context.allocator) + testing.expect(t, err == nil, name) + benchmark_print(&str, name, options) + + name = "AEGIS-256 1024 bytes" + options.bytes = 1024 + err = time.benchmark(options, context.allocator) + testing.expect(t, err == nil, name) + benchmark_print(&str, name, options) + + name = "AEGIS-256 65536 bytes" + options.bytes = 65536 + err = time.benchmark(options, context.allocator) + testing.expect(t, err == nil, name) + benchmark_print(&str, name, options) + } { iters :: 10000 @@ -423,6 +461,26 @@ _benchmark_aes256_gcm :: proc( return nil } +_benchmark_aegis_256 :: proc( + options: ^time.Benchmark_Options, + allocator := context.allocator, +) -> ( + err: time.Benchmark_Error, +) { + buf := options.input + iv: 
[aegis.IV_SIZE_256]byte + tag: [aegis.TAG_SIZE_128]byte = --- + + ctx := (^aegis.Context)(context.user_ptr) + + for _ in 0 ..= options.rounds { + aegis.seal(ctx, buf, tag[:], iv[:], nil, buf) + } + options.count = options.rounds + options.processed = options.rounds * options.bytes + return nil +} + @(private) benchmark_print :: proc(str: ^strings.Builder, name: string, options: ^time.Benchmark_Options, loc := #caller_location) { fmt.sbprintfln(str, "[%v] %v rounds, %v bytes processed in %v ns\n\t\t%5.3f rounds/s, %5.3f MiB/s\n", diff --git a/tests/core/crypto/test_core_crypto_aead.odin b/tests/core/crypto/test_core_crypto_aead.odin index 90eedc0b2d3..dfa251413f6 100644 --- a/tests/core/crypto/test_core_crypto_aead.odin +++ b/tests/core/crypto/test_core_crypto_aead.odin @@ -1,6 +1,8 @@ package test_core_crypto import "base:runtime" +import "core:crypto/aes" +import "core:crypto/aegis" import "core:crypto/aead" import "core:encoding/hex" import "core:testing" @@ -17,6 +19,10 @@ test_aead :: proc(t: ^testing.T) { for impl in supported_chacha_impls() { append(&chacha_impls, impl) } + aegis_impls := make([dynamic]aead.Implementation, context.temp_allocator) + for impl in supported_aegis_impls() { + append(&aegis_impls, impl) + } impls := [aead.Algorithm][dynamic]aead.Implementation{ .Invalid = nil, .AES_GCM_128 = aes_impls, @@ -24,6 +30,10 @@ test_aead :: proc(t: ^testing.T) { .AES_GCM_256 = aes_impls, .CHACHA20POLY1305 = chacha_impls, .XCHACHA20POLY1305 = chacha_impls, + .AEGIS_128L = aegis_impls, + .AEGIS_128L_256 = aegis_impls, + .AEGIS_256 = aegis_impls, + .AEGIS_256_256 = aegis_impls, } test_vectors := []struct{ @@ -224,6 +234,190 @@ test_aead :: proc(t: ^testing.T) { "bd6d179d3e83d43b9576579493c0e939572a1700252bfaccbed2902c21396cbb731c7f1b0b4aa6440bf3a82f4eda7e39ae64c6708c54c216cb96b72e1213b4522f8c9ba40db5d945b11b69b982c1bb9e3f3fac2bc369488f76b2383565d3fff921f9664c97637da9768812f615c68b13b52e", "c0875924c1c7987947deafd8780acf49", }, + // AEGIS-128L + // 
https://www.ietf.org/archive/id/draft-irtf-cfrg-aegis-aead-11.txt + { + .AEGIS_128L, + "10010000000000000000000000000000", + "10000200000000000000000000000000", + "", + "00000000000000000000000000000000", + "c1c0e58bd913006feba00f4b3cc3594e", + "abe0ece80c24868a226a35d16bdae37a", + }, + { + .AEGIS_128L_256, + "10010000000000000000000000000000", + "10000200000000000000000000000000", + "", + "00000000000000000000000000000000", + "c1c0e58bd913006feba00f4b3cc3594e", + "25835bfbb21632176cf03840687cb968cace4617af1bd0f7d064c639a5c79ee4", + }, + { + .AEGIS_128L, + "10010000000000000000000000000000", + "10000200000000000000000000000000", + "", + "", + "", + "c2b879a67def9d74e6c14f708bbcc9b4", + }, + { + .AEGIS_128L_256, + "10010000000000000000000000000000", + "10000200000000000000000000000000", + "", + "", + "", + "1360dc9db8ae42455f6e5b6a9d488ea4f2184c4e12120249335c4ee84bafe25d", + }, + { + .AEGIS_128L, + "10010000000000000000000000000000", + "10000200000000000000000000000000", + "0001020304050607", + "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f", + "79d94593d8c2119d7e8fd9b8fc77845c5c077a05b2528b6ac54b563aed8efe84", + "cc6f3372f6aa1bb82388d695c3962d9a", + }, + { + .AEGIS_128L_256, + "10010000000000000000000000000000", + "10000200000000000000000000000000", + "0001020304050607", + "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f", + "79d94593d8c2119d7e8fd9b8fc77845c5c077a05b2528b6ac54b563aed8efe84", + "022cb796fe7e0ae1197525ff67e309484cfbab6528ddef89f17d74ef8ecd82b3", + }, + { + .AEGIS_128L, + "10010000000000000000000000000000", + "10000200000000000000000000000000", + "0001020304050607", + "000102030405060708090a0b0c0d", + "79d94593d8c2119d7e8fd9b8fc77", + "5c04b3dba849b2701effbe32c7f0fab7", + }, + { + .AEGIS_128L_256, + "10010000000000000000000000000000", + "10000200000000000000000000000000", + "0001020304050607", + "000102030405060708090a0b0c0d", + "79d94593d8c2119d7e8fd9b8fc77", + 
"86f1b80bfb463aba711d15405d094baf4a55a15dbfec81a76f35ed0b9c8b04ac", + }, + { + .AEGIS_128L, + "10010000000000000000000000000000", + "10000200000000000000000000000000", + "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f20212223242526272829", + "101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637", + "b31052ad1cca4e291abcf2df3502e6bdb1bfd6db36798be3607b1f94d34478aa7ede7f7a990fec10", + "7542a745733014f9474417b337399507", + }, + { + .AEGIS_128L_256, + "10010000000000000000000000000000", + "10000200000000000000000000000000", + "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f20212223242526272829", + "101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637", + "b31052ad1cca4e291abcf2df3502e6bdb1bfd6db36798be3607b1f94d34478aa7ede7f7a990fec10", + "b91e2947a33da8bee89b6794e647baf0fc835ff574aca3fc27c33be0db2aff98", + }, + // AEGIS-256 + // https://www.ietf.org/archive/id/draft-irtf-cfrg-aegis-aead-11.txt + { + .AEGIS_256, + "1001000000000000000000000000000000000000000000000000000000000000", + "1000020000000000000000000000000000000000000000000000000000000000", + "", + "00000000000000000000000000000000", + "754fc3d8c973246dcc6d741412a4b236", + "3fe91994768b332ed7f570a19ec5896e", + }, + { + .AEGIS_256_256, + "1001000000000000000000000000000000000000000000000000000000000000", + "1000020000000000000000000000000000000000000000000000000000000000", + "", + "00000000000000000000000000000000", + "754fc3d8c973246dcc6d741412a4b236", + "1181a1d18091082bf0266f66297d167d2e68b845f61a3b0527d31fc7b7b89f13", + }, + { + .AEGIS_256, + "1001000000000000000000000000000000000000000000000000000000000000", + "1000020000000000000000000000000000000000000000000000000000000000", + "", + "", + "", + "e3def978a0f054afd1e761d7553afba3", + }, + { + .AEGIS_256_256, + "1001000000000000000000000000000000000000000000000000000000000000", + "1000020000000000000000000000000000000000000000000000000000000000", + "", + "", + "", + 
"6a348c930adbd654896e1666aad67de989ea75ebaa2b82fb588977b1ffec864a", + }, + { + .AEGIS_256, + "1001000000000000000000000000000000000000000000000000000000000000", + "1000020000000000000000000000000000000000000000000000000000000000", + "0001020304050607", + "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f", + "f373079ed84b2709faee373584585d60accd191db310ef5d8b11833df9dec711", + "8d86f91ee606e9ff26a01b64ccbdd91d", + }, + { + .AEGIS_256_256, + "1001000000000000000000000000000000000000000000000000000000000000", + "1000020000000000000000000000000000000000000000000000000000000000", + "0001020304050607", + "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f", + "f373079ed84b2709faee373584585d60accd191db310ef5d8b11833df9dec711", + "b7d28d0c3c0ebd409fd22b44160503073a547412da0854bfb9723020dab8da1a", + }, + { + .AEGIS_256, + "1001000000000000000000000000000000000000000000000000000000000000", + "1000020000000000000000000000000000000000000000000000000000000000", + "0001020304050607", + "000102030405060708090a0b0c0d", + "f373079ed84b2709faee37358458", + "c60b9c2d33ceb058f96e6dd03c215652", + }, + { + .AEGIS_256_256, + "1001000000000000000000000000000000000000000000000000000000000000", + "1000020000000000000000000000000000000000000000000000000000000000", + "0001020304050607", + "000102030405060708090a0b0c0d", + "f373079ed84b2709faee37358458", + "8c1cc703c81281bee3f6d9966e14948b4a175b2efbdc31e61a98b4465235c2d9", + }, + { + .AEGIS_256, + "1001000000000000000000000000000000000000000000000000000000000000", + "1000020000000000000000000000000000000000000000000000000000000000", + "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f20212223242526272829", + "101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637", + "57754a7d09963e7c787583a2e7b859bb24fa1e04d49fd550b2511a358e3bca252a9b1b8b30cc4a67", + "ab8a7d53fd0e98d727accca94925e128", + }, + { + .AEGIS_256_256, + 
"1001000000000000000000000000000000000000000000000000000000000000", + "1000020000000000000000000000000000000000000000000000000000000000", + "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f20212223242526272829", + "101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637", + "57754a7d09963e7c787583a2e7b859bb24fa1e04d49fd550b2511a358e3bca252a9b1b8b30cc4a67", + "a3aca270c006094d71c20e6910b5161c0826df233d08919a566ec2c05990f734", + }, } for v, _ in test_vectors { algo_name := aead.ALGORITHM_NAMES[v.algo] @@ -337,3 +531,13 @@ test_aead :: proc(t: ^testing.T) { } } } + +supported_aegis_impls :: proc() -> [dynamic]aes.Implementation { + impls := make([dynamic]aes.Implementation, 0, 2, context.temp_allocator) + append(&impls, aes.Implementation.Portable) + if aegis.is_hardware_accelerated() { + append(&impls, aes.Implementation.Hardware) + } + + return impls +}