vector128.h
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Wrappers for platform-specific 128-bit vectors.

#ifndef VECTOR128_H_
#define VECTOR128_H_

#include <stdint.h>  // uint64_t

#if defined(__SSE2__) && defined(__AES__)
#define RANDEN_AESNI 1
#include <wmmintrin.h>
#elif defined(__powerpc__) && defined(__VSX__)
#define RANDEN_PPC 1
#define RANDEN_BIG_ENDIAN 1
#include <altivec.h>
#elif defined(__ARM_NEON) && defined(__ARM_FEATURE_CRYPTO)
#define RANDEN_ARM 1
#include <arm_neon.h>
#else
#error "Port"
#endif

#if defined(__clang__) || defined(__GNUC__)
#define RANDEN_INLINE inline __attribute__((always_inline))
#define RANDEN_RESTRICT __restrict__
#else
#define RANDEN_INLINE
#define RANDEN_RESTRICT
#endif

namespace randen {

#ifdef RANDEN_AESNI
class V {
public:
RANDEN_INLINE V() {}  // Leaves raw_ uninitialized.
RANDEN_INLINE V& operator=(const V other) {
raw_ = other.raw_;
return *this;
}
// Convert from/to intrinsics.
RANDEN_INLINE explicit V(const __m128i raw) : raw_(raw) {}
__m128i raw() const { return raw_; }
RANDEN_INLINE V& operator^=(const V other) {
raw_ = _mm_xor_si128(raw_, other.raw_);
return *this;
}
private:
// Note: this wrapper is faster than using __m128i directly.
__m128i raw_;
};
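
// Illustrative use of the wrapper (AES-NI path; values are arbitrary and
// hypothetical): operator^= behaves like the underlying XOR intrinsic.
//   V a(_mm_set1_epi64x(1));
//   const V b(_mm_set1_epi64x(2));
//   a ^= b;  // Each 64-bit lane of a now holds 1 ^ 2 == 3.
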
#elif defined(RANDEN_PPC)
// Already provides operator^=.
using V = vector unsigned long long;
#elif defined(RANDEN_ARM)
// Already provides operator^=.
using V = uint8x16_t;
#else
#error "Port"
#endif

constexpr int kLanes = sizeof(V) / sizeof(uint64_t);

// On big-endian platforms, byte-swap constants (e.g. round keys) to ensure
// results match little-endian platforms.
#ifdef RANDEN_BIG_ENDIAN
#define RANDEN_LE(a, b) __builtin_bswap64(b), __builtin_bswap64(a)
#else
#define RANDEN_LE(a, b) a, b
#endif
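
// Illustrative use (hypothetical constant, not from this header): one
// 128-bit round-key constant written as two 64-bit halves.
//   static const uint64_t kRoundKey[2] = {RANDEN_LE(0x0123456789ABCDEFull,
//                                                   0xFEDCBA9876543210ull)};
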
#ifdef RANDEN_BIG_ENDIAN
static RANDEN_INLINE V ReverseBytes(const V v) {
// Reverses the bytes of the vector.
const vector unsigned char perm = {15, 14, 13, 12, 11, 10, 9, 8,
7, 6, 5, 4, 3, 2, 1, 0};
return vec_perm(v, v, perm);
}
#endif

// WARNING: these load/store in native byte order. It is OK to load and then
// store an unchanged vector, but interpreting the bits as a number or as
// input to AES will have platform-dependent results. When RANDEN_BIG_ENDIAN
// is defined, call ReverseBytes after Load and/or before Store.
static RANDEN_INLINE V Load(const uint64_t* RANDEN_RESTRICT lanes,
const int block) {
#ifdef RANDEN_AESNI
const uint64_t* RANDEN_RESTRICT from = lanes + block * kLanes;
return V(_mm_load_si128(reinterpret_cast<const __m128i*>(from)));
#elif defined(RANDEN_PPC)
const V* RANDEN_RESTRICT from =
reinterpret_cast<const V*>(lanes + block * kLanes);
return vec_vsx_ld(0, from);
#elif defined(RANDEN_ARM)
const uint8_t* RANDEN_RESTRICT from =
reinterpret_cast<const uint8_t*>(lanes + block * kLanes);
return vld1q_u8(from);
#else
#error "Port"
#endif
}

static RANDEN_INLINE void Store(const V v, uint64_t* RANDEN_RESTRICT lanes,
const int block) {
#ifdef RANDEN_AESNI
uint64_t* RANDEN_RESTRICT to = lanes + block * kLanes;
_mm_store_si128(reinterpret_cast<__m128i * RANDEN_RESTRICT>(to), v.raw());
#elif defined(RANDEN_PPC)
V* RANDEN_RESTRICT to = reinterpret_cast<V*>(lanes + block * kLanes);
vec_vsx_st(v, 0, to);
#elif defined(RANDEN_ARM)
uint8_t* RANDEN_RESTRICT to =
reinterpret_cast<uint8_t*>(lanes + block * kLanes);
vst1q_u8(to, v);
#else
#error "Port"
#endif
}
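
// Sketch (illustrative; LoadLE is a hypothetical helper, not part of this
// header): an endian-agnostic load for callers that interpret the lanes as
// numbers, per the warning above.
//   static RANDEN_INLINE V LoadLE(const uint64_t* lanes, const int block) {
//   #ifdef RANDEN_BIG_ENDIAN
//     return ReverseBytes(Load(lanes, block));
//   #else
//     return Load(lanes, block);
//   #endif
//   }
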
// One round of AES. "round_key" is a public constant for breaking the
// symmetry of AES (ensures previously equal columns differ afterwards).
static RANDEN_INLINE V AES(const V state, const V round_key) {
#ifdef RANDEN_AESNI
// It is important to always use the full round function - omitting the
// final MixColumns reduces security [https://eprint.iacr.org/2010/041.pdf]
// and does not help because we never decrypt.
return V(_mm_aesenc_si128(state.raw(), round_key.raw()));
#elif defined(RANDEN_PPC)
return V(__builtin_crypto_vcipher(state, round_key));
#elif defined(RANDEN_ARM)
return vaesmcq_u8(vaeseq_u8(state, round_key));
#else
#error "Port"
#endif
}
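
// Usage sketch (illustrative only; kKey and state are hypothetical names):
// one AES round over a single block of kLanes uint64_t values. Note the
// AES-NI path requires 16-byte-aligned pointers for Load/Store.
//   alignas(16) static const uint64_t kKey[2] = {RANDEN_LE(1ull, 2ull)};
//   alignas(16) uint64_t state[2] = {0, 0};
//   V block = Load(state, /*block=*/0);
//   block = AES(block, Load(kKey, /*block=*/0));
//   Store(block, state, /*block=*/0);
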
}  // namespace randen

#endif  // VECTOR128_H_