This repository has been archived by the owner on Apr 7, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
sbox_jasmin_avx512.jazz
234 lines (233 loc) · 5.46 KB
/
sbox_jasmin_avx512.jazz
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
// SubBytes_single: straight-line boolean circuit over eight 512-bit inputs.
//
// Inputs:
//   in0..in7 : eight 512-bit register values. Every operation below is a
//              bitwise XOR / AND / AND-NOT / ternary-logic intrinsic, so
//              each bit position is processed independently of the others.
//   out      : 64-bit base address; eight 512-bit results S0..S7 are
//              stored to memory at out + 64*k for k = 0..7.
//
// Returns nothing; the only visible effect is the eight stores through `out`.
// There are no branches, loops or data-dependent addresses.
//
// NOTE(review): judging by the name and the y*/t*/z*/tc* naming, this is
// presumably a bit-sliced AES S-box (the structure resembles the
// Boyar-Peralta circuit) - confirm against the generator/reference before
// relying on that.
fn SubBytes_single(reg u512 in0 in1 in2 in3 in4 in5 in6 in7, reg u64 out)
{
// All temporaries are 512-bit registers. Each one is assigned exactly once
// in the body below.
reg u512 y14;
reg u512 y13;
reg u512 y9;
reg u512 y8;
reg u512 t0;
reg u512 y12;
reg u512 y1;
reg u512 t1;
reg u512 y4;
reg u512 y2;
reg u512 y5;
reg u512 y15;
reg u512 y20;
reg u512 t5;
reg u512 y3;
reg u512 t8;
reg u512 y6;
reg u512 y10;
reg u512 t2;
reg u512 y11;
reg u512 t3;
reg u512 t15;
reg u512 y7;
reg u512 y17;
reg u512 y16;
reg u512 t12;
reg u512 t10;
reg u512 t13;
reg u512 t16;
reg u512 t14;
reg u512 new0;
reg u512 t4;
reg u512 new1;
reg u512 new2;
reg u512 new3;
reg u512 new4;
reg u512 new5;
reg u512 y21;
reg u512 new6;
reg u512 new7;
reg u512 new8;
reg u512 new9;
reg u512 new10;
reg u512 new11;
reg u512 new12;
reg u512 new13;
reg u512 new14;
reg u512 new15;
reg u512 new16;
reg u512 new17;
reg u512 new18;
reg u512 t29;
reg u512 t33;
reg u512 z5;
reg u512 z14;
reg u512 new19;
reg u512 t42;
reg u512 z2;
reg u512 z11;
reg u512 new20;
reg u512 z6;
reg u512 z15;
reg u512 t44;
reg u512 t37;
reg u512 new21;
reg u512 z1;
reg u512 z10;
reg u512 new22;
reg u512 z0;
reg u512 z9;
reg u512 t40;
reg u512 tc4;
reg u512 tc5;
reg u512 t41;
reg u512 t43;
reg u512 z4;
reg u512 z13;
reg u512 t45;
reg u512 z8;
reg u512 z17;
reg u512 z3;
reg u512 z12;
reg u512 z7;
reg u512 z16;
reg u512 tc6;
reg u512 tc7;
reg u512 tc9;
reg u512 tc1;
reg u512 tc8;
reg u512 tc11;
reg u512 new23;
reg u512 tc14;
reg u512 new24;
reg u512 tc2;
reg u512 tc16;
reg u512 tc10;
reg u512 tc3;
reg u512 tc21;
reg u512 tc17;
reg u512 tc18;
reg u512 _tmp12_;
reg u512 _tmp3_;
reg u512 new25;
reg u512 _tmp4_;
reg u512 S0, S1, S2, S3, S4, S5, S6, S7;
// --- XOR-only combinations of the inputs ---------------------------------
y14 = #x86_VPXORQ(in3, in5);
y13 = #x86_VPXORQ(in0, in6);
y9 = #x86_VPXORQ(in0, in3);
y8 = #x86_VPXORQ(in0, in5);
t0 = #x86_VPXORQ(in2, in1);
y12 = #x86_VPXORQ(y13, y14);
y1 = #x86_VPXORQ(t0, in7);
t1 = #x86_VPXORQ(in4, y12);
y4 = #x86_VPXORQ(in3, y1);
y2 = #x86_VPXORQ(in0, y1);
y5 = #x86_VPXORQ(in6, y1);
y15 = #x86_VPXORQ(in5, t1);
y20 = #x86_VPXORQ(in1, t1);
// --- Remaining y* values interleaved with the first AND products ---------
t5 = #x86_VPANDQ(in7, y4);
y3 = #x86_VPXORQ(y8, y5);
t8 = #x86_VPANDQ(y1, y5);
y6 = #x86_VPXORQ(in7, y15);
y10 = #x86_VPXORQ(t0, y15);
t2 = #x86_VPANDQ(y12, y15);
y11 = #x86_VPXORQ(y9, y20);
t3 = #x86_VPANDQ(y3, y6);
t15 = #x86_VPANDQ(y8, y10);
y7 = #x86_VPXORQ(in7, y11);
y17 = #x86_VPXORQ(y10, y11);
y16 = #x86_VPXORQ(t0, y11);
t12 = #x86_VPANDQ(y9, y11);
t10 = #x86_VPANDQ(y2, y7);
t13 = #x86_VPANDQ(y14, y17);
t16 = #x86_VPXORQ(t15, t12);
t14 = #x86_VPXORQ(t12, t13);
// --- Four intermediate words new1, new5, new8, new11 ---------------------
// These four feed every computation in the next section.
new0 = #x86_VPXORQ(y20, t2);
t4 = #x86_VPXORQ(t3, new0);
new1 = #x86_VPXORQ(t14, t4);
new2 = #x86_VPXORQ(y8, t5);
new3 = #x86_VPXORQ(y10, new2);
new4 = #x86_VPXORQ(t2, new3);
new5 = #x86_VPXORQ(t16, new4);
// NOTE(review): the only AND-NOT in the circuit. Which operand gets
// complemented depends on how this intrinsic maps onto the x86 instruction
// (not visible in this file); swapping the operands changes new8 and new11,
// so confirm the order against the intrinsic's definition / test vectors.
y21 = #x86_VPANDNQ(y16, y13);
new6 = #x86_VPXORQ(y13, t8);
new7 = #x86_VPXORQ(y21, new6);
new8 = #x86_VPXORQ(t14, new7);
new9 = #x86_VPXORQ(in0, y21);
new10 = #x86_VPXORQ(t16, t10);
new11 = #x86_VPXORQ(new10, new9);
// --- Middle section: derives t29, t33, t37 and t40..t45 from the four ----
// --- words above; interleaved with the z* products -----------------------
// Every z* is the AND of one early value (a y* or in7) with one of
// t29, t33, t37, t40, t41, t42, t43, t44, t45.
new12 = #x86_VPXORQ(new5, new1);
new13 = #x86_VPANDQ(new1, new8);
new14 = #x86_VPXORQ(new11, new8);
new15 = #x86_VPXORQ(new11, new13);
new16 = #x86_VPXORQ(new5, new13);
new17 = #x86_VPANDQ(new14, new16);
new18 = #x86_VPANDQ(new12, new15);
t29 = #x86_VPXORQ(new5, new18);
t33 = #x86_VPXORQ(new11, new17);
z5 = #x86_VPANDQ(y7, t29);
z14 = #x86_VPANDQ(y2, t29);
new19 = #x86_VPXORQ(new13, new17);
t42 = #x86_VPXORQ(t29, t33);
z2 = #x86_VPANDQ(in7, t33);
z11 = #x86_VPANDQ(y4, t33);
new20 = #x86_VPANDQ(new11, new19);
z6 = #x86_VPANDQ(y11, t42);
z15 = #x86_VPANDQ(y9, t42);
t44 = #x86_VPXORQ(new8, new20);
t37 = #x86_VPXORQ(t33, t44);
new21 = #x86_VPXORQ(new15, new20);
z1 = #x86_VPANDQ(y6, t37);
z10 = #x86_VPANDQ(y3, t37);
new22 = #x86_VPANDQ(t29, new21);
z0 = #x86_VPANDQ(y15, t44);
z9 = #x86_VPANDQ(y12, t44);
t40 = #x86_VPXORQ(new12, new22);
tc4 = #x86_VPXORQ(z2, z0);
tc5 = #x86_VPXORQ(z0, z1);
t41 = #x86_VPXORQ(t37, t40);
t43 = #x86_VPXORQ(t29, t40);
z4 = #x86_VPANDQ(y1, t40);
z13 = #x86_VPANDQ(y5, t40);
t45 = #x86_VPXORQ(t42, t41);
z8 = #x86_VPANDQ(y10, t41);
z17 = #x86_VPANDQ(y8, t41);
z3 = #x86_VPANDQ(y16, t43);
z12 = #x86_VPANDQ(y13, t43);
z7 = #x86_VPANDQ(y17, t45);
z16 = #x86_VPANDQ(y14, t45);
// --- XOR-only combination of the z* products -----------------------------
tc6 = #x86_VPXORQ(z4, z3);
tc7 = #x86_VPXORQ(tc4, z12);
tc9 = #x86_VPXORQ(z8, tc7);
tc1 = #x86_VPXORQ(z15, z16);
tc8 = #x86_VPXORQ(z7, tc6);
tc11 = #x86_VPXORQ(tc6, tc5);
new23 = #x86_VPXORQ(z5, tc4);
tc14 = #x86_VPXORQ(z3, new23);
new24 = #x86_VPXORQ(z13, tc14);
tc2 = #x86_VPXORQ(z10, tc1);
tc16 = #x86_VPXORQ(z6, tc8);
tc10 = #x86_VPXORQ(tc8, tc9);
tc3 = #x86_VPXORQ(z9, tc2);
tc21 = #x86_VPXORQ(z11, tc2);
tc17 = #x86_VPXORQ(z14, tc10);
// --- Outputs: S_k is stored at out + 64*k --------------------------------
// Each result is written as soon as it is available, so the stores happen
// in the order S3, S0, S5, S4, S7, S6, S1, S2.
S3 = #x86_VPXORQ(tc3, tc11);
(u512)[out + 192] = S3;
S0 = #x86_VPXORQ(tc16, tc3);
(u512)[out + 0] = S0;
S5 = #x86_VPXORQ(tc17, tc21);
(u512)[out + 320] = S5;
// S4 reuses the already-computed S3.
S4 = #x86_VPXORQ(tc14, S3);
(u512)[out + 256] = S4;
tc18 = #x86_VPXORQ(tc1, new24);
// All three data operands are the same register and the immediate is
// 15 (0x0F); under the x86 VPTERNLOG truth-table encoding this should be
// the bitwise complement of tc18 - confirm the intrinsic takes the
// immediate as its first argument.
_tmp12_ = #x86_VPTERNLOGQ(15, tc18, tc18, tc18);
S7 = #x86_VPXORQ(z12, _tmp12_);
(u512)[out + 448] = S7;
S6 = #x86_VPXORQ(tc10, _tmp12_);
(u512)[out + 384] = S6;
// Same complement construction as above, applied to tc16.
_tmp3_ = #x86_VPTERNLOGQ(15, tc16, tc16, tc16);
S1 = #x86_VPXORQ(_tmp3_, S3);
(u512)[out + 64] = S1;
new25 = #x86_VPXORQ(z15, z17);
_tmp4_ = #x86_VPXORQ(_tmp3_, new25);
S2 = #x86_VPXORQ(tc17, _tmp4_);
(u512)[out + 128] = S2;
}
// Exported entry point.
//
//   pin  : base address of the input; eight 512-bit words are read from
//          byte offsets 0, 64, 128, ..., 448.
//   pout : base address of the output, forwarded unchanged to
//          SubBytes_single, which stores eight 512-bit words at the same
//          byte offsets.
//
// All eight loads are performed before the call, so every input word has
// been read before SubBytes_single issues its first store.
export fn SubBytes(reg u64 pin, reg u64 pout)
{
    reg u512 w0, w1, w2, w3, w4, w5, w6, w7;

    w0 = (u512)[pin + 0];
    w1 = (u512)[pin + 64];
    w2 = (u512)[pin + 128];
    w3 = (u512)[pin + 192];
    w4 = (u512)[pin + 256];
    w5 = (u512)[pin + 320];
    w6 = (u512)[pin + 384];
    w7 = (u512)[pin + 448];

    SubBytes_single(w0, w1, w2, w3, w4, w5, w6, w7, pout);
}