-
Notifications
You must be signed in to change notification settings - Fork 0
/
argon.py
362 lines (294 loc) · 11.2 KB
/
argon.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
import math
from itertools import repeat
from multiprocessing import Pool
import numpy as np
from blake import Blake2b
def argon2(P: bytes,
S: bytes,
p: int,
tau: int,
m: int,
t: int,
V: int = 19,
K: bytes = b"",
X: bytes = b"",
Y: int = 0,
force_single_process=False):
"""
:param P: Message can have any length from 0 to 2^32 − 1 bytes
:param S: Nonce can have any length from 8 to 2^32 − 1 bytes
:param p: Degree of parallelism determines how many independent (but synchronizing) computational chains can be run.
It can be any integer value from 1 to 2^24 − 1
:param tau: Length of output. It can be any integer number of bytes from 4 to 2^32 − 1
:param m: Memory size can be any integer number of kilobytes from 8p to 2^32 − 1 (p: degree of parallelism)
:param t: Number of iterations can be any integer number from 1 to 2^32 − 1
:param V: version number 19
:param K: Secret value can have any length from 0 to 32 bytes
:param X: Associated data can have any length from 0 to 2^32 − 1 bytes
:param Y: Argon Type:
- for Argon2d: 0
- for Argon2i: 1
:param force_single_process: custom argument for benchmarking
:return:
"""
# check the required conditions:
# message length: 0 to 2^32 − 1 bytes
assert 0 <= len(P) < 2 ** 32
# salt/nonce: 8 to 2^32 − 1 bytes
assert 8 <= len(S) < 2 ** 32
# parallelism degree: 1 to 2^24 − 1
assert 1 <= p < 2 ** 24
# output length: 4 to 2^32 − 1
assert 4 <= tau < 2 ** 32
# memory size: 8p to 2^32 − 1 (p: degree of parallelism)
assert 8 * p <= m < 2 ** 32
# iterations: 1 to 2^32 − 1
assert 1 <= t < 2 ** 32
# argon2 version: 19
assert V == 19
# secret value length: 0 to 32 bytes
assert 0 <= len(K) <= 32
# associated data length: 0 to 2^32 − 1 bytes
assert 0 <= len(X) < 2 ** 32
# create first H_0 block
h = p.to_bytes(4, byteorder="little") \
+ tau.to_bytes(4, byteorder="little") \
+ m.to_bytes(4, byteorder="little") \
+ t.to_bytes(4, byteorder="little") \
+ V.to_bytes(4, byteorder="little") \
+ Y.to_bytes(4, byteorder="little")
h += len(P).to_bytes(4, byteorder="little") + P
h += len(S).to_bytes(4, byteorder="little") + S
h += len(K).to_bytes(4, byteorder="little") + K
h += len(X).to_bytes(4, byteorder="little") + X
H_0 = Blake2b.hash(h, hash_length=64)
# create the B matrix
m_p = math.floor(m / (4 * p)) * 4 * p
q = int(m_p / p)
B = [[None for _ in range(q)] for _ in range(p)]
# generate the first 2 columns
for i in range(0, p):
B[i][0] = H(H_0 + b'\0\0\0\0' + i.to_bytes(4, byteorder="little"), 1024)
B[i][1] = H(H_0 + b'\1\0\0\0' + i.to_bytes(4, byteorder="little"), 1024)
# this value is named S in the paper; renamed to not override nonce S
N_VERT_SLICES = 4
segment_length = int(q / N_VERT_SLICES)
if p > 1 and not force_single_process:
multiprocessing_pool = Pool(processes=p)
for r in range(0, t):
for vert_slice in range(0, N_VERT_SLICES):
if p > 1 and not force_single_process:
# run the block update for each segment in an own process
args = repeat(vert_slice), repeat(segment_length), repeat(r), repeat(Y), repeat(p), repeat(q), repeat(B)
segm_updates = multiprocessing_pool.starmap(update_segment, zip(range(0, p), *args))
# update B matrix in them main process
for update in segm_updates:
if update:
B_new, idxs = update
for i, j in idxs:
B[i][j] = B_new[i][j]
else:
for i in range(0, p):
# drop the return value since we dont have to merge the results
update_segment(i, vert_slice, segment_length, r, Y, p, q, B)
if p > 1 and not force_single_process:
multiprocessing_pool.close()
# calculate xor of the last column
B_final = B[0][q - 1]
for i in range(1, p):
B_final = xor(B_final, B[i][q - 1])
return H(B_final, tau)
def update_segment(i, vert_slice, segment_length, r, Y, p, q, B):
"""
Updates all column in the segment, this function can run in an own process/thread
because the B matrix segments are independent.
"""
block_idxs = []
# update each column of the segment
for segment_col_idx in range(segment_length):
j = vert_slice * segment_length + segment_col_idx
# in the first round we do not need update the first to column, so skip them
if (r == 0 and j >= 2) or r > 0:
block_idxs.append((i, j))
update_block(j, segment_length, segment_col_idx, vert_slice, r, Y, i, p, q, B)
if len(block_idxs) > 0:
return B, block_idxs
def update_block(j, segment_length, segment_col_idx, vert_slice, r, Y, i, p, q, B):
"""
Update the given block with indices i,j
"""
if Y == 0:
if not B[i][(j - 1)]:
print(1)
# Argon2d
J_1 = int.from_bytes(B[i][(j - 1)][:4], "little")
J_2 = int.from_bytes(B[i][(j - 1)][4:8], "little")
elif Y == 1:
# Argon2i
pass
# mapping J_1 and J_2 to the reference block index
# i_p is named l for the lane in the paper
if r == 0 and vert_slice == 0:
i_p = i
else:
i_p = J_2 % p
# start and end are mark the set of indices used for determining
# the reference block (in the paper it is named R)
# (see 3.3 mapping indices in th paper)
if r == 0:
start = 0
# first iteration
if i == i_p:
end = j - 1
else:
if segment_col_idx == 0:
end = vert_slice * segment_length - 1
else:
end = vert_slice * segment_length
else:
start = ((vert_slice + 1) * segment_length) % q
# current lane is l
if i == i_p:
end = q - segment_length + segment_col_idx - 1
else:
if segment_col_idx == 0:
end = q - segment_length - 1
else:
end = q - segment_length
x = int((J_1 ** 2) / (2 ** 32))
y = int((end * x) / (2 ** 32))
z = end - 1 - y
j_p = int((start + z) % q)
if r == 0:
# for the first round we only need the compress function
B[i][j] = C(B[i][j - 1], B[i_p][j_p])
else:
B[i][j] = xor(C(B[i][j - 1], B[i_p][j_p]), B[i][j])
def H(X: bytes, tau: int) -> bytes:
"""
Custom Argon2 hash function. Uses internal the Blake2b hash function.
"""
if tau <= 64:
return Blake2b.hash(tau.to_bytes(4, byteorder="little") + X, hash_length=tau)
else:
# generate 64 byte blocks
r = math.ceil(tau / 32) - 2
V_i = Blake2b.hash(tau.to_bytes(4, byteorder="little") + X, hash_length=64)
A = V_i[:32]
for i in range(1, r):
V_i = Blake2b.hash(V_i, hash_length=64)
A += V_i[:32]
# now hash the last partially remaining block
V_i = Blake2b.hash(V_i, hash_length=tau - 32 * r)
# add the whole hashed value not only the first 32 bytes
A += V_i
return A
def xor(x, y):
x = int.from_bytes(x, byteorder="little")
y = int.from_bytes(y, byteorder="little")
z = x ^ y
return z.to_bytes(1024, byteorder="little")
def C(X: bytes, Y: bytes):
"""
Compression function.
:param X: 1024 byte block
:param Y: 1024 byte block
:return: compression of X and Y
"""
R = xor(X, Y)
Q = []
for k in range(0, 1024, 128):
S = []
for i in range(k, k + 128, 16):
S.append(R[i: i + 16])
Q.extend(P(*S))
Z = []
for i in range(8):
S = []
for k in range(i, 64, 8):
S.append(Q[k])
Z.append(P(*S))
Z = np.array(Z).flatten("F")
return xor(Z, R)
def P(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7):
"""
Permutation function P.
"""
# S_i = v_{2*i+1} || v_{2*i}
# v_0 v_1 v_2 v_3
# v_4 v_5 v_6 v_7
# v_8 v_9 v_10 v_11
# v_12 v_13 v_14 v_15
# S_0 = v_1 || v_0
# S_1 = v_3 || v_2
# S_2 = v_5 || v_4
# S_3 = v_7 || v_6
# S_4 = v_9 || v_8
# S_5 = v_11 || v_10
# S_6 = v_13 || v_12
# S_7 = v_15 || v_14
v_0 = int.from_bytes(S_0[:8], "little")
v_1 = int.from_bytes(S_0[8:], "little")
v_2 = int.from_bytes(S_1[:8], "little")
v_3 = int.from_bytes(S_1[8:], "little")
v_4 = int.from_bytes(S_2[:8], "little")
v_5 = int.from_bytes(S_2[8:], "little")
v_6 = int.from_bytes(S_3[:8], "little")
v_7 = int.from_bytes(S_3[8:], "little")
v_8 = int.from_bytes(S_4[:8], "little")
v_9 = int.from_bytes(S_4[8:], "little")
v_10 = int.from_bytes(S_5[:8], "little")
v_11 = int.from_bytes(S_5[8:], "little")
v_12 = int.from_bytes(S_6[:8], "little")
v_13 = int.from_bytes(S_6[8:], "little")
v_14 = int.from_bytes(S_7[:8], "little")
v_15 = int.from_bytes(S_7[8:], "little")
def _G(a, b, c, d):
"""
Mixing function G based on Blake2b mix function G.
"""
a = (a + b + 2 * (a & 0xffffffff) * (b & 0xffffffff)) % (2 ** 64)
d = rotate_xor((d ^ a) % (2 ** 64), 32)
c = (c + d + 2 * (c & 0xffffffff) * (d & 0xffffffff)) % (2 ** 64)
b = rotate_xor((b ^ c) % (2 ** 64), 24)
a = (a + b + 2 * (a & 0xffffffff) * (b & 0xffffffff)) % (2 ** 64)
d = rotate_xor((d ^ a) % (2 ** 64), 16)
c = (c + d + 2 * (c & 0xffffffff) * (d & 0xffffffff)) % (2 ** 64)
b = rotate_xor((b ^ c) % (2 ** 64), 63)
return a, b, c, d
v_0, v_4, v_8, v_12 = _G(v_0, v_4, v_8, v_12)
v_1, v_5, v_9, v_13 = _G(v_1, v_5, v_9, v_13)
v_2, v_6, v_10, v_14 = _G(v_2, v_6, v_10, v_14)
v_3, v_7, v_11, v_15 = _G(v_3, v_7, v_11, v_15)
v_0, v_5, v_10, v_15 = _G(v_0, v_5, v_10, v_15)
v_1, v_6, v_11, v_12 = _G(v_1, v_6, v_11, v_12)
v_2, v_7, v_8, v_13 = _G(v_2, v_7, v_8, v_13)
v_3, v_4, v_9, v_14 = _G(v_3, v_4, v_9, v_14)
v_0 = v_0.to_bytes(8, byteorder="little")
v_1 = v_1.to_bytes(8, byteorder="little")
v_2 = v_2.to_bytes(8, byteorder="little")
v_3 = v_3.to_bytes(8, byteorder="little")
v_4 = v_4.to_bytes(8, byteorder="little")
v_5 = v_5.to_bytes(8, byteorder="little")
v_6 = v_6.to_bytes(8, byteorder="little")
v_7 = v_7.to_bytes(8, byteorder="little")
v_8 = v_8.to_bytes(8, byteorder="little")
v_9 = v_9.to_bytes(8, byteorder="little")
v_10 = v_10.to_bytes(8, byteorder="little")
v_11 = v_11.to_bytes(8, byteorder="little")
v_12 = v_12.to_bytes(8, byteorder="little")
v_13 = v_13.to_bytes(8, byteorder="little")
v_14 = v_14.to_bytes(8, byteorder="little")
v_15 = v_15.to_bytes(8, byteorder="little")
return v_0 + v_1, \
v_2 + v_3, \
v_4 + v_5, \
v_6 + v_7, \
v_8 + v_9, \
v_10 + v_11, \
v_12 + v_13, \
v_14 + v_15
def rotate_xor(x, r):
x = np.uint64(x)
r = np.uint64(r)
return int((x >> r) ^ (x << (np.uint64(64) - r)))