-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhash_cracker_ray_ready.py
181 lines (146 loc) · 6.02 KB
/
hash_cracker_ray_ready.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
import argparse
import itertools
import math
from datetime import datetime
from hashlib import sha256
from typing import Optional
import ray
# initialise Ray up front: `ray.init()` connects to an existing Ray cluster
# or, when none is configured, starts a local one and connects to it
ray.init()
# some great reads about ray's wait API:
# https://medium.com/distributed-computing-with-ray/ray-tips-and-tricks-part-i-ray-wait-9ed7a0b9836d
# https://rise.cs.berkeley.edu/blog/ray-tips-for-first-time-users/
# some sample phrases to crack
#
# 'G0!3m': 070063ad89872aaa0fc0a2f170be5641e9c5d201d25b76703a4be3ee3848016c
# 'golem': 4c5cddb7859b93eebf26c551518c021a31fa0013b2c03afa5b541cbc8bd079a6
# '9Lm!': de6c0da53ac2bf2b6954e400767106011e4471db7a412cce0388e3441e0ad2ec
# `test`: 9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08
# `foo`: 2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae
# `glm`: 2bf548d8056029c73e6e28132d19a3a277a49daf32b1c1ba7b0b7fc7e78bf5cd
# `be`: 46599c5bb5c33101f80cea8438e2228085513dbbb19b2f5ce97bd68494d3344d
# `x`: 2d711642b726b04401627ca9fbac32f5c8530fb1903cc4db02258717921a4881
# the alphabet used to build candidate phrases: lowercase letters, uppercase letters,
# digits and finally the four ASCII punctuation runs (the first of which starts with a space)
CHARS = list(
    map(
        chr,
        itertools.chain.from_iterable(
            range(ord(first), ord(last) + 1)
            for first, last in (
                ("a", "z"),
                ("A", "Z"),
                ("0", "9"),
                (" ", "/"),
                (":", "@"),
                ("[", "`"),
                ("{", "~"),
            )
        ),
    )
)
# remember when the script started so the total run time can be reported at the end
start_time = datetime.now()

parser = argparse.ArgumentParser()
parser.add_argument(
    "-l",
    "--length",
    type=int,
    default=3,
    help="brute force length, default: %(default)s",
)
parser.add_argument(
    "-c",
    "--num-chunks",
    help="number of chunks to divide the range into, default=%(default)s",
    type=int,
    default=16,
)
parser.add_argument(
    "hash",
    type=str,
    help="hex-encoded SHA-256 digest of the phrase to find",
)
args = parser.parse_args()

# fail early with a readable usage error instead of a traceback further down:
# a non-positive length describes an empty (or inverted) search space and
# a non-positive chunk count makes the chunk size computation divide by zero
if args.length < 1:
    parser.error("--length must be a positive integer")
if args.num_chunks < 1:
    parser.error("--num-chunks must be a positive integer")
def str_to_int(value: str) -> int:
    """
    Map a string onto the integer it represents in the CHARS "number system".

    Every character is a non-zero digit whose value is its index in the CHARS table
    plus one; the base is `len(CHARS) + 1`, so that the digit zero is never produced
    by a valid string.

    :param value: the string of "digits" to convert, most significant character first
    :return: the integer value of the string; `0` for an empty string
    :raises ValueError: if the string contains a character missing from the CHARS table
    """
    radix = len(CHARS) + 1
    total = 0
    # Horner's scheme: shift what we have so far by one digit, then add the next digit
    for symbol in value:
        total = total * radix + CHARS.index(symbol) + 1
    return total
def int_to_str(intval: int, round_nulls=False) -> Optional[str]:
    """
    Turn an integer back into the string it encodes in the CHARS "number system".

    The number is read in base `len(CHARS) + 1`; a digit `d > 0` stands for `CHARS[d - 1]`.
    A zero digit has no corresponding character, so such numbers are not valid strings.

    :param intval: the integer value to convert
    :param round_nulls: what to do when a zero digit is encountered:
        if truthy, round up to the nearest valid string (the zero digit and everything
        to its right become the first character of the CHARS table),
        otherwise give up and return `None`
    :return: the decoded string (empty for non-positive input),
        or `None` if a zero digit was found and `round_nulls` is not set
    """
    radix = len(CHARS) + 1
    remaining = intval
    letters = ""

    # peel off digits starting from the least significant one
    while remaining > 0:
        remaining, digit = divmod(remaining, radix)

        if digit == 0:
            if not round_nulls:
                return None
            # "round up": replace this digit and all the lower ones with the smallest digit
            letters = CHARS[0] * (len(letters) + 1)
            continue

        letters = CHARS[digit - 1] + letters

    return letters
@ray.remote
def scan_range(searched_hash: str, start: int, end: int) -> Optional[str]:
    """
    Look for a word with the given SHA-256 hash inside one slice of the word space.

    The slice bounds are integers; each integer maps to a word through `int_to_str`,
    which lets a big range be cut into equally sized chunks with plain arithmetic.
    Integers that don't map to a valid word are skipped.

    :param searched_hash: hex digest of the word we are looking for
    :param start: first integer of the slice (inclusive)
    :param end: last integer of the slice (exclusive)
    :return: the matching word, or `None` if the slice doesn't contain one
    """
    # the bounds themselves may not be valid words, so round them for display only
    first_word = int_to_str(start, True)
    last_word = int_to_str(end, True)
    print(f"scanning: {searched_hash}: {first_word}, {last_word}")

    for number in range(start, end):
        candidate = int_to_str(number)
        if not candidate:
            continue
        if sha256(candidate.encode("utf-8")).hexdigest() == searched_hash:
            return candidate

    return None
# we start with a string consisting of a single, first character from the character set
# e.g. `a`
start_space = str_to_int(CHARS[0])

# we traverse until a string consisting of the last character from the set, repeated `length` times
# since a range doesn't contain its upper extent, we specify the next "number"
# which is a string consisting of `length`+1 of the first character
# e.g. `aaaa` when the length is equal to 3
end_space = str_to_int(CHARS[0] * (args.length + 1))

# round the chunk size up so that at most the specified number of chunks
# is required to cover the whole range
# (integer ceiling division: going through a float would lose precision for big ranges
# and could leave the tail of the range unscanned)
chunk_size = -(-(end_space - start_space) // args.num_chunks)

results = []
for c in range(0, args.num_chunks):
    # we don't want to search beyond the end of the range,
    # so we clamp the start/end values to the whole range's bounds
    start_chunk = min(end_space, start_space + c * chunk_size)
    end_chunk = min(end_space, start_chunk + chunk_size)

    # because the chunk size is rounded up, the range may be exhausted
    # before all the chunks are used - there's no point in submitting empty jobs
    if start_chunk >= end_chunk:
        break

    # asynchronously submit all the jobs without waiting for any of them to complete
    # the scan_range function is our hotspot, so we want to parallelize it as much as possible
    # and keep the object references of all the jobs for later
    results.append(scan_range.remote(args.hash, start_chunk, end_chunk))

# `ray.wait` hands us jobs in the order they *finish*, and a finished job
# may well be one that found nothing (its result is `None`),
# so we keep waiting until some job reports a match or there are no jobs left
match = None
pending = results
while pending and match is None:
    completed, pending = ray.wait(pending, num_returns=1)
    match = ray.get(completed[0])

# once we have the answer, the remaining jobs would only burn CPU time
for leftover in pending:
    ray.cancel(leftover)

print(
    f"finished in {datetime.now() - start_time},",
    f"match found: {match}" if match is not None else "match not found",
)