-
Notifications
You must be signed in to change notification settings - Fork 47
/
Search.cpp
385 lines (326 loc) · 9.52 KB
/
Search.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
// Search for binary signature pattern support
#include "SigMaker.h"
// Local search data container
struct SearchData
{
// Clone IDB byte database to RAM for fast pattern scanning
PBYTE buffer;
size_t size;
BOOL CloneIdb()
{
if (!buffer)
{
LOG_VERBOSE(__FUNCTION__ ": min_ea: 0x%llX, max_ea: 0x%llX, size: 0x%llX\n\n", (UINT64) inf.min_ea, (UINT64) inf.max_ea, (UINT64) (inf.max_ea - inf.min_ea));
// Allocate page buffer to encompass the whole the IDB region
size = (UINT64) (inf.max_ea - inf.min_ea);
buffer = (PBYTE) VirtualAlloc(NULL, size + 32, (MEM_COMMIT | MEM_RESERVE), PAGE_READWRITE);
if (buffer)
{
// Copy the IDB bytes to the buffer
// Simple loop much faster than: get_qword(), get_bytes(), etc.
// Note: For bytes that don't exist in the PE file, get_db_byte() will return 0xFF.
ea_t currentEa = inf.min_ea;
PBYTE ptr = buffer;
size_t count = size;
do
{
*ptr = (BYTE) get_db_byte(currentEa);
++currentEa, ++ptr, --count;
} while (count);
}
else
msg(MSG_TAG "** Failed to allocate the clone RAM buffer of size: 0x%llX ! **\n", size);
}
return buffer != NULL;
}
void Cleanup()
{
if (buffer)
{
VirtualFree(buffer, 0, MEM_RELEASE);
buffer = NULL;
}
}
// Most post 2013 Intel and 2015 AMD CPUs have "Advanced Vector Extensions 2" (AVX2) support
// 2022 86.65% https://store.steampowered.com/hwsurvey/
// https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#CPUs_with_AVX2
BOOL TestAVX2Support()
{
enum { EAX, EBX, ECX, EDX };
int regs[4];
// Highest Function Parameter
__cpuid(regs, 0);
if (regs[EAX] >= 7)
{
// Extended Features
__cpuid(regs, 7);
return (regs[EBX] & /*AVX2*/ (1 << 5)) != 0;
}
return FALSE;
}
BOOL hasAVX2;
SearchData() : buffer(NULL), size(0)
{
hasAVX2 = TestAVX2Support();
}
~SearchData()
{
Cleanup();
}
} static searchData;
void SearchCleanup()
{
searchData.Cleanup();
}
//-------------------------------------------------------------------------------------------------
/*
AVX2 pattern scanner based on Wojciech Mula's avx2_strstr_anysize()
http://0x80.pl/articles/simd-strfind.html#generic-sse-avx2
Rules:
1) Expects input data to be at least align 32
2) SIG must be at least 3 byte in length
3) SIG must be trimmed (the first and last of the pattern can't be a wildcard/mask)
*/
static inline UINT32 get_first_bit_set(UINT32 x)
{
// Generates a single BSF instruction
unsigned long ret;
_BitScanForward(&ret, x);
return (UINT32) ret;
}
static inline UINT32 clear_leftmost_set(UINT32 value)
{
// Generates a single BLSR instruction
return value & (value - 1);
}
// Like memcmp() but takes a 3rd 'mask' argument
// Note: Tried optimizing, has little effect on cumulative scan speed
int memcmp_mask(const BYTE *buffer1, const BYTE *buffer2, const BYTE *mask2, size_t count)
{
while (count--)
{
if (*mask2)
{
if (*buffer1 != *buffer2)
return -1;
}
buffer1++, buffer2++, mask2++;
};
return 0;
}
// Find signiture pattern in memory
PBYTE FindSignatureAVX2(PBYTE data, size_t size, const SIG &sig, BOOL hasWildcards)
{
const BYTE *pat = sig.bytes.data();
size_t patLen = sig.bytes.size();
size_t patLen1 = (patLen - 1);
size_t patLen2 = (patLen - 2);
// Fill 'first' and 'last' with the first and last pattern byte respectively
const __m256i first = _mm256_set1_epi8(pat[0]);
const __m256i last = _mm256_set1_epi8(pat[patLen1]);
if(!hasWildcards)
{
// A little faster without wildcards
// Scan 32 bytes at the time..
for (size_t i = 0; i < size; i += 32)
{
// Load in the next 32 bytes of input first and last
// Can use align 32 bit read for first since the input is page aligned
const __m256i block_first = _mm256_load_si256((const __m256i*) (data + i));
const __m256i block_last = _mm256_loadu_si256((const __m256i*) (data + i + patLen1));
// Compare first and last data to get 32byte masks
const __m256i eq_first = _mm256_cmpeq_epi8(first, block_first);
const __m256i eq_last = _mm256_cmpeq_epi8(last, block_last);
// AND the equality masks and into a 32 bit mask
UINT32 mask = _mm256_movemask_epi8(_mm256_and_si256(eq_first, eq_last));
// Do pattern compare between first and last position if we got our first and last at this data position
while (mask != 0)
{
UINT32 bitpos = get_first_bit_set(mask);
if (memcmp(data + i + bitpos + 1, pat + 1, patLen2) == 0)
{
return data + i + bitpos;
}
mask = clear_leftmost_set(mask);
};
}
}
else
{
// Pattern scan with wildcards mask
const BYTE *msk = sig.mask.data();
for (size_t i = 0; i < size; i += 32)
{
const __m256i block_first = _mm256_load_si256((const __m256i*) (data + i));
const __m256i block_last = _mm256_loadu_si256((const __m256i*) (data + i + patLen1));
const __m256i eq_first = _mm256_cmpeq_epi8(first, block_first);
const __m256i eq_last = _mm256_cmpeq_epi8(last, block_last);
UINT32 mask = _mm256_movemask_epi8(_mm256_and_si256(eq_first, eq_last));
// Do a byte pattern w/mask compare between first and last position if we got our first and last
while (mask != 0)
{
UINT32 bitpos = get_first_bit_set(mask);
if (memcmp_mask(data + i + bitpos + 1, pat + 1, msk + 1, patLen2) == 0)
{
return data + i + bitpos;
}
mask = clear_leftmost_set(mask);
};
}
}
return NULL;
}
// ------------------------------------------------------------------------------------------------
// Find signiture pattern in memory
// Base memory search reference, about 10x slower than the AVX2 version
PBYTE FindSignature(PBYTE input, size_t inputLen, const SIG &sig, BOOL hasWildcards)
{
if (!hasWildcards)
{
// If no wildcards, faster to use a memcmp() type
const BYTE *pat = sig.bytes.data();
const BYTE *end = (input + inputLen);
const BYTE first = *pat;
size_t sigLen = sig.bytes.size();
// Setup last in the pattern length byte quick for rejection test
size_t lastIdx = (sigLen - 1);
BYTE last = pat[lastIdx];
for (PBYTE ptr = input; ptr < end; ++ptr)
{
if ((ptr[0] == first) && (ptr[lastIdx] == last))
{
if (memcmp(ptr+1, pat+1, sigLen-2) == 0)
return ptr;
}
}
}
else
{
const BYTE *pat = sig.bytes.data();
const BYTE *msk = sig.mask.data();
const BYTE *end = (input + inputLen);
const BYTE first = *pat;
size_t sigLen = sig.bytes.size();
size_t lastIdx = (sigLen - 1);
BYTE last = pat[lastIdx];
for (PBYTE ptr = input; ptr < end; ++ptr)
{
if ((ptr[0] == first) && (ptr[lastIdx] == last))
{
const BYTE *patPtr = pat+1;
const BYTE *mskPtr = msk+1;
const BYTE *memPtr = ptr+1;
BOOL found = TRUE;
for (int i = 0; (i < sigLen-2) && (memPtr < end); ++mskPtr, ++patPtr, ++memPtr, i++)
{
if (!*mskPtr)
continue;
if (*memPtr != *patPtr)
{
found = FALSE;
break;
}
}
if (found)
return ptr;
}
}
}
return NULL;
}
// ------------------------------------------------------------------------------------------------
// Reference version search
static SSTATUS SearchSignature(PBYTE input, size_t inputLen, const SIG &sig)
{
size_t sigSize = sig.bytes.size();
size_t len = inputLen;
size_t count = 0;
BOOL hasWildcards = sig.hasMask();
inputLen -= sigSize;
// Search for signature match..
PBYTE match = FindSignature(input, len, sig, hasWildcards);
while (match)
{
// Stop now if we've hit two matches
if (++count >= 2)
break;
++match;
len = (inputLen - (int) (match - input));
if (len < sigSize)
break;
// Next search
match = FindSignature(match, len, sig, hasWildcards);
};
SSTATUS status;
switch (count)
{
case 0: status = SSTATUS::NOT_FOUND; break;
case 1: status = SSTATUS::UNIQUE; break;
default: status = SSTATUS::NOT_UNIQUE; break;
};
// Only happens when there is an error in the search algorithm during development/testing
if (status == SSTATUS::NOT_FOUND)
{
msg("\n** " __FUNCTION__ ": Sig not found! **\n");
qstring tmp;
sig.ToIdaString(tmp);
msg("(%u) \"%s\"\n\n", (UINT32) sig.bytes.size(), tmp.c_str());
}
return status;
}
// Fast AVX2 based search
static SSTATUS SearchSignatureAVX2(PBYTE input, size_t inputLen, const SIG &sig)
{
size_t sigSize = sig.bytes.size();
size_t len = inputLen;
size_t count = 0;
BOOL hasWildcards = sig.hasMask();
inputLen -= sigSize;
PBYTE match = FindSignatureAVX2(input, len, sig, hasWildcards);
while (match)
{
if (++count >= 2)
break;
++match;
len = (inputLen - (int) (match - input));
if (len < sigSize)
break;
match = FindSignatureAVX2(match, len, sig, hasWildcards);
};
SSTATUS status;
switch (count)
{
case 0: status = SSTATUS::NOT_FOUND; break;
case 1: status = SSTATUS::UNIQUE; break;
default: status = SSTATUS::NOT_UNIQUE; break;
};
// Only happens when there is an error in the search algorithm during development/testing
if (status == SSTATUS::NOT_FOUND)
{
msg("\n** " __FUNCTION__ ": Sig not found! **\n");
qstring tmp;
sig.ToIdaString(tmp);
msg("(%u) \"%s\"\n\n", (UINT32) sig.bytes.size(), tmp.c_str());
}
return status;
}
// Search for signiture pattern, returning a status result
SSTATUS SearchSignature(const SIG &sig)
{
// Setup IDB RAM clone on first scan
if (!searchData.CloneIdb())
return SSTATUS::NOT_FOUND;
if (searchData.hasAVX2)
return SearchSignatureAVX2(searchData.buffer, searchData.size, sig);
else
{
static BOOL warnOnce = TRUE;
if ((settings.outputLevel >= SETTINGS::LL_VERBOSE) && warnOnce)
{
warnOnce = FALSE;
msg(__FUNCTION__ ": * Using non-AVX2 reference search *\n");
}
return SearchSignature(searchData.buffer, searchData.size, sig);
}
}