-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathOfficeMalGrabber.py
executable file
·328 lines (303 loc) · 14.8 KB
/
OfficeMalGrabber.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
#!/usr/bin/python2.7
# -*- coding: utf-8 -*-
# vim: tabstop=4 shiftwidth=4 expandtab
import zipfile
import os.path
import sys
import imp
import argparse
import textwrap
import shutil
import json
import core.littleEndian as littleEndian
import core.OleFileIO_PL as OleFileIO_PL
__author__ = "holger huettl, jan goebel <goebel@pi-one.net>"
__version__ = "0.0.2"
def omg_unzip(path, extractionFolder):
    """Extract the zip archive at *path* into *extractionFolder*.

    The folder is created (including missing parents) when it does not
    exist yet.

    :param path: path of the zip archive (e.g. an XML-based office document)
    :param extractionFolder: folder that receives the archive content
    :returns: the absolute path of the extraction folder
    :raises zipfile.BadZipfile: propagated from ``zipfile.ZipFile`` when
        *path* is not a readable zip archive
    """
    extractionFolder = os.path.abspath(extractionFolder)
    if not os.path.exists(extractionFolder):
        os.makedirs(extractionFolder)
    # The context manager closes the archive even when extraction fails,
    # so a damaged document does not leak an open file handle.
    # NOTE(review): the scanned archives are untrusted; protection against
    # member names that point outside the folder relies on the sanitising
    # done by zipfile itself (Python >= 2.7.4) -- confirm the interpreter
    # version used in production.
    with zipfile.ZipFile(path) as zfile:
        zfile.extractall(extractionFolder)
    return extractionFolder
def getFat(binaryContent, sectorsize):
    """Collect the allocated FAT entries of an OLE compound file.

    The FAT sector numbers are taken from the DIFAT array in the file
    header; every listed sector is then read and all of its entries that
    are not 0xFFFFFFFF are collected.

    :param binaryContent: complete content of the OLE file
    :param sectorsize: sector size of the file in bytes
    :returns: list of FAT entries, in the order in which they were read

    NOTE(review): assumes ``littleEndian.readInt(data, offset)`` returns the
    unsigned 32-bit little-endian integer stored at ``offset`` -- confirm
    against core/littleEndian. Only the DIFAT entries stored in the header
    are followed; additional DIFAT sectors of very large files are ignored.
    """
    end_markers = (0xFFFFFFFE, 0xFFFFFFFF)

    # The header DIFAT starts at byte 76 and holds at most 109 entries
    # (436 bytes). Bounding the scan by that size, and by the data that is
    # really available, keeps it from reading past the array, which the
    # previous fixed limit of 512 allowed.
    header = binaryContent[76:sectorsize]
    difat_bytes = min(len(header), 109 * 4)

    fatSectors = []
    for offset in range(0, difat_bytes - 3, 4):
        sector_id = littleEndian.readInt(header, offset)
        if sector_id in end_markers:
            break
        # Sector n starts at file offset (n + 1) * sectorsize because the
        # header occupies the first sector-sized slot.
        fatSectors.append(sector_id + 1)

    myFat = []
    for fatSect in fatSectors:
        sect = binaryContent[fatSect * sectorsize:(fatSect + 1) * sectorsize]
        # A truncated file may yield a short sector; only read complete
        # 4-byte entries.
        for offset in range(0, len(sect) - 3, 4):
            entry = littleEndian.readInt(sect, offset)
            if entry != 0xFFFFFFFF:
                myFat.append(entry)
    return myFat
# Sector IDs (from the AAF specifications, added by [PL]).
MAXREGSECT = 0xFFFFFFFA  # maximum SECT
DIFSECT = 0xFFFFFFFC  # (-4) denotes a DIFAT sector in a FAT
FATSECT = 0xFFFFFFFD  # (-3) denotes a FAT sector in a FAT
ENDOFCHAIN = 0xFFFFFFFE  # (-2) end of a virtual stream chain
FREESECT = 0xFFFFFFFF  # (-1) unallocated sector
MAGIC_VALUES = [MAXREGSECT, DIFSECT, FATSECT, ENDOFCHAIN, FREESECT]

_LINE = '_' * 60
_DOUBLE_LINE = '=' * 60


def _report(message, args, json_result):
    """Deliver *message*: to the JSON debug list with --json, else to stdout."""
    if args.json:
        json_result['debug'].append(message)
    else:
        print(message)


def _skip(message, args, json_result, json_response):
    """Report why a file is skipped and, with --json, file its result."""
    _report(message, args, json_result)
    if args.json:
        json_response.append(json_result)


def _status(text):
    """Write a progress label and stay on the same line for what follows."""
    sys.stdout.write(text + ' ')
    sys.stdout.flush()


def _content_checks(Module_VBA, target, isOle, docType, args, json_result):
    """Return (OLE label, XML label, callable) for the three content scanners."""
    def macros():
        Module_VBA.VBA_Mod(target, isOle, docType, args, json_result).extractMacroCode()

    def flash():
        Module_Flashobject = imp.load_source('Module_Flashobject', 'modules/flash/Module_Flashobject.py')
        Module_Flashobject.Flash_Mod(target, isOle, docType, args, json_result).locateFlashObjects()

    def javascript():
        Module_Javascript = imp.load_source('Module_Javascript', 'modules/javascript/Module_Javascript.py')
        Module_Javascript.JS_Mod(target, isOle, docType, args, json_result).locateJavascriptSource()

    return (
        (".: checking for macro code ...", "searching for VBA ...", macros),
        (".: checking for flash objects ...", "searching for FLASH ...", flash),
        (".: checking for javascript code ...", "searching for JavaScript ...", javascript),
    )


def _scan_ole(fileName, Module_VBA, args, json_result, json_response):
    """Scan a document in the old OLE format; return its docType or None if skipped."""
    ole = OleFileIO_PL.OleFileIO(fileName)
    # NOTE: the search for data stored behind the FAT-addressed storage
    # (see getFat) is currently disabled.
    for stream, kind in (('WordDocument', '/word'), ('Workbook', '/xl'), ('PowerPoint Document', '/ppt')):
        if ole.exists(stream):
            docType = kind
            break
    else:
        # skip this file as it is probably an activeX.bin
        _skip('file seems to be neither .docx, .xlsx nor .pptx', args, json_result, json_response)
        return None

    # A failing scanner must not prevent the remaining ones from running.
    for label, _unused, check in _content_checks(Module_VBA, fileName, 1, docType, args, json_result):
        try:
            if not args.json:
                _status(label)
            check()
        except Exception as e:
            _report('%s' % (e,), args, json_result)
    return docType


def _scan_xml(fileName, Module_VBA, args, json_result, json_response):
    """Unzip and scan a document in the XML-based format.

    Returns ``(docType, extractionFolder)`` or None when the file is skipped.
    """
    verbose = not args.quiet and not args.json

    # Determine the folder that receives the extracted XML parts: next to the
    # document by default, otherwise below the requested extraction folder.
    if args.extractionFolder == '.':
        folderName = fileName.rsplit('.', 1)[0]
    else:
        folderName = args.extractionFolder + '/' + fileName.rsplit('.', 1)[0].rsplit('/', 1)[-1]

    if not zipfile.is_zipfile(fileName):
        _skip("this is not a zip file", args, json_result, json_response)
        return None

    if verbose:
        _status("extracting file ...")
    try:
        extractionFolder = omg_unzip(fileName, folderName)
    except zipfile.BadZipfile:
        if not args.json:
            print("")
        # The message is formatted into ONE string: list.append() accepts a
        # single argument, the former two-argument call raised TypeError.
        _skip('failed to extract XML-based document: %s' % (fileName,), args, json_result, json_response)
        return None
    if verbose:
        print("done")
        sys.stdout.flush()

    for subFolder, kind in (("word", '/word'), ("xl", '/xl'), ("ppt", '/ppt')):
        if os.path.exists(os.path.join(folderName, subFolder)):
            docType = kind
            break
    else:
        _skip('could not determine filetype, skipping this file (%s)' % (folderName,),
              args, json_result, json_response)
        return None

    # Unlike the OLE branch, scanner errors are not caught here.
    for _unused, label, check in _content_checks(Module_VBA, folderName, 0, docType, args, json_result):
        if verbose:
            _status(label)
        check()
    return docType, extractionFolder


def _scan_file(fileName, args, json_response):
    """Scan one document and, with --json, append its result to *json_response*."""
    json_result = {'filename': fileName, 'debug': [], 'detections': [], 'signatures': [], 'threat index': 0}
    verbose = not args.quiet and not args.json

    if not os.path.exists(fileName):
        _skip("no such file: %s" % (fileName,), args, json_result, json_response)
        return

    if verbose:
        print(_DOUBLE_LINE)
    if not args.json:
        print('scanning document-file: %s' % (fileName,))
    if verbose:
        print(_LINE)
    sys.stdout.flush()

    try:
        Module_VBA = imp.load_source('Module_VBA', 'modules/VBA/Module_VBA.py')
        if verbose:
            _status("checking file format ...")
        isOle = OleFileIO_PL.isOleFile(fileName)
        fileFormat = '/OLE' if isOle else '/XML'
        if verbose:
            print(fileFormat)
            sys.stdout.flush()

        if isOle:
            docType = _scan_ole(fileName, Module_VBA, args, json_result, json_response)
            if docType is None:
                return
            extractionFolder = None
        else:
            scanned = _scan_xml(fileName, Module_VBA, args, json_result, json_response)
            if scanned is None:
                return
            docType, extractionFolder = scanned

        # Load and run the CVE detection plugins suitable for this document.
        # Plugins live in modules/CVE_detection/<fileFormat>/<docType>. Each
        # plugin must provide a wrapper function
        # getNewInstance(fileName, docType) that calls the constructor of the
        # actual module; fileName is the path of the scanned document and
        # docType tells whether it is a word, excel or powerpoint document.
        if verbose:
            _status(".: loading plugins ...")
        pluginLoader = imp.load_source('pluginLoader', 'modules/CVE_detection/pluginLoader.py')
        detectors = pluginLoader.pluginLoader(fileFormat, docType, fileName, extractionFolder, args, json_result)
        if verbose:
            print("done")
            sys.stdout.flush()
        detectors.runDetectors()
        if verbose:
            print(_LINE)

        # The extraction folder is intentionally kept (cleanup is disabled).
        if args.json:
            if len(json_result['signatures']) > 0:
                json_result['threat index'] += 5
            if len(json_result['detections']) > 0:
                json_result['threat index'] += 2
            json_response.append(json_result)
    except IOError as e:
        # The first string argument of the exception decides how it is
        # reported. An exception without any string argument is reported
        # with the generic message instead of being dropped silently.
        texts = [arg for arg in e.args if isinstance(arg, str)]
        if texts and 'malformed OLE' in texts[0]:
            _report('WARNING: document: ' + os.path.abspath(fileName) + ' seems to be damaged!',
                    args, json_result)
            _report('this might be an indicator for embedded malware', args, json_result)
        else:
            _report('document: ' + os.path.abspath(fileName) + ' caused ' + str(type(e)),
                    args, json_result)
            _report(e.args, args, json_result)
        if verbose:
            print(_LINE)
        if args.json:
            json_response.append(json_result)


def main(argv=None):
    """Parse the command line, scan the requested documents, return the results.

    :param argv: argument list; ``None`` makes argparse use ``sys.argv[1:]``
    :returns: list with one result dict per scanned file when --json is
        given (also printed as JSON), otherwise an empty list

    The scanner modules are loaded through paths relative to the current
    working directory, which therefore has to be the script directory.
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter)
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--fileName', '-f', type=str, help='scan particular file')
    group.add_argument('--recursive', '-r', type=str, help='recursively traverse given directory for files')
    parser.add_argument('--extractionFolder', '-e', type=str, default='.', help='extract files such as macro code to this folder (default: current)')
    parser.add_argument('--quiet', '-q', action="store_true", help='disable noisy output')
    parser.add_argument('--json', '-j', action="store_true", help='output results in json format')
    args = parser.parse_args(argv)

    if not args.recursive and not args.fileName:
        parser.print_help()
        sys.exit()

    if args.recursive:
        filesToScan = [
            os.path.abspath(os.path.join(dirname, name))
            for dirname, _dirnames, filenames in os.walk(args.recursive)
            for name in filenames
        ]
    else:
        filesToScan = [args.fileName]

    json_response = []
    for fileName in filesToScan:
        _scan_file(fileName, args, json_response)

    if args.json:
        print(json.dumps(json_response, sort_keys=False, indent=4, separators=(',', ': ')))
    return json_response


if __name__ == '__main__':
    # The scanner modules and plugins are addressed relative to the script
    # directory, so make it the working directory first.
    # NOTE(review): as a consequence relative paths given on the command
    # line are resolved against the script directory, not against the
    # directory the user started the script from.
    workdir = sys.path[0]
    os.chdir(workdir)
    main()