-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgui.py
272 lines (226 loc) · 8.79 KB
/
gui.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
#GUI for (PE) Malware Detection using UI (Production=Client)
#IMPORTANT: 'malware_models.pkl' and the 'yara_rules/' folder are opened with relative paths, so keep both in the same folder as 'gui.py' and run the script from that folder
import itertools
import pickle
import re

import numpy as np
import pandas as pd
import pefile #pip install pefile
import PySimpleGUI as sg #pip install PySimpleGUI
import yara #pip install yara-python
from xgboost import XGBClassifier #pip install xgboost==0.90
# Getting back the models and the features encoder data.
# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file,
# so 'malware_models.pkl' must only ever come from a trusted source.
with open('malware_models.pkl', 'rb') as f:
    model_name, model_list, f1_score_all, accuracy_all, newlist = pickle.load(f)

sg.theme('Dark Blue 3')  # please make your windows colorful

# Ask the user for the PE file to analyse.
layout = [[sg.Text('Select PE File (.exe)')],
          [sg.Text('Path ', size=(15, 1)), sg.InputText(), sg.FileBrowse()],
          [sg.Submit(), sg.Cancel()]]
window = sg.Window('Select File...', layout)
event, values = window.read()
window.close()

# Closing the dialog returns values=None, and Cancel / an empty input leaves
# no path to scan. Stop here instead of failing later on values[0] or
# scoring a file that does not exist.
if event in (sg.WIN_CLOSED, 'Cancel') or not values or not values[0]:
    raise SystemExit('No file selected.')

file_path = values[0]
# Single-row frame; the 'hash' column holds the path of the file to scan.
test_dataset = pd.DataFrame([file_path], columns=['hash'])
# YARA configuration.
# Path to the folder containing the downloaded rule files.
rules_path = 'yara_rules/'

# (rule-set name, rule file) pairs, in the order they are compiled and listed.
_rule_files = [
    ("peid_rules", 'peid.yar'),
    ("packer_rules", 'packer.yar'),
    ("crypto_rules", 'crypto_signatures.yar'),
    ("antidebug_antivm_rules", 'antidebug_antivm.yar'),
    ("capabilities_rules", 'capabilities.yar'),
]

# Parallel lists: rules_names_list[i] is the name of rules_list[i].
rules_names_list = [rule_name for rule_name, _ in _rule_files]
rules_list = [yara.compile(rules_path + file_name) for _, file_name in _rule_files]

# Keep the individual module-level names available as well.
(peid_rules, packer_rules, crypto_rules,
 antidebug_antivm_rules, capabilities_rules) = rules_list
# Feature generation from the YARA rule sets.
def add_feature_yara(dataframe, rules, rules_names):
    """Add one '<rules name>_feature' column to dataframe per rule set.

    Every value of dataframe['hash'] is handed to rule_scan_hash together
    with the rule set, and the collected results become the new column.
    The frame is modified in place; nothing is returned.

    dataframe   -- frame with a 'hash' column
    rules       -- sequence of rule sets, scanned in order
    rules_names -- sequence of names, rules_names[i] labels rules[i]
    """
    for position, rule_set in enumerate(rules):
        matches_per_file = [rule_scan_hash(path, rule_set)
                            for path in dataframe['hash']]
        dataframe[rules_names[position] + '_feature'] = matches_per_file
def rule_scan_hash(hash_var, rule):
    """Return rule.match(hash_var), or [] when the scan fails.

    hash_var -- value passed unchanged to rule.match (callers in this file
                pass the path of the file to scan)
    rule     -- object exposing a match() method (a compiled YARA rule set
                in this file)

    Scanning is best effort: any ordinary error yields an empty match list.
    KeyboardInterrupt and SystemExit are deliberately not swallowed, so the
    user can still abort a long scan.
    """
    try:
        return rule.match(hash_var)
    except Exception:
        return []
# PE feature functions
def pe_list_imported_DLLs(hash_var):
    """Return the names of the DLLs imported by the PE file at hash_var.

    hash_var -- path of the file to parse

    Returns a list of DLL names decoded as UTF-8. Parsing is best effort:
    an unreadable or malformed file, or one without an import table,
    yields [].
    """
    try:
        pe = pefile.PE(hash_var)
    except Exception:
        return []
    try:
        # The import directory attribute is absent when the file imports
        # nothing, so fall back to an empty list instead of relying on the
        # exception handler.
        entries = getattr(pe, 'DIRECTORY_ENTRY_IMPORT', [])
        return [entry.dll.decode('utf-8') for entry in entries]
    except Exception:
        return []
    finally:
        # Release the parsed file; the original never closed it.
        pe.close()
def pe_list_imported_DLLs_imports(hash_var, dll_name):
    """Return the function names the PE file at hash_var imports from dll_name.

    hash_var -- path of the file to parse
    dll_name -- DLL name, compared exactly against each import entry

    Returns a list of UTF-8 decoded function names. Imports that have no
    name (imported by ordinal) are skipped; previously a single such import
    raised inside the loop and the whole result was discarded. Parsing is
    best effort: any failure yields [].
    """
    try:
        pe = pefile.PE(hash_var)
    except Exception:
        return []
    try:
        list_imports = []
        for entry in getattr(pe, 'DIRECTORY_ENTRY_IMPORT', []):
            if entry.dll.decode('utf-8') != dll_name:
                continue
            # Report the DLL actually being listed (the message used to say
            # Kernel32.dll regardless of dll_name).
            print(f"[*] {dll_name} imports:")
            for func in entry.imports:
                if func.name is None:
                    continue
                list_imports.append(func.name.decode('utf-8'))
        return list_imports
    except Exception:
        return []
    finally:
        # Release the parsed file; the original never closed it.
        pe.close()
def pe_sections_name(hash_var):
    """Return the section names of the PE file at hash_var.

    hash_var -- path of the file to parse

    Each name is decoded as UTF-8 with the NUL padding bytes removed. A name
    that cannot be decoded is recorded as the literal 'faild' (spelling kept
    as is, since the string is emitted as a feature value). Parsing is best
    effort: any other failure yields [].
    """
    try:
        pe = pefile.PE(hash_var)
    except Exception:
        return []
    try:
        list_section = []
        for section in pe.sections:
            try:
                # Plain str.replace strips the NUL padding; no regex needed.
                list_section.append(section.Name.decode('utf-8').replace('\x00', ''))
            except UnicodeDecodeError:
                list_section.append('faild')
        return list_section
    except Exception:
        return []
    finally:
        # Release the parsed file; the original never closed it.
        pe.close()
def add_feature_PE(dataframe):
    """Add the PE-derived feature columns to dataframe and return it.

    For every value of dataframe['hash'] two columns are filled in:
    'sections_name_feature' (from pe_sections_name) and
    'imported_DLLs_feature' (from pe_list_imported_DLLs).

    The per-DLL imported-function feature is not generated; the original
    author disabled it because it takes a very long time.
    """
    paths = list(dataframe['hash'])
    dataframe['sections_name_feature'] = [pe_sections_name(path) for path in paths]
    dataframe['imported_DLLs_feature'] = [pe_list_imported_DLLs(path) for path in paths]
    return dataframe
# Function for getting the list of feature (column) names.
def get_new_columns(colum_values):
    """Return the distinct items of a column of lists, as strings.

    colum_values -- 1-D array whose elements are lists (as produced by
                    DataFrame[col].values for a list-valued column)

    The rows are first passed through np.unique, then flattened; each item
    is converted with str() and kept the first time its string form is
    seen. De-duplicating on the string form (rather than on the raw item)
    guarantees the returned names are unique even when different items
    stringify identically, and it replaces the quadratic list-membership
    check with a dict lookup.
    """
    flattened = itertools.chain.from_iterable(np.unique(colum_values))
    # dict.fromkeys keeps first-seen order while dropping repeats.
    return list(dict.fromkeys(str(item) for item in flattened))
# Function for one-hot encoding a column of lists.
def encode_lists(colum_values):
    """One-hot encode a column of lists into a DataFrame.

    The column names come from get_new_columns(colum_values). Each input
    row becomes one output row holding 1 in the columns whose name equals
    str(item) for some item of that row, and 0 everywhere else.
    """
    column_names = get_new_columns(colum_values)
    width = len(column_names)
    encoded_rows = []
    for row in colum_values:
        flags = [0] * width
        for item in row:
            flags[column_names.index(str(item))] = 1
        encoded_rows.append(flags)
    return pd.DataFrame(encoded_rows, columns=column_names)
# Step 1: extract the features, showing a two-step progress window.
# This has to run BEFORE the encoding below: until the YARA and PE columns
# are added, test_dataset only holds the 'hash' column and there is nothing
# to encode.
layout = [[sg.Text('Extracting Features')],
          [sg.ProgressBar(2, orientation='h', size=(20, 20), key='progressbar')],
          [sg.Cancel()]]
window = sg.Window('Analyse', layout)
progress_bar = window['progressbar']
features_ready = False
# The loop body runs at most once; it is a loop only so that 'break' can
# abandon the remaining steps when the user cancels.
while True:
    # check to see if the cancel button was clicked and exit loop if clicked
    event, values = window.read(timeout=10)
    if event == 'Cancel' or event == sg.WIN_CLOSED:
        break
    add_feature_yara(test_dataset, rules_list, rules_names_list)
    progress_bar.UpdateBar(1)
    event, values = window.read(timeout=10)
    if event == 'Cancel' or event == sg.WIN_CLOSED:
        break
    test_dataset = add_feature_PE(test_dataset)
    progress_bar.UpdateBar(2)
    features_ready = True
    break
# done with loop... need to destroy the window as it's still open
window.close()

# Without the complete feature set there is nothing meaningful to score.
if not features_ready:
    raise SystemExit('Feature extraction cancelled.')

# Step 2: one-hot encode every feature column (everything after 'hash') and
# place the encoded blocks side by side.
encoded_parts = [encode_lists(test_dataset[col].values)
                 for col in test_dataset.columns[1:]]
features_df_test = pd.concat(encoded_parts, axis=1)
newlist_test = list(features_df_test.columns)

# Step 3: lay the values out in the column order stored in 'newlist' (loaded
# from the pickle). Names missing from this file stay 0; names that are not
# in 'newlist' are dropped.
features_to_pridict = []
for row in range(len(features_df_test)):
    list_zero = [0] * len(newlist)
    for col in newlist_test:
        if col in newlist:
            list_zero[newlist.index(col)] = features_df_test[col].values[row]
    features_to_pridict.append(list_zero)

# Step 4: ask every model for its verdict on the (single) file.
pred = [model.predict(features_to_pridict)[0] for model in model_list]
### Show Results
def get_color(val):
    """Return 'red' when val is greater than 0, otherwise 'green'."""
    return 'red' if val > 0 else 'green'
def print_value(val):
    """Return 'Malicious' when val is greater than 0, otherwise 'Benign'."""
    return 'Malicious' if val > 0 else 'Benign'
# Results window: one verdict row per model plus an overall risk gauge.
# NOTE(review): the labels are hard-coded and presumably follow the order of
# model_list in the pickle -- confirm against the training script.
model_labels = ['XGBoost detected: ',
                'Random Forest detected:',
                'Decision Tree detected:',
                'AdaBoost detected: ',
                'Naive Bayes detected: ',
                'SGD detected: ',
                'MLP detected: ']
col11 = [[sg.Text(label)] for label in model_labels]
# One coloured verdict per prediction, instead of indexing pred[0]..pred[6].
col12 = [[sg.Text(print_value(p), text_color=get_color(p))] for p in pred]

model_count = len(pred)
detections = np.sum(pred)  # number of models that flagged the file

col2 = [[sg.Text(' Risk Level')],
        [sg.VerticalSeparator(10),
         sg.ProgressBar(model_count, orientation='v', size=(20, 60), key='progressbar')]]
layout = [[sg.Column(col11), sg.Column(col12), sg.VerticalSeparator(), sg.Column(col2)],
          [sg.Text(f'\n\n Final Score (RISK) {int(detections/model_count*100)}% ({detections} out of {model_count})\n\n')],
          [sg.OK()]]

# Display the window and keep the gauge filled until the user dismisses it.
window = sg.Window('Results', layout)
while True:
    event, values = window.read(timeout=10)
    if event == 'OK' or event == sg.WIN_CLOSED:
        break
    window['progressbar'].update(detections)
window.close()