forked from sgfvamll/XihuLunJian-AI
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain_config.py
120 lines (109 loc) · 3.44 KB
/
train_config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# Training / evaluation configuration shared by the project's scripts.
#
# Layout:
#   models       -- estimators to fit; each entry has the estimator class name
#                   and a `params` mapping in which every value is a list of
#                   candidate settings (presumably a grid-search parameter
#                   grid -- confirm against the training script).
#   scoring, cv, n_jobs -- search settings used together with `models`.
#   test_config  -- model names and search settings for the test run.
#   drop_cols    -- column names to discard; every name is listed once.
args = dict(
    models=[
        {
            'name': 'RandomForestClassifier',
            'params': {
                'n_estimators': [60],
                'max_depth': [12],
                'min_samples_split': [6],
                'min_samples_leaf': [2],
                # 'max_features': [12],
                'random_state': [123456],
                # Class 1 is weighted ten times as heavily as class 0.
                'class_weight': [{0: 1, 1: 10}],
            },
        },
        {
            'name': 'ExtraTreesClassifier',
            'params': {
                # Only class_weight is set; the other candidates are disabled.
                # 'n_estimators': [70],
                # 'max_depth': [13],
                # 'min_samples_split': [8],
                # 'min_samples_leaf': [2],
                # 'max_features': [13],
                # 'random_state': [123456],
                'class_weight': [{0: 1, 1: 5}],
            },
        },
        {
            'name': 'AdaBoostClassifier',
            'params': {
                'n_estimators': [400],
                'learning_rate': [0.5],
                'random_state': [123456],
                # NOTE(review): newer scikit-learn releases deprecate the
                # 'SAMME.R' option -- confirm against the installed version.
                'algorithm': ['SAMME.R'],
            },
        },
        {
            'name': 'GradientBoostingClassifier',
            'params': {
                'n_estimators': [60],
                'max_depth': [12],
                'min_samples_split': [6],
                'min_samples_leaf': [2],
                'max_features': [0.7],
                'random_state': [123456],
                # For reference: fit(X, y, sample_weight=None, monitor=None)
            },
        },
        {
            'name': 'BaggingClassifier',
            'params': {
                'n_estimators': [70],
                'max_features': [0.8],
                'random_state': [123456],
            },
        },
        # Disabled candidates, kept so they can be switched back on:
        # {
        #     'name': 'KNeighborsClassifier',
        #     'params': {
        #         'weights': ['distance'],
        #         'algorithm': ['auto'],
        #         'n_neighbors': [3]
        #     },
        # },
        # {
        #     'name': 'XGBClassifier',
        #     'params': {
        #     }
        # },
        # {
        #     'name': 'LogisticRegressionCV',
        #     'params': {
        #         'class_weight': ['balanced'],
        #         'solver': ['saga'],
        #         'max_iter': [3000],
        #         'scoring': ['f1'],
        #     }
        # },
    ],
    scoring='f1',
    cv=5,
    n_jobs=6,
    # When True, DataPrepare regenerates the train_train and train_test
    # datasets. Use it while debugging or training features.
    TRAIN_Features=False,
    # Proportion of the data used as the training set.
    TRAIN_Percent=0.6,
    # Controls whether train.py / test.py load the train or test split
    # rather than the entire dataset.
    NEED_TEST=False,
    test_config={
        'models': [
            'ExtraTreesClassifier',
            'AdaBoostClassifier',
            'GradientBoostingClassifier',
            'RandomForestClassifier',
            'BaggingClassifier',
            # 'KNeighborsClassifier',
            # 'LogisticRegressionCV'
            # 'XGBClassifier'
        ],
        'scoring': 'f1',
        'cv': 5,
        'n_jobs': 3,
    },
    # The original expression listed 'ST', 'L' and 'O' a second time in its
    # last group; the repeats are removed here and the first-occurrence order
    # of every name is preserved.
    drop_cols=(
        ['srcAddress', 'destAddress', 'tlsSubject', 'appProtocol',
         'tlsIssuerDn', 'tlsSni', 'eventId']
        + ['C', 'ST', 'L', 'O', 'OU', 'CN']
        + ['label']
        + ['tls_star', 'tls_XX', 'C_len', 'tls_some_state', 'unknown_len',
           'tls_default', 'serialNumber_len']
        + ['CN_len', 'ST_len', 'tlsVersion']
        + ['emailAddress', 'serialNumber']
    ),
)