'''
file: SGST_model_MC.py
author: Donglin Zhou
time: 2021.12.15
function: SGST model -- long-term stock price forecasting guided by sentiment and by the price sequence.
The sentiment inputs are built with the MC dictionary.
'''
import torch

from new_attention import *     # provides Multihead_attention
from sparse_attention import *  # provides sparseAttention
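
# Architecture overview (as read from the code below):
#   * Seven input streams -- the price sequence, three auxiliary price features, and three
#     sentiment series (named pos/neg/sub; presumably positive, negative and subjectivity
#     scores derived from the MC dictionary) -- are each projected from 1 to `hidden_units` dims.
#   * In every block, each stream queries the price sequence through causal multi-head
#     attention and is refined by a point-wise feed-forward layer; the concatenated
#     sentiment embeddings additionally pass through a sparse-attention layer.
#   * The seven normalized streams are concatenated and fed to one LSTM, the sparse
#     sentiment representation to another; their hidden states are concatenated,
#     normalized, and mapped to a one-dimensional price prediction.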


# Point-wise feed-forward network applied after each attention layer.
class PointWiseFeedForward(torch.nn.Module):
    def __init__(self, hidden_units, dropout_rate):
        super(PointWiseFeedForward, self).__init__()
        # 1-D convolutions with kernel_size=1 act as position-wise linear layers
        # (input channels == output channels == hidden_units).
        self.conv1 = torch.nn.Conv1d(hidden_units, hidden_units, kernel_size=1)
        self.dropout1 = torch.nn.Dropout(p=dropout_rate)
        self.relu = torch.nn.ReLU()
        self.conv2 = torch.nn.Conv1d(hidden_units, hidden_units, kernel_size=1)
        self.dropout2 = torch.nn.Dropout(p=dropout_rate)

    def forward(self, inputs):
        # Transpose the last two dims, then apply conv -> dropout -> ReLU -> conv -> dropout.
        outputs = self.dropout2(self.conv2(self.relu(self.dropout1(self.conv1(inputs.transpose(-1, -2))))))
        # Conv1d requires (N, C, Length); transpose back to (N, Length, C), e.g. (128, 200, 50).
        outputs = outputs.transpose(-1, -2)
        # Residual connection.
        outputs += inputs
        return outputs
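
# Shape sketch (using the (128, 200, 50) example above, i.e. batch=128, seq_len=200,
# hidden_units=50): inputs (128, 200, 50) -> transpose -> (128, 50, 200) -> conv/ReLU/conv
# -> (128, 50, 200) -> transpose back -> (128, 200, 50); the residual keeps shapes equal.
# Minimal usage sketch (illustrative only, not part of the model):
#   ffn = PointWiseFeedForward(hidden_units=50, dropout_rate=0.2)
#   out = ffn(torch.zeros(128, 200, 50))   # out.shape == (128, 200, 50)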


class SGST(torch.nn.Module):
    def __init__(self, args):
        super(SGST, self).__init__()
        self.dev = args.device

        # Per-stream attention / feed-forward containers: the raw price sequence,
        # three auxiliary price features, and the pos/neg/sub sentiment series.
        self.attention_layernorms = torch.nn.ModuleList()
        self.attention_layers = torch.nn.ModuleList()
        self.forward_layernorms = torch.nn.ModuleList()
        self.forward_layers = torch.nn.ModuleList()
        self.feature1_attention_layernorms = torch.nn.ModuleList()
        self.feature1_attention_layers = torch.nn.ModuleList()
        self.feature1_forward_layernorms = torch.nn.ModuleList()
        self.feature1_forward_layers = torch.nn.ModuleList()
        self.feature2_attention_layernorms = torch.nn.ModuleList()
        self.feature2_attention_layers = torch.nn.ModuleList()
        self.feature2_forward_layernorms = torch.nn.ModuleList()
        self.feature2_forward_layers = torch.nn.ModuleList()
        self.feature3_attention_layernorms = torch.nn.ModuleList()
        self.feature3_attention_layers = torch.nn.ModuleList()
        self.feature3_forward_layernorms = torch.nn.ModuleList()
        self.feature3_forward_layers = torch.nn.ModuleList()
        self.pos_attention_layernorms = torch.nn.ModuleList()
        self.pos_attention_layers = torch.nn.ModuleList()
        self.pos_forward_layernorms = torch.nn.ModuleList()
        self.pos_forward_layers = torch.nn.ModuleList()
        self.neg_attention_layernorms = torch.nn.ModuleList()
        self.neg_attention_layers = torch.nn.ModuleList()
        self.neg_forward_layernorms = torch.nn.ModuleList()
        self.neg_forward_layers = torch.nn.ModuleList()
        self.sub_attention_layernorms = torch.nn.ModuleList()
        self.sub_attention_layers = torch.nn.ModuleList()
        self.sub_forward_layernorms = torch.nn.ModuleList()
        self.sub_forward_layers = torch.nn.ModuleList()
        self.sparse_attention_layers = torch.nn.ModuleList()

        # Linear embeddings from the scalar inputs to hidden_units dimensions.
        self.seqs_embedd = torch.nn.Linear(1, args.hidden_units).cuda()
        self.feature1_embedd = torch.nn.Linear(1, args.hidden_units).cuda()
        self.feature2_embedd = torch.nn.Linear(1, args.hidden_units).cuda()
        self.feature3_embedd = torch.nn.Linear(1, args.hidden_units).cuda()
        self.pos_embedd = torch.nn.Linear(1, args.hidden_units).cuda()
        self.neg_embedd = torch.nn.Linear(1, args.hidden_units).cuda()
        self.sub_embedd = torch.nn.Linear(1, args.hidden_units).cuda()

        # Final normalization and fusion layers.
        self.seq_last_layernorm = torch.nn.LayerNorm(args.hidden_units, eps=1e-8)
        self.feature1_last_layernorm = torch.nn.LayerNorm(args.hidden_units, eps=1e-8)
        self.feature2_last_layernorm = torch.nn.LayerNorm(args.hidden_units, eps=1e-8)
        self.feature3_last_layernorm = torch.nn.LayerNorm(args.hidden_units, eps=1e-8)
        self.price_concat_layernorm = torch.nn.LayerNorm(args.hidden_units * 7, eps=1e-8).cuda()
        self.price_lstm_layer = torch.nn.LSTM(input_size=args.hidden_units * 7, hidden_size=args.hidden_units,
                                              batch_first=True).cuda()
        self.price_final_emd = torch.nn.Linear(args.hidden_units * 2, 1).cuda()
        self.final_norm = torch.nn.LayerNorm(args.hidden_units * 2, eps=1e-8)
        self.sparse_lstm_layer = torch.nn.LSTM(input_size=args.hidden_units, hidden_size=args.hidden_units,
                                               batch_first=True).cuda()
        self.seqs_final = torch.nn.Linear(args.hidden_units, 1).cuda()

        # Build args.num_blocks stacked blocks (the original comment notes 2 blocks).
        for _ in range(args.num_blocks):
            # Layer normalization in front of each stream's attention layer.
            new_attn_layernorm = torch.nn.LayerNorm(args.hidden_units, eps=1e-8)
            self.attention_layernorms.append(new_attn_layernorm)
            new_attn_layernorm = torch.nn.LayerNorm(args.hidden_units, eps=1e-8)
            self.feature1_attention_layernorms.append(new_attn_layernorm)
            new_attn_layernorm = torch.nn.LayerNorm(args.hidden_units, eps=1e-8)
            self.feature2_attention_layernorms.append(new_attn_layernorm)
            new_attn_layernorm = torch.nn.LayerNorm(args.hidden_units, eps=1e-8)
            self.feature3_attention_layernorms.append(new_attn_layernorm)
            new_attn_layernorm = torch.nn.LayerNorm(args.hidden_units, eps=1e-8)
            self.pos_attention_layernorms.append(new_attn_layernorm)
            new_attn_layernorm = torch.nn.LayerNorm(args.hidden_units, eps=1e-8)
            self.neg_attention_layernorms.append(new_attn_layernorm)
            new_attn_layernorm = torch.nn.LayerNorm(args.hidden_units, eps=1e-8)
            self.sub_attention_layernorms.append(new_attn_layernorm)

            # One causal multi-head attention layer per stream.
            new_attn_layer = Multihead_attention(num_units=args.hidden_units, num_heads=args.num_heads,
                                                 dropout_rate=args.dropout_rate, causality=True)
            self.attention_layers.append(new_attn_layer)
            new_attn_layer = Multihead_attention(num_units=args.hidden_units, num_heads=args.num_heads,
                                                 dropout_rate=args.dropout_rate, causality=True)
            self.feature1_attention_layers.append(new_attn_layer)
            new_attn_layer = Multihead_attention(num_units=args.hidden_units, num_heads=args.num_heads,
                                                 dropout_rate=args.dropout_rate, causality=True)
            self.feature2_attention_layers.append(new_attn_layer)
            new_attn_layer = Multihead_attention(num_units=args.hidden_units, num_heads=args.num_heads,
                                                 dropout_rate=args.dropout_rate, causality=True)
            self.feature3_attention_layers.append(new_attn_layer)
            new_attn_layer = Multihead_attention(num_units=args.hidden_units, num_heads=args.num_heads,
                                                 dropout_rate=args.dropout_rate, causality=True)
            self.pos_attention_layers.append(new_attn_layer)
            new_attn_layer = Multihead_attention(num_units=args.hidden_units, num_heads=args.num_heads,
                                                 dropout_rate=args.dropout_rate, causality=True)
            self.neg_attention_layers.append(new_attn_layer)
            new_attn_layer = Multihead_attention(num_units=args.hidden_units, num_heads=args.num_heads,
                                                 dropout_rate=args.dropout_rate, causality=True)
            self.sub_attention_layers.append(new_attn_layer)

            # Point-wise feed-forward network (with its own layer normalization) per stream.
            new_fwd_layernorm = torch.nn.LayerNorm(args.hidden_units, eps=1e-8)
            self.forward_layernorms.append(new_fwd_layernorm)
            new_fwd_layernorm = torch.nn.LayerNorm(args.hidden_units, eps=1e-8)
            self.feature1_forward_layernorms.append(new_fwd_layernorm)
            new_fwd_layernorm = torch.nn.LayerNorm(args.hidden_units, eps=1e-8)
            self.feature2_forward_layernorms.append(new_fwd_layernorm)
            new_fwd_layernorm = torch.nn.LayerNorm(args.hidden_units, eps=1e-8)
            self.feature3_forward_layernorms.append(new_fwd_layernorm)
            new_fwd_layernorm = torch.nn.LayerNorm(args.hidden_units, eps=1e-8)
            self.pos_forward_layernorms.append(new_fwd_layernorm)
            new_fwd_layernorm = torch.nn.LayerNorm(args.hidden_units, eps=1e-8)
            self.neg_forward_layernorms.append(new_fwd_layernorm)
            new_fwd_layernorm = torch.nn.LayerNorm(args.hidden_units, eps=1e-8)
            self.sub_forward_layernorms.append(new_fwd_layernorm)
            new_fwd_layer = PointWiseFeedForward(args.hidden_units, args.dropout_rate)
            self.forward_layers.append(new_fwd_layer)
            new_fwd_layer = PointWiseFeedForward(args.hidden_units, args.dropout_rate)
            self.feature1_forward_layers.append(new_fwd_layer)
            new_fwd_layer = PointWiseFeedForward(args.hidden_units, args.dropout_rate)
            self.feature2_forward_layers.append(new_fwd_layer)
            new_fwd_layer = PointWiseFeedForward(args.hidden_units, args.dropout_rate)
            self.feature3_forward_layers.append(new_fwd_layer)
            new_fwd_layer = PointWiseFeedForward(args.hidden_units, args.dropout_rate)
            self.pos_forward_layers.append(new_fwd_layer)
            new_fwd_layer = PointWiseFeedForward(args.hidden_units, args.dropout_rate)
            self.neg_forward_layers.append(new_fwd_layer)
            new_fwd_layer = PointWiseFeedForward(args.hidden_units, args.dropout_rate)
            self.sub_forward_layers.append(new_fwd_layer)

            # Sparse attention over the concatenated sentiment embeddings.
            new_sparse_att = sparseAttention(nb_head=args.num_heads, size_per_head=args.hidden_units // args.num_heads,
                                             rate=args.sparse_rate, key_size=None)
            self.sparse_attention_layers.append(new_sparse_att)

    def log2feats(self, seqs, feature1, feature2, feature3, pos_seqs, neg_seqs, sub_seqs):
        # Embed every scalar stream to hidden_units dimensions.
        seqs = self.seqs_embedd(seqs)
        feature1 = self.feature1_embedd(feature1)
        feature2 = self.feature2_embedd(feature2)
        feature3 = self.feature3_embedd(feature3)
        pos_seqs = self.pos_embedd(pos_seqs)
        neg_seqs = self.neg_embedd(neg_seqs)
        sub_seqs = self.sub_embedd(sub_seqs)
        # Concatenated sentiment embeddings feed the sparse-attention branch.
        senti_emb_concat = torch.cat((pos_seqs, neg_seqs, sub_seqs), -1)
        sparse_attention = None

        for i in range(len(self.attention_layers)):
            # Pre-normalize each stream to form the attention queries.
            seqs_Q = self.attention_layernorms[i](seqs)
            feature1_Q = self.feature1_attention_layernorms[i](feature1)
            feature2_Q = self.feature2_attention_layernorms[i](feature2)
            feature3_Q = self.feature3_attention_layernorms[i](feature3)
            pos_seqs_Q = self.pos_attention_layernorms[i](pos_seqs)
            neg_seqs_Q = self.neg_attention_layernorms[i](neg_seqs)
            sub_seqs_Q = self.sub_attention_layernorms[i](sub_seqs)
            # Each stream attends to the price sequence (keys/values) through its own layer.
            seqs_ = self.attention_layers[i](seqs_Q, seqs, seqs)
            feature1 = self.feature1_attention_layers[i](feature1_Q, seqs, seqs)
            feature2 = self.feature2_attention_layers[i](feature2_Q, seqs, seqs)
            feature3 = self.feature3_attention_layers[i](feature3_Q, seqs, seqs)
            pos_seqs = self.pos_attention_layers[i](pos_seqs_Q, seqs, seqs)
            neg_seqs = self.neg_attention_layers[i](neg_seqs_Q, seqs, seqs)
            sub_seqs = self.sub_attention_layers[i](sub_seqs_Q, seqs, seqs)
            seqs = self.forward_layernorms[i](seqs_)
            seqs = self.forward_layers[i](seqs)
            feature1 = self.feature1_forward_layernorms[i](feature1)
            feature1 = self.feature1_forward_layers[i](feature1)
            feature2 = self.feature2_forward_layernorms[i](feature2)
            feature2 = self.feature2_forward_layers[i](feature2)
            feature3 = self.feature3_forward_layernorms[i](feature3)
            feature3 = self.feature3_forward_layers[i](feature3)
            pos_seqs = self.pos_forward_layernorms[i](pos_seqs)
            pos_seqs = self.pos_forward_layers[i](pos_seqs)
            neg_seqs = self.neg_forward_layernorms[i](neg_seqs)
            neg_seqs = self.neg_forward_layers[i](neg_seqs)
            sub_seqs = self.sub_forward_layernorms[i](sub_seqs)
            sub_seqs = self.sub_forward_layers[i](sub_seqs)
            sparse_attention = self.sparse_attention_layers[i](senti_emb_concat)

        # Normalize the price-related streams and fuse all seven streams.
        log_feats = self.seq_last_layernorm(seqs)
        log_feature1 = self.feature1_last_layernorm(feature1)
        log_feature2 = self.feature2_last_layernorm(feature2)
        log_feature3 = self.feature3_last_layernorm(feature3)
        log_price = torch.cat((log_feats, log_feature1, log_feature2, log_feature3, pos_seqs, neg_seqs, sub_seqs), -1)
        log_price = self.price_concat_layernorm(log_price)
        # One LSTM over the fused price/sentiment streams, one over the sparse-attention output.
        p_ge_price, (p_ge_ht, p_ge_ct) = self.price_lstm_layer(log_price)
        p_ge_states, (p_ge_ht, p_ge_ct) = self.sparse_lstm_layer(sparse_attention)
        log_feats = torch.cat((p_ge_price, p_ge_states), -1)
        log_feats = self.final_norm(log_feats)
        log_feats = self.price_final_emd(log_feats)
        seqs = self.seqs_final(seqs)
        return log_feats, seqs

    def forward(self, seqs, feature1, feature2, feature3, pos_seqs, neg_seqs, con_seqs):
        seqs = torch.Tensor(seqs).cuda()
        feature1, feature2, feature3 = torch.Tensor(feature1).cuda(), torch.Tensor(feature2).cuda(), torch.Tensor(feature3).cuda()
        pos_seqs, neg_seqs, con_seqs = torch.Tensor(pos_seqs).cuda(), torch.Tensor(neg_seqs).cuda(), torch.Tensor(con_seqs).cuda()
        log_feats, seqs = self.log2feats(seqs, feature1, feature2, feature3, pos_seqs, neg_seqs, con_seqs)
        return log_feats, seqs

    def predict(self, seqs, feature1, feature2, feature3, pos_seqs, neg_seqs, con_seqs):
        # Identical to forward; kept as a separate entry point for inference.
        seqs = torch.Tensor(seqs).cuda()
        feature1, feature2, feature3 = torch.Tensor(feature1).cuda(), torch.Tensor(feature2).cuda(), torch.Tensor(feature3).cuda()
        pos_seqs, neg_seqs, con_seqs = torch.Tensor(pos_seqs).cuda(), torch.Tensor(neg_seqs).cuda(), torch.Tensor(con_seqs).cuda()
        log_feats, seqs = self.log2feats(seqs, feature1, feature2, feature3, pos_seqs, neg_seqs, con_seqs)
        return log_feats, seqs
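

# A minimal smoke-test sketch, not part of the original model code. The hyper-parameter
# names below mirror the `args.*` attributes read above (device, hidden_units, num_blocks,
# num_heads, dropout_rate, sparse_rate); the concrete values are illustrative assumptions,
# and a CUDA device is required because the layers are constructed with `.cuda()`.
if __name__ == "__main__":
    if torch.cuda.is_available():
        import argparse
        import numpy as np

        args = argparse.Namespace(device="cuda", hidden_units=50, num_blocks=2,
                                  num_heads=1, dropout_rate=0.2, sparse_rate=0.3)
        model = SGST(args).cuda()

        batch, seq_len = 4, 200  # illustrative shapes; each input stream is (batch, seq_len, 1)
        dummy = [np.random.rand(batch, seq_len, 1).astype(np.float32) for _ in range(7)]
        preds, seq_out = model(*dummy)
        print(preds.shape, seq_out.shape)  # expected: torch.Size([4, 200, 1]) for both outputs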