train.py
# -*- coding: utf-8 -*-
import argparse
import pickle

import torch

from model import MwAN
from preprocess import process_data
from utils import get_model_parameters, pad_answer, padding, shuffle_data
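# padding, pad_answer, shuffle_data, and get_model_parameters are helpers defined in
# this repo's utils module; their expected behavior below is inferred from how the
# original code calls them.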
parser = argparse.ArgumentParser(description='PyTorch implementation of Multiway Attention Networks for Modeling '
                                             'Sentence Pairs (AI Challenger)')
parser.add_argument('--data', type=str, default='data/',
                    help='directory of the data corpus')
parser.add_argument('--threshold', type=int, default=5,
                    help='minimum word count for inclusion in the vocabulary')
parser.add_argument('--epoch', type=int, default=50,
                    help='number of training epochs')
parser.add_argument('--emsize', type=int, default=128,
                    help='size of word embeddings')
parser.add_argument('--nhid', type=int, default=128,
                    help='hidden size of the model')
parser.add_argument('--batch_size', type=int, default=32, metavar='N',
                    help='batch size')
parser.add_argument('--log_interval', type=int, default=300,
                    help='number of batches between training-loss reports')
parser.add_argument('--dropout', type=float, default=0.2,
                    help='dropout applied to layers (0 = no dropout)')
parser.add_argument('--cuda', action='store_true',
                    help='use CUDA')
parser.add_argument('--save', type=str, default='model.pt',
                    help='path to save the final model')
args = parser.parse_args()
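# Example invocation (assumes the preprocessed train/dev pickles already exist under data/):
#   python train.py --data data/ --epoch 50 --batch_size 32 --cuda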
# vocab_size = process_data(args.data, args.threshold)
vocab_size = 98745  # hard-coded to skip re-running process_data on every launch

model = MwAN(vocab_size=vocab_size, embedding_size=args.emsize, encoder_size=args.nhid, drop_out=args.dropout)
print('Model total parameters:', get_model_parameters(model))
if args.cuda:
    model.cuda()
optimizer = torch.optim.Adamax(model.parameters())

with open(args.data + 'train.pickle', 'rb') as f:
    train_data = pickle.load(f)
with open(args.data + 'dev.pickle', 'rb') as f:
    dev_data = pickle.load(f)
# Sort dev data by passage length so padded batches waste less computation.
dev_data = sorted(dev_data, key=lambda x: len(x[1]))
print('train data size {:d}, dev data size {:d}'.format(len(train_data), len(dev_data)))
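# Each sample appears to be [query_ids, passage_ids, candidate_answers], judging by the
# x[0]/x[1]/x[2] indexing below; the gold answer is expected at candidate index 0
# (see the accuracy computation in test()).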
def train(epoch):
    model.train()
    data = shuffle_data(train_data, 1)
    total_loss = 0.0
    for num, i in enumerate(range(0, len(data), args.batch_size)):
        one = data[i:i + args.batch_size]
        query, _ = padding([x[0] for x in one], max_len=50)
        passage, _ = padding([x[1] for x in one], max_len=350)
        answer = pad_answer([x[2] for x in one])
        query, passage, answer = torch.LongTensor(query), torch.LongTensor(passage), torch.LongTensor(answer)
        if args.cuda:
            query = query.cuda()
            passage = passage.cuda()
            answer = answer.cuda()
        optimizer.zero_grad()
        # In training mode (last flag True) the model returns the loss directly.
        loss = model([query, passage, answer, True])
        loss.backward()
        total_loss += loss.item()
        optimizer.step()
        if (num + 1) % args.log_interval == 0:
            print('|------epoch {:d} train loss is {:f} progress {:.2f}%------|'.format(
                epoch, total_loss / args.log_interval, i * 100.0 / len(data)))
            total_loss = 0
def test():
    model.eval()
    right, total = 0.0, 0.0
    with torch.no_grad():
        for i in range(0, len(dev_data), args.batch_size):
            one = dev_data[i:i + args.batch_size]
            query, _ = padding([x[0] for x in one], max_len=50)
            # Note: dev passages use a longer max_len than training (500 vs 350).
            passage, _ = padding([x[1] for x in one], max_len=500)
            answer = pad_answer([x[2] for x in one])
            query, passage, answer = torch.LongTensor(query), torch.LongTensor(passage), torch.LongTensor(answer)
            if args.cuda:
                query = query.cuda()
                passage = passage.cuda()
                answer = answer.cuda()
            # In eval mode (last flag False) the model returns the predicted candidate
            # index; the gold answer sits at index 0, so a prediction of 0 is correct.
            output = model([query, passage, answer, False])
            right += torch.eq(output, 0).sum().item()
            total += len(one)
    return right * 100.0 / total
def main():
    best = 0.0
    for epoch in range(args.epoch):
        train(epoch)
        acc = test()
        if acc > best:
            best = acc
            with open(args.save, 'wb') as f:
                torch.save(model, f)
        print('epoch {:d} dev acc is {:f}, best dev acc {:f}'.format(epoch, acc, best))


if __name__ == '__main__':
    main()