This repository has been archived by the owner on Oct 12, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 24
/
sts_tools.py
89 lines (75 loc) · 3.29 KB
/
sts_tools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# coding: utf8
import stst
nlp = stst.StanfordNLP('http://localhost:9000')
def train_sts(model):
train_file = './data/stsbenchmark/sts-train.csv'
train_instances = stst.load_parse_data(train_file, nlp)
model.train(train_instances, train_file)
def dev_sts(model):
dev_file = './data/stsbenchmark/sts-dev.csv'
dev_instances = stst.load_parse_data(dev_file, nlp)
model.test(dev_instances, dev_file)
dev_pearsonr = stst.eval_output_file(model.output_file)
print('Dev:', dev_pearsonr)
return dev_pearsonr
def test_sts(model):
test_file = './data/stsbenchmark/sts-test.csv'
test_instances = stst.load_parse_data(test_file, nlp)
model.test(test_instances, test_file)
test_pearsonr = stst.eval_output_file(model.output_file)
print('Test:', test_pearsonr)
return test_pearsonr
def hill_climbing(model, choose_list=[]):
chooses = choose_list
feature_list= model.feature_list
visited = [True if x in choose_list else False for x in range(len(feature_list))]
for idx in range(len(choose_list), len(feature_list)):
choose_index = -1
best_score = 0.0
best_test_score = 0.0
chooses.append(-1)
for i in range(len(feature_list)):
if visited[i] == False:
chooses[idx] = i
feature = [feature_list[s] for s in chooses]
# print(len(feature_list))
model.feature_list = feature
train_sts(model)
cur_score = dev_sts(model)
test_score = test_sts(model)
stst.record('./records.csv', cur_score, test_score, model)
if best_score < cur_score:
choose_index = i
best_score = cur_score
best_test_score = test_score
chooses[idx] = choose_index
visited[choose_index] = True
# feature = [ feature_list[s] for s in chooses]
print('Best Score: %.2f %%, %.2f%%,choose Feature %s' % (best_score * 100, best_test_score * 100,
feature_list[choose_index].feature_name))
def feature_ablation(model):
feature_list = model.feature_list
train_sts(model)
cur_score, info = test_sts(model)
print('Cur Score: %.2f %% , All Features' % (cur_score * 100))
for idx in range(len(feature_list)):
feature = [feature_list[s] for s in filter(lambda x:x != idx, range(len(feature_list))) ]
model.feature_list = feature
train_sts(model)
cur_score, info = test_sts(model)
print('Cur Score: %.2f %%, except Feature %s' % (cur_score * 100, feature_list[idx].feature_name))
def feature_importance(model):
feature_list = model.feature_list
train_sts(model)
dev_score = dev_sts(model)
test_score = test_sts(model)
print('Cur Score: Dev: %.2f %% , Test: %.2f %% , All Features' % (dev_score * 100, test_score * 100))
for idx in range(len(feature_list)):
feature = [feature_list[idx]]
model.feature_list = feature
train_sts(model)
dev_score = dev_sts(model)
test_score = test_sts(model)
print('Cur Score: Dev: %.2f %% , Test: %.2f %% , Only Feature %s' % (dev_score * 100, test_score * 100, feature[0].feature_name))
if __name__ == '__main__':
pass