-
Notifications
You must be signed in to change notification settings - Fork 20
/
best_n_ensemble.py
67 lines (49 loc) · 1.67 KB
/
best_n_ensemble.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
"""
This glue script finds the N best models so far (ranked by AUC), generates predictions with each of them (unless a predictions file already exists) and averages those predictions.
"""
import cPickle as pickle
import glob
import os
import subprocess
import sys

import numpy as np
# Command-line handling: exactly two arguments are expected, the number of
# models to average and the target path that is handed to averager.py.
if len(sys.argv) != 3:
    sys.exit("Usage: best_n_ensemble.py <number_of_models> <target_path>")

try:
    N = int(sys.argv[1])  # number of models to average
except ValueError:
    sys.exit("best_n_ensemble.py: <number_of_models> must be an integer, "
             "got %r" % sys.argv[1])

# The best models are later picked with a [-N:] slice. With N == 0 that slice
# selects *every* model, and with a negative N it drops the worst models
# instead of keeping the best ones, so reject both here.
if N < 1:
    sys.exit("best_n_ensemble.py: <number_of_models> must be at least 1, "
             "got %d" % N)

tgt_path = sys.argv[2]
# Load every results file matching results/results-*.pkl into `data`, one dict
# per file, remembering which file each dict came from.
paths = glob.glob("results/results-*.pkl")

data = []
for path in paths:
    # Pickles are binary data: open with 'rb' so the bytes are never altered
    # by text-mode newline translation.
    # NOTE: unpickling can execute arbitrary code; only point this script at
    # results files you trust.
    with open(path, 'rb') as f:
        d = pickle.load(f)

    # The 'params' entry is not used by this script; drop it to save memory.
    # pop() with a default tolerates results files that have no such entry.
    d.pop('params', None)
    d['path'] = path
    data.append(d)
# Sort the loaded results by their evaluation AUC (argsort is ascending) and
# keep the last N entries of that ordering.
ranking = np.argsort([entry['evaluation']['auc'] for entry in data])
best = [data[idx] for idx in ranking[-N:]]

# `paths` now holds only the selected results files, in ascending AUC order.
paths = [entry['path'] for entry in best]

print("Paths for n best results files so far:")
for entry in best:
    print("AUC %.4f %s" % (entry['evaluation']['auc'], entry['path']))
# Make sure a predictions file exists for every selected results file.
# `pred_paths` collects the predictions file for each entry of `paths`,
# whether it already existed or had to be generated.
print("")
print("Generating predictions")

pred_paths = []
for path in paths:
    # results/results-X.pkl  ->  predictions/predictions-X.txt
    pred_basename = os.path.basename(path).replace("results-", "predictions-").replace(".pkl", ".txt")
    pred_path = os.path.join("predictions", pred_basename)
    pred_paths.append(pred_path)

    if os.path.exists(pred_path):
        print("Predictions file %s already exists, not regenerating" % pred_path)
        continue

    # Hand the arguments over as a list instead of a shell string, so that
    # paths containing spaces or shell metacharacters reach the child process
    # unchanged.
    args = ["epython", "generate_predictions.py", path, pred_path]
    print(" ".join(args))
    status = subprocess.call(args)
    if status != 0:
        # Keep going (the remaining models may still work), but do not hide
        # the failure.
        sys.stderr.write("Warning: generate_predictions.py exited with status %d for %s\n"
                         % (status, path))
# Final step: run averager.py on all prediction files, with the target path
# given on the command line as its last argument.
print("")
print("Averaging predictions")

if not pred_paths:
    # Nothing was selected (e.g. no results files were found), so there is
    # nothing to average.
    sys.exit("best_n_ensemble.py: no prediction files to average")

# Arguments are passed as a list rather than a shell string so that paths
# containing spaces or shell metacharacters (including the user-supplied
# target path) are passed through verbatim.
args = ["epython", "averager.py"] + pred_paths + [tgt_path]
status = subprocess.call(args)
if status != 0:
    # Surface the averager's failure instead of silently exiting with success.
    sys.exit("best_n_ensemble.py: averager.py exited with status %d" % status)