#!/usr/bin/env ruby
require 'fileutils'
# run svmlin on training / test data
# TODO:
# run liblinear on the generated files if we skip lines
# compute class weights and fraction r, add to liblinear and svmlin
# grid search liblinear
# run tsvm linear svm and also compare
# extract Accuracy from svmlin and store in an array
# only report top N
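#
# Usage: svmlin.rb NAME
#   NAME    training data, one "label features..." line per example
#   NAME.t  test data in the same format
# (Both files are also fed directly to liblinear's train/predict below, so
#  they are assumed to be in LIBSVM/liblinear sparse format.)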
SVMLIN_DIR = "~/packages/svmlin-v1.0"
LIBLINEAR_DIR = "~/packages/liblinear-1.94"
SVMLIN = "#{SVMLIN_DIR}/svmlin"
name = ARGV.first
train_examples = name + ".examples"
train_labels = name + ".labels"
test_examples = name + ".t.examples"
test_labels = name + ".t.labels"
svmlin_examples = "svmlin.#{name}.examples"
svmlin_labels = "svmlin.#{name}.labels"
svmlin_output = "svmlin.#{name}.output"
File.open(train_examples, 'w') do |f_train|
  File.open(train_labels, 'w') do |f_labels|
    # TODO: add feature for testing cluster expansion
    # i need the total number of features
    File.open(name) do |f|
      f.each do |line|
        toks = line.chomp.split(/\s+/, 2)
        next unless toks.size > 1
        f_train << toks.last << "\n"
        f_labels << toks.first << "\n"
      end
    end
  end
end
FileUtils.cp(train_examples,svmlin_examples)
FileUtils.cp(train_labels,svmlin_labels)
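# Split the test file the same way, and also append its examples to the
# svmlin copies with label 0 (marking them as unlabeled for svmlin) so the
# grid-search runs below can train semi-supervised on the labeled training
# data plus the unlabeled test data.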
File.open(svmlin_examples, 'a') do |f_svmlin_examples|
  File.open(svmlin_labels, 'a') do |f_svmlin_labels|
    File.open(test_examples, 'w') do |f_test_examples|
      File.open(test_labels, 'w') do |f_test_labels|
        File.open(name + ".t") do |f|
          f.each do |line|
            toks = line.chomp.split(/\s+/, 2)
            next unless toks.size > 1
            f_test_examples << toks.last << "\n"
            f_svmlin_examples << toks.last << "\n"
            f_test_labels << toks.first << "\n"
            f_svmlin_labels << "0\n"
          end
        end
      end
    end
  end
end
# The semi-supervised runs should beat the liblinear (or svmlin) baseline
# (a lower bound), but we do not expect to get much past the train-on-test
# "upper bound" (not quite right: what we really want is the test
# reconstruction accuracy).
#
# liblinear baseline, not optimized
$stdout << "liblinear baseline (lower bound): "
cmd = "#{LIBLINEAR_DIR}/train #{name}"
system cmd
cmd = "#{LIBLINEAR_DIR}/predict #{name}.t #{name}.model liblinear.#{name}.out"
system cmd
$stdout << "linlinear upper bound: "
# test upper bound
cmd = "#{LIBLINEAR_DIR}/train #{name}.t"
system cmd
cmd = "#{LIBLINEAR_DIR}/predict #{name}.t #{name}.t.model liblinear.t.#{name}.out"
system cmd
# svmlin baseline, not optimized
$stdout << "svmlin baseline (lower bound): "
cmd = "#{SVMLIN} -A 1 #{train_examples} #{train_labels}"
system cmd
cmd = "#{SVMLIN} -f #{train_examples}.weights #{test_examples} #{test_labels}"
system cmd
# add in the svmlin non-transductive baseline
#cmd = "#{SVMLIN} -A 1 #{name}.t.examples #{name}.t.labels "
#system cmd
# need a more extensive grid search
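# Grid over svmlin's -W and -U options (presumably the regularization weights
# for the labeled and unlabeled terms) with GNU parallel; each (W, U) pair
# runs in its own A2W{1}U{2} directory so the generated weight files do not
# collide.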
w_seq = "seq -w 0.0001 0.0002 0.002"
u_seq = "seq -w 0.001 0.005 0.4"
dir = "A2W{1}U{2}"
dir_cmd = "rm -rf #{dir}; mkdir #{dir}; cd #{dir}"
train_cmd = "#{SVMLIN} -A 2 -W {1} -U {2} ../#{svmlin_examples} ../#{svmlin_labels} > /dev/null"
eval_cmd = "echo A2W{1}U{2}; #{SVMLIN} -f #{svmlin_examples}.weights ../#{test_examples} ../#{test_labels} | grep -i acc"
parallel_cmd = "parallel '#{dir_cmd}; #{train_cmd}; #{eval_cmd}' ::: $(#{w_seq}) ::: $(#{u_seq})"
puts parallel_cmd
system parallel_cmd