from wolframclient.evaluation import WolframLanguageSession
from learners.NN import NNLearner
from learners.Tree import TreeLearner
import pandas as pd
import os
import time
from datetime import datetime
from cegis import cegis_one_prog, get_var_types
import sys
import json
import argparse
import copy
parser = argparse.ArgumentParser()
parser.add_argument(
    "-sub", "--subinv",
    help="put 'yes' (or 'y') if you want to learn a subinvariant; "
    "otherwise, the program learns an exact invariant",
)
parser.add_argument(
    "-cached", "--cached",
    help="put 'yes' (or 'y') if you want to use cached program traces "
    "as the data; otherwise, the program will sample new data",
)
parser.add_argument(
    "-nruns", "--nruns", type=int,
    help="number of runs from each initial state",
)
parser.add_argument(
    "-nstates", "--nstates", type=int,
    help="number of initial states we sample for each benchmark",
)
parser.add_argument("-test", "--test", help="put 'yes' (or 'y') if running a smoke test")
args = parser.parse_args()
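# Illustrative invocations (flag values are examples; defaults are set below):
#   python main.py                  -> learn exact invariants from fresh samples
#   python main.py -sub yes         -> learn subinvariants instead
#   python main.py -cached y -nruns 100 -nstates 50
#   python main.py -test yes        -> smoke test on the Geo0 benchmark only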
# By default, the program will learn exact invariants
if str(args.subinv).lower() in ["yes", "y"]:
exact = False
else:
exact = True
# By default, the program will sample new data
if str(args.cached).lower() in ["yes", "y"]:
sample_new_data = False
else:
sample_new_data = True
if str(args.test).lower() in ["yes", "y"]:
smoke_test = True
else:
smoke_test = False
if args.nruns is None:
NUM_runs = 500
else:
NUM_runs = args.nruns
if args.nstates is None:
NUM_init_states = 500
else:
    NUM_init_states = args.nstates
PATH = os.path.realpath("")
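# Template for every candidate invariant: the learned model is substituted for
# `model`, and [G] is the (Iverson-bracketed) loop guard, so a candidate reads
# post + [G] * model (see how `complete_inv` is assembled in the main loop below).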
assumed_shape = "post + [G] * model"
"""
Given:
[features_log]: a list of strings denoting
expressions generated for the multiplicative models
(we call it `log mode` because it's implemented through
taking the logarithms of the data)
[features_linear]: a list of strings denoting
expressions generated for the linear models (`linear mode`)
[log_not_split]: a list of strings in [features_log] that denote
expressions involving probability variables
(in general, we don't want the predicates in the model trees
to split on probabilities)
[linear_not_split]: a list of strings in [features_linear] that denote
expressions involving probability variables
[config]: a dictionary loaded from a .json file under `configuration_files/`
    ([config] is not used in [prepare_tree_learner],
    but we keep it so that [prepare_tree_learner], [prepare_NN_sub],
    and [prepare_NN_exact] have the same type)
Returns:
[Learners]: a list of objects that take in data and output models (see class
[Learner] of `learners/abstract_learner.py`; we call them
learners). More specifically, [prepare_tree_learner(...)]
returns two learners of class [TreeLearner] (see
`learners/Tree.py`. ) They both train model trees in the
conventional way (using divde and conquer), with the objective
of finding a model tree that translates to an exact invariant.
One learner assumes linear models on the leaves, and
the another learner assumes multiplicative models on the leaves.
"""
def prepare_tree_learner(
features_log, features_linear, log_not_split, linear_not_split, config
) -> list:
splittables_log = [
i for i in range(len(features_log)) if features_log[i] not in log_not_split
]
splittables_linear = [
i
for i in range(len(features_linear))
if features_linear[i] not in linear_not_split
]
learners = []
learners.append(
TreeLearner(
features_linear,
splittables_linear,
False,
True,
max_depth=2,
min_samples_leaf=1,
)
)
learners.append(
TreeLearner(
features_log, splittables_log, True, True, max_depth=2, min_samples_leaf=1
)
)
return learners
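# Illustrative use (the feature strings below are made-up placeholders; the
# real feature lists are produced elsewhere in the pipeline):
#   learners = prepare_tree_learner(["x", "p*x"], ["x", "n"], ["p*x"], [], config)
# Each returned learner implements the [Learner] interface of
# `learners/abstract_learner.py`: it consumes data and produces a model.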
"""
Given:
Same as for [prepare_tree_learner]
Returns:
[Learners]: Same type as for [prepare_tree_learner], but [prepare_NN_sub(...)]
    returns two learners that train **neural model trees** by
    gradient descent, with the objective of finding a model tree that
    translates to a subinvariant. One learner assumes linear models on
    the leaves, and the other assumes multiplicative models on the leaves.
"""
def prepare_NN_sub(
features_log, features_linear, log_not_split, linear_not_split, config
) -> list:
splittables_log = []
splittables_linear = []
    # Uncomment if you want the neural model trees to make splits
    # (see the documentation `Some choices: ...` in `learners/NN.py`)
# vars = [var for types in get_var_types(config).values() for var in types]
# splittables_log = [
# i for i in range(len(features_log)) if features_log[i] not in log_not_split
# ]
# splittables_linear = [
# i
# for i in range(len(features_linear))
# if features_linear[i] not in linear_not_split
# ]
learners = []
# train model trees as neural networks
learners.append(
NNLearner(
features_log,
splittables_log,
assumed_shape,
subinv=True,
fit_logspace=False,
fit_intercept=True,
)
)
learners.append(
NNLearner(
features_linear,
splittables_linear,
assumed_shape,
subinv=True,
fit_logspace=True,
fit_intercept=True,
)
)
return learners
"""
Given:
Same as for [prepare_tree_learner]
Returns:
[Learners]: Same type as for [prepare_NN_sub], except that the returned learners
    have the objective of finding a model tree that translates to an
    exact invariant.
[prepare_NN_exact] is not used in the paper.
"""
# def prepare_NN_exact(
# features_log, features_linear, log_not_split, linear_not_split, config
# ) -> list:
# splittables_log = []
# splittables_linear = []
# # Uncomment if you want the neural model trees to make splits
# # (see the documentation `Some choices: ...` in `learners/NN.py`)
# # vars = [var for types in get_var_types(config).values() for var in types]
# # splittables_log = [
# # i for i in range(len(features_log)) if features_log[i] not in log_not_split
# # ]
# # splittables_linear = [
# # i
# # for i in range(len(features_linear))
# # if features_linear[i] not in linear_not_split
# # ]
# learners = []
# # train model trees as neural networks
# learners.append(
# NNLearner(
# features_log,
# splittables_log,
# assumed_shape,
# subinv=False,
# fit_logspace=False,
# fit_intercept=True,
# )
# )
# learners.append(
# NNLearner(
# features_linear,
# splittables_linear,
# assumed_shape,
# subinv=False,
# fit_logspace=True,
# fit_intercept=True,
# )
# )
# return learners
"""
[get_config] loads the JSON object from [progname]'s configuration file.
"""
def get_config(progname):
with open(os.path.join(PATH, "configuration_files", progname + ".json"), "r") as f:
config = json.load(f)
return config
def config_select_pre(config, idx):
config_idx = copy.deepcopy(config)
config_idx["wp"]["pre"] = config_idx["wp"]["pre"][idx]
return config_idx
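# Illustrative shape of a configuration file (the key names are exactly those
# read below; the concrete expressions are made up):
#   {
#       "wp": {
#           "pre": ["n", "n + 1/2"],   # one entry per preexpectation subtask
#           "post": "count",
#           "guard": "[n > 0]",
#           ...
#       },
#       ...
#   }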
if exact:
print("Mode: EXACT INVARIANTS")
prepare_learners = prepare_tree_learner
header = ["generated_inv", "post", "sampling_time", "learning_time", "verifying_time", \
"total_time"]
else:
print("Mode: SUBINVARIANTS")
prepare_learners = prepare_NN_sub
header = ["generated_subinv", "pre", "post", "sampling_time", "learning_time", "verifying_time", \
"total_time"]
session = WolframLanguageSession()
with open(os.path.join(PATH, "program_list.txt"), "r") as f:
prognames = f.read().strip().split("\n")
if smoke_test:
prognames = ["Geo0"]
results = {}
for progname in prognames:
config = get_config(progname)
post = config["wp"]["post"]
guard = config["wp"]["guard"]
pre_lst = config["wp"]["pre"]
nsubtasks = 1 if exact else len(pre_lst)
for idx in range(nsubtasks): # iterate through each preexpectation in the list
config_idx = config_select_pre(config, idx)
pre = pre_lst[idx]
if exact:
print(" Benchmark name: {}; Post-exp: {}".format(progname, post))
else:
print(" Benchmark name: {}; Post-exp: {}; Pre-exp: {}".format(progname, post, pre))
start_time = time.time()
try:
inv, st, lt, vt = cegis_one_prog(
progname,
config_idx,
prepare_learners,
NUM_runs,
NUM_init_states,
exact,
sample_new_data,
session,
assumed_shape,
)
total = time.time() - start_time
complete_inv = f"{post} + {guard} * {inv}"
if exact:
print("For {}: we get {}\n".format(progname, complete_inv))
                results[progname] = [complete_inv, post, st, lt, vt, total]
else:
print("For {} with preexpetation {}: we get {}\n".format\
(progname, pre_lst[idx],complete_inv))
results["{}_{}".format(progname, str(idx))] = [complete_inv, pre, post,st,lt,vt,total]
# we save eagerly so even if the program crashes at a benchmark
# we have previous results all saved
df = pd.DataFrame.from_dict(results, orient="index", columns=header)
            df.to_csv(os.path.join(PATH, "results", "{}-{}.csv".format(
                prognames[0], "exact" if exact else "sub")))
except KeyboardInterrupt:
print("Caught a KeyboardInterrupt")
sys.exit(0)
# except:
# if exact:
# print("For {}: we get an unexpected exception".format(progname))
# results[progname] = ["error", post, "error","error","error", "error"]
# else:
# print("For {} with preexpetation {}: we get an unexpected\
# exception".format(progname, pre_lst[idx]))
# results["{}_{}".format(progname, str(idx))] = ["error", pre, "error","error","error", "error"]
# df = pd.DataFrame.from_dict(results, orient="index", columns=header)
# df.to_csv(os.path.join(PATH, "results", "{}-{}.csv".format\
# (prognames[0], "exact" if exact else "sub")))
session.terminate()