-
Notifications
You must be signed in to change notification settings - Fork 1
/
read_traces.py
105 lines (87 loc) · 2.35 KB
/
read_traces.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
from random import random, randint, shuffle
from GLOBAL_VARS import *
def read_data(path):
    """
    Read datasets from `path`.

    Every non-blank line is expected to look like ``s1,s2,...,sn;goal``.
    When the module-level ``MULTILABEL`` flag is true, the part after the
    ``;`` is itself a comma-separated list of goals.

    Returns a tuple ``(G, Sigma, T)`` where
      - ``G`` is the set of all goals seen,
      - ``Sigma`` is the set of all symbols seen in the traces,
      - ``T`` is the list of ``(trace, goal)`` pairs in file order; ``trace``
        is a list of symbols and ``goal`` is a string (or a list of strings
        when ``MULTILABEL`` is true).

    Blank lines are skipped. A non-blank line that does not contain exactly
    one ``;`` raises ``ValueError``.
    """
    G = set()  # set of possible goals
    Sigma = set()  # vocabulary of the automaton
    T = []  # scenarios (i.e., list of traces with their respective goal)

    # `with` guarantees the file is closed even if a line fails to parse.
    with open(path) as f:
        for l in f:
            l = l.rstrip()
            if not l:
                # tolerate empty lines (e.g. a trailing newline at EOF)
                continue
            # reading the trace and its goal(s)
            tau, label = l.split(";")
            tau = tau.split(",")
            Sigma.update(tau)
            # adding the goal(s) to the set of available goals
            if MULTILABEL:
                label = label.strip().split(",")
                G.update(label)
            else:
                G.add(label)
            T.append((tau, label))
    return G, Sigma, T
def read_data_split(path):
    """
    Split into training and validation sets.
    20% of data for each goal goes into validation.

    Lines have the form ``s1,s2,...,sn;goal``; blank lines are skipped and a
    non-blank line without exactly one ``;`` raises ``ValueError``. Only
    single-label data is supported (``MULTILABEL`` must be false).

    For each goal, the first ``max(1, int(0.2 * n))`` of its ``n`` traces (in
    file order) go to validation and the rest to training. Note that a goal
    with a single trace therefore contributes nothing to the training set.

    Returns ``(G, Sigma, T_train, T_validation)``: the set of goals, the set
    of symbols, and two lists of ``(trace, goal)`` pairs. Goals are processed
    in order of first appearance in the file, so the output order is
    reproducible from run to run.
    """
    assert not MULTILABEL, "read_data_split only supports single-label data"
    G = set()  # set of possible goals
    Sigma = set()  # vocabulary of the automaton
    T_train = []  # scenarios (i.e., list of traces with their respective goal)
    T_validation = []

    # reading the traces, grouped by goal (dict keeps first-appearance order)
    traces = {}
    with open(path) as f:
        for l in f:
            l = l.rstrip()
            if not l:
                # tolerate empty lines (e.g. a trailing newline at EOF)
                continue
            # reading the trace and its goal
            tau, g = l.split(";")
            tau = tau.split(",")
            # adding the goal to the set of available goals
            G.add(g)
            Sigma.update(tau)
            traces.setdefault(g, []).append(tau)

    # Iterate the dict rather than the set G: set order of strings changes
    # between interpreter runs, which made the split order non-reproducible.
    for g, taus in traces.items():
        n_valid = max(1, int(len(taus) * 0.2))
        T_validation.extend((tau, g) for tau in taus[:n_valid])
        T_train.extend((tau, g) for tau in taus[n_valid:])
    return G, Sigma, T_train, T_validation
def count_observations(train_path, test_path):
    """
    Count the observations contained in the training and test files.

    Each non-blank line of both files is expected to look like
    ``s1,s2,...,sn;goal``; the ``n`` comma-separated symbols before the ``;``
    are counted as observations. Blank lines are skipped and a non-blank line
    without exactly one ``;`` raises ``ValueError``.

    Returns ``(count, average, total)``: the total number of observations,
    the average number of observations per trace, and the number of traces,
    all taken over both files together. The average is ``0.0`` when neither
    file contains a trace.
    """
    count = 0  # observations seen so far
    total = 0  # traces seen so far
    for path in (train_path, test_path):
        # `with` closes each file; the handles used to be left open.
        with open(path) as f:
            for l in f:
                l = l.rstrip()
                if not l:
                    # tolerate empty lines (e.g. a trailing newline at EOF)
                    continue
                # reading the trace and its goal (the goal is not needed here)
                tau, _goal = l.split(";")
                count += len(tau.split(","))
                total += 1
    # avoid ZeroDivisionError when both files are empty
    average = count / total if total else 0.0
    return count, average, total