-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_cpd_cmd_line.py
105 lines (93 loc) · 6.81 KB
/
run_cpd_cmd_line.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import argparse
import numpy as np
import networkx as nx
import running_cpd as my_cpd
import utils as my_ut
from datetime import datetime
def run_cpd_algorithms(kwargs_namespace: argparse.Namespace):
### OVERALL INITIALIZATION
# logging
pred_dir = kwargs_namespace.pred_dir
verbose = kwargs_namespace.verbose
now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
my_ut.create_parent_and_dump_json(pred_dir, f"experiment_metadata_{now}.json", my_ut.turn_all_list_of_dict_into_str(kwargs_namespace.__dict__), indent=4)
# parsing
signal_path = kwargs_namespace.signal_path
graph_path = kwargs_namespace.graph_path
seg_length_hyp = kwargs_namespace.min_size_hypothesis
exp_id_list = [str(id) for id in range(kwargs_namespace.exp_id_min, kwargs_namespace.exp_id_max)]
if kwargs_namespace.eeg_movement:
exp_id_list = ['0' + str(exp_id) for exp_id in exp_id_list if len(str(exp_id)) == 1] + [str(exp_id) for exp_id in exp_id_list if len(str(exp_id)) == 2]
# COST FUNCTION INITIALIZATION
# graph stationarity-based
if kwargs_namespace.statio:
statio_name = "statio_pred.json"
statio_json_path = my_cpd.init_pred_saving(pred_dir, statio_name)
# standard_mle
if kwargs_namespace.standard_mle:
mle_name = "standard_mle_pred.json"
mle_json_path = my_cpd.init_pred_saving(pred_dir, mle_name)
# r_glasso
if kwargs_namespace.glasso:
lasso_buffer_path = f"data_1/.temp/glasso_subsignal_{kwargs_namespace.glasso_buffer_id}"
lasso_pen_mult_coef = kwargs_namespace.glasso_pen_multcoef
lasso_name = f"lasso_penmultcoef_{lasso_pen_mult_coef}_pred.json"
lasso_json_path = my_cpd.init_pred_saving(pred_dir, lasso_name)
# r_covcp
if kwargs_namespace.r_covcp:
r_covcp_stable_set_length = kwargs_namespace.r_covcp_stable_set_length
r_covcp_window_size = [kwargs_namespace.r_covcp_window_size]
r_covcp_level = kwargs_namespace.r_covcp_level
r_covcp_nb_cores = kwargs_namespace.r_covcp_nb_cores
r_covcp_seed = kwargs_namespace.r_covcp_seed
covcp_name = f"covcp_windsize_{r_covcp_window_size[0]}_stableset_{r_covcp_stable_set_length}.json"
covcp_json_path = my_cpd.init_pred_saving(pred_dir, covcp_name)
# RUNNING CPD ALGORTIHMS
for exp_id in exp_id_list:
print(f"\n\tOver exp_id={exp_id}...")
# loading data
if kwargs_namespace.eeg_movement:
G = nx.from_numpy_array(np.load(graph_path, allow_pickle=False))
volunteer_id = (signal_path).split('/')[-1]
signal_file_path = f'{signal_path}/volunteer{volunteer_id}_exp{exp_id}'
gt_bkps = my_ut.open_json(f"{signal_file_path}_bkps.json")
signal = np.load(f"{signal_file_path}_signal.npy", allow_pickle=False)
min_size = my_ut.get_min_size_for_hyp(G.number_of_nodes(), hyp=seg_length_hyp)
else:
signal_file_path = f"{signal_path}/{exp_id}"
G, signal, gt_bkps, min_size = my_ut.load_data(graph_path, signal_path, exp_id, seg_length_hyp)
if kwargs_namespace.statio:
my_cpd.run_numba_statio_normal_cost_and_store_res(G, signal, gt_bkps, min_size, statio_json_path, exp_id)
if kwargs_namespace.standard_mle:
my_cpd.run_numba_standard_mle_normal_cost_and_store_res(signal, gt_bkps, min_size, mle_json_path, exp_id)
if kwargs_namespace.r_covcp:
my_cpd.run_r_covcp_cpd_algo_and_store(signal_file_path, gt_bkps, covcp_json_path, r_covcp_stable_set_length, min_size, r_covcp_window_size, r_covcp_level, exp_id, r_covcp_nb_cores, r_covcp_seed)
if kwargs_namespace.glasso:
my_cpd.run_r_glasso_cpd_algo_and_store(signal, gt_bkps, lasso_json_path, exp_id, lasso_pen_mult_coef, lasso_buffer_path)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
# data to run over: signal, graphs, exp_ids...
parser.add_argument("signal_path", help="The relative path to the folder containing the signal and bkps files")
parser.add_argument("exp_id_min", type=int, help='The lower bound of the target experiments ids')
parser.add_argument("exp_id_max", type=int, help='The upper bound of the target experiments ids (excluded)')
parser.add_argument("min_size_hypothesis", choices=['minimal', 'large'], default="minimal", help='the minimal size of the stationarity segment for the cpd algorithms. large --> min_size=n_dim*(n_dim-1)/2, minimal --> min_size=n_dim')
parser.add_argument("--graph-path", help="The relative path to the folder containing the graph files")
parser.add_argument("--eeg-movement", action="store_true", help="Whether to run over the real dataset EEG imagerymovement")
# cost functions to run
parser.add_argument("--statio", action="store_true", help="Whether to run the graph stationarity based cost function")
parser.add_argument("--standard-mle", action="store_true", help="Whether to run the mle cost function")
parser.add_argument("--glasso", action="store_true", help="Whether to run the Graph Lasso cost function")
parser.add_argument("--r-covcp", action="store_true", help="Whether to run the r_covcp cost function")
# logging and output
parser.add_argument("pred_dir", help="The relative path to the folder for result saving")
parser.add_argument("-v", "--verbose", type=int, choices=[0, 1], default=0, help="Verbosity level")
# cost functions hyper-parameters
parser.add_argument("--glasso-pen-multcoef", nargs='?', const=4., type=float, default=4., help="The multiplicative factor applied to the penalty coefficient used for graph lasso, equal to glasso-pen-multcoef * sqrt(log(n_dim) / n_samples)")
parser.add_argument("--glasso-buffer-id", nargs='?', const=1, type=int, default=1, help="Buffer file id for graph lasso intermediate file saving, must be chosen carefully so that each cpd thread has its own buffer id")
parser.add_argument("--r-covcp-stable-set-length", nargs='?', const=80, type=int, default=80, help='The number of samples (from the first one) contained in the bootstrap set for the r_covcp method, must not contain bkp and should be chosen on the knowledge of the signal')
parser.add_argument("--r-covcp-window-size", nargs='?', const=80, type=int, default=80, help='The window size used to compute the covariance matrix estimator and then the statistic')
parser.add_argument("--r-covcp-level", nargs='?', const=0.3, type=float, default=0.3, help='The level of the test used to compute the threshold for cpd')
parser.add_argument("--r-covcp-nb-cores", nargs='?', const=1, type=int, default=1, choices=[1, 2, 3], help='The number of cores allocated to the R library covcp for cpd solving')
parser.add_argument("--r-covcp-seed", nargs='?', const=42, type=int, default=42, help='Random seed used in the algorithm from the R library covcp')
args = parser.parse_args()
run_cpd_algorithms(args)