-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsurvedhelper.py
171 lines (148 loc) · 8.9 KB
/
survedhelper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
from Model.experiments import FlchainExperiment, MetabricExperiment, NwtcoExperiment, SupportExperiment
from lifelines.utils import concordance_index
import random
import gc
import pandas as pd
import numpy as np
import sys, os, inspect
from Utils.helper import configure_logger
# Absolute directory of this source file, resolved from the current frame.
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
# Directory one level above this file's directory.
parentdir = os.path.dirname(currentdir)
# Put the parent directory first on the module search path.
# NOTE(review): this runs after the `Model.experiments` and `Utils.helper`
# imports above, so it cannot influence how those two are resolved -- confirm
# whether it was meant to precede them.
sys.path.insert(0, parentdir)
def surved_fit(exp_model, i, events_weight=1, censored_weight=1, kl_loss_weight=0.0001, c_index_lb_weight=0, epochs=10000, patience=1000, batch_size=256, final_test=False, n_folds=100):
    """Build one SurVED experiment, run it, and return its test c-index.

    Args:
        exp_model: Callable invoked with keyword arguments only; the object
            it returns must provide ``run_cv`` and ``run_final_test``.
        i: Run identifier embedded in the experiment name.
        events_weight: Forwarded unchanged to ``exp_model``.
        censored_weight: Forwarded unchanged to ``exp_model``.
        kl_loss_weight: Forwarded unchanged to ``exp_model``.
        c_index_lb_weight: Forwarded unchanged to ``exp_model``.
        epochs: Forwarded as ``max_epochs``.
        patience: Forwarded as ``patience``.
        batch_size: Forwarded as ``batch_size``.
        final_test: When true, the experiment is named
            ``SurVED_Final_Test_{i}`` and ``run_final_test`` is called with
            ``range(n_folds)``; otherwise it is named ``SurVED_{i}`` and
            ``run_cv`` is called with five fixed id pairs.
        n_folds: Upper bound of the id range used when ``final_test`` is true.

    Returns:
        A ``(ci_test, y_test_preds)`` tuple: the first and the last of the
        seven values returned by the experiment run.
    """
    # Settings shared by both run modes; only the experiment name differs.
    shared_settings = dict(
        events_weight=events_weight,
        censored_weight=censored_weight,
        surv_mse_loss_weight=1,
        kl_loss_weight=kl_loss_weight,
        c_index_lb_weight=c_index_lb_weight,
        max_epochs=epochs,
        patience=patience,
        surved_lr=0.001,
        batch_size=batch_size,
        latent_size=4,
        activation='tanh',
        num_samples=100,
        verbose=False,
    )

    if not final_test:
        id_pairs = [(0, 1), (1, 2), (2, 3), (3, 4), (4, 0)]
        experiment = exp_model(exp_name=f'SurVED_{i}', **shared_settings)
        outcome = experiment.run_cv(test_val_ids_lst=id_pairs)
    else:
        fold_ids = range(n_folds)
        experiment = exp_model(exp_name=f'SurVED_Final_Test_{i}', **shared_settings)
        outcome = experiment.run_final_test(val_ids_lst=fold_ids)

    # The run yields seven values; only the first and the last are returned.
    ci_test, _, _, _, _, _, y_test_preds = outcome

    # Drop the experiment and force a collection before returning.
    del experiment
    gc.collect()
    return ci_test, y_test_preds
def random_search(exp_model, df, logdir, epochs=10000, patience=1000, batch_size=256, no_change=10):
    """Randomly search the four SurVED loss weights for the best c-index.

    Starting from a fixed initial combination, each iteration either
    evaluates the current combination with ``surved_fit`` or, if it already
    appears in ``df``, counts it as a repeat. A fresh combination is drawn
    at random after every iteration.

    Args:
        exp_model: Experiment class handed to ``configure_logger`` and
            ``surved_fit``; its ``__name__`` prefixes the results CSV.
        df: DataFrame of earlier results. Its columns between the first and
            the last are compared against each candidate, and one row
            ``[i, events_weight, censored_weight, c_index_lb_weight,
            kl_loss_weight, c_index]`` is appended per evaluation (mutated
            in place).
        logdir: Passed to ``configure_logger``.
        epochs: Forwarded to ``surved_fit``.
        patience: Forwarded to ``surved_fit``.
        batch_size: Forwarded to ``surved_fit``.
        no_change: Number of evaluations without improvement after which
            the search stops.

    Returns:
        ``(best_events_weight, best_censored_weight, best_c_index_lb_weight,
        best_kl_loss_weight, best_i, best_c_index)``.
    """
    logger = configure_logger(exp_model, logdir)

    loss_weight_grid = [0, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 0.8, 0.9, 1]
    kl_weight_grid = [0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1]

    # Candidate layout: (events, censored, c_index_lb, kl).
    candidate = (0.0001, 0.1, 0.1, 0.05)
    best_candidate = candidate
    best_c_index = 0
    best_i = 0

    stale_runs = 0      # evaluations since the last improvement
    repeat_draws = 0    # already-tried draws since the last improvement
    i = 0
    while stale_runs < no_change and repeat_draws < 100:
        i += 1
        events_weight, censored_weight, c_index_lb_weight, kl_loss_weight = candidate
        logger.info(f'{i} - Testing events_weight: {events_weight}, censored_weight: {censored_weight}, c_index_lb_weight: {c_index_lb_weight}, kl_loss_weight: {kl_loss_weight}')

        # Rebuilt on every pass because df grows as results are appended.
        already_tried = [tuple(row) for row in df.iloc[:, 1:-1].values]
        if candidate in already_tried:
            repeat_draws += 1
        else:
            stale_runs += 1
            c_index, _ = surved_fit(
                exp_model=exp_model,
                i=i,
                events_weight=events_weight,
                censored_weight=censored_weight,
                kl_loss_weight=kl_loss_weight,
                c_index_lb_weight=c_index_lb_weight,
                epochs=epochs,
                patience=patience,
                batch_size=batch_size,
            )
            logger.info(c_index)

            # Record the run and persist the table after every evaluation.
            df.loc[len(df)] = [i, events_weight, censored_weight, c_index_lb_weight, kl_loss_weight, c_index]
            df.to_csv(f'{exp_model.__name__}_results.csv', index=False)

            if c_index > best_c_index:
                # An improvement resets both stopping counters.
                stale_runs = 0
                repeat_draws = 0
                best_c_index = c_index
                best_i = i
                logger.info(f'New best c-index: {str(c_index)}')
                logger.info('=================================================================')
                best_candidate = candidate

        # Draw the next combination (events, censored, c_index_lb, kl order).
        candidate = (
            random.choice(loss_weight_grid),
            random.choice(loss_weight_grid),
            random.choice(loss_weight_grid),
            random.choice(kl_weight_grid),
        )

    return (*best_candidate, best_i, best_c_index)
def surved_fit_change_censoring(i, pe, events_only, action, events_weight=1, censored_weight=1, kl_loss_weight=0.0001, c_index_lb_weight=0, epochs=10000, patience=1000, batch_size=256):
    """Run a single ``SupportExperiment`` fold and compute its test c-index.

    Args:
        i: Run identifier; used in the experiment name and as ``fold_id``.
        pe: Forwarded to ``SupportExperiment`` as ``drop_percentage``.
        events_only: Forwarded to ``SupportExperiment`` unchanged.
        action: Forwarded to ``SupportExperiment`` unchanged.
        events_weight: Forwarded to ``SupportExperiment`` unchanged.
        censored_weight: Forwarded to ``SupportExperiment`` unchanged.
        kl_loss_weight: Forwarded to ``SupportExperiment`` unchanged.
        c_index_lb_weight: Forwarded to ``SupportExperiment`` unchanged.
        epochs: Forwarded as ``max_epochs``.
        patience: Forwarded as ``patience``.
        batch_size: Forwarded as ``batch_size``.

    Returns:
        ``(c_index, y_test_pred, y_test, e_test)`` where the last three are
        read from the object returned by ``run_fold`` and ``c_index`` is
        ``concordance_index(y_test, y_test_pred, e_test)``.
    """
    experiment_settings = {
        'exp_name': f'SurVED_Final_Test_{i}',
        'drop_percentage': pe,
        'events_only': events_only,
        'action': action,
        'events_weight': events_weight,
        'censored_weight': censored_weight,
        'surv_mse_loss_weight': 1,
        'kl_loss_weight': kl_loss_weight,
        'c_index_lb_weight': c_index_lb_weight,
        'max_epochs': epochs,
        'patience': patience,
        'surved_lr': 0.001,
        'batch_size': batch_size,
        'latent_size': 4,
        'activation': 'tanh',
        'num_samples': 100,
        'verbose': False,
    }
    experiment = SupportExperiment(**experiment_settings)

    # No test id is given; validation id is fixed at 0 and tuning is off.
    fitted = experiment.run_fold(test_id=None, val_id=0, fold_id=i, is_tuning=False)

    observed, predicted, events = fitted.y_test, fitted.y_test_pred, fitted.e_test
    score = concordance_index(observed, predicted, events)
    return score, predicted, observed, events
def surved_change_size_only(events_weight=1, censored_weight=1, kl_loss_weight=0.0001, c_index_lb_weight=0, epochs=10000, patience=1000, batch_size=256):
    """Sweep ``pe`` with ``action='drop'`` and ``events_only=False``.

    Calls ``surved_fit_change_censoring`` once per ``pe`` value in
    ``(0.60, 0.51, 0.36, 'full')``, writes each run's ``y_pred``, ``y_test``
    and ``e_test`` to ``surved_final_results_change_size_only_{pe}.csv``
    under the module-level ``currentdir``, prints the collected c-indices
    and returns their mean.

    Args:
        events_weight: Forwarded to ``surved_fit_change_censoring``.
        censored_weight: Forwarded to ``surved_fit_change_censoring``.
        kl_loss_weight: Forwarded to ``surved_fit_change_censoring``.
        c_index_lb_weight: Forwarded to ``surved_fit_change_censoring``.
        epochs: Forwarded to ``surved_fit_change_censoring``.
        patience: Forwarded to ``surved_fit_change_censoring``.
        batch_size: Forwarded to ``surved_fit_change_censoring``.

    Returns:
        ``np.mean`` of the per-run c-indices.
    """
    fit_options = dict(
        action='drop',
        events_only=False,
        events_weight=events_weight,
        censored_weight=censored_weight,
        kl_loss_weight=kl_loss_weight,
        c_index_lb_weight=c_index_lb_weight,
        epochs=epochs,
        patience=patience,
        batch_size=batch_size,
    )

    # NOTE: an earlier note in this file lists the unrounded values
    # 0.601317957166392, 0.5093904448105436, 0.36210873146622735 for this sweep.
    scores = []
    for run_id, pe in enumerate((0.60, 0.51, 0.36, 'full')):
        score, y_pred, y_test, e_test = surved_fit_change_censoring(i=run_id, pe=pe, **fit_options)
        scores.append(score)

        # Fill column by column, starting from an empty frame.
        frame = pd.DataFrame()
        for column, values in (('y_pred', y_pred), ('y_test', y_test), ('e_test', e_test)):
            frame[column] = values
        frame.to_csv(f'{currentdir}/surved_final_results_change_size_only_{pe}.csv', index=False)

    print(scores)
    return np.mean(scores)
def surved_change_censoring_only(events_weight=1, censored_weight=1, kl_loss_weight=0.0001, c_index_lb_weight=0, epochs=10000, patience=1000, batch_size=256):
    """Sweep ``pe`` with ``action='censor'`` and ``events_only=True``.

    Calls ``surved_fit_change_censoring`` once per ``pe`` value in
    ``(0.20, 0.35, 0.50, 'full')``, writes each run's ``y_pred``, ``y_test``
    and ``e_test`` to ``surved_final_results_change_censoring_only_{pe}.csv``
    under the module-level ``currentdir``, prints the collected c-indices
    and returns their mean.

    Args:
        events_weight: Forwarded to ``surved_fit_change_censoring``.
        censored_weight: Forwarded to ``surved_fit_change_censoring``.
        kl_loss_weight: Forwarded to ``surved_fit_change_censoring``.
        c_index_lb_weight: Forwarded to ``surved_fit_change_censoring``.
        epochs: Forwarded to ``surved_fit_change_censoring``.
        patience: Forwarded to ``surved_fit_change_censoring``.
        batch_size: Forwarded to ``surved_fit_change_censoring``.

    Returns:
        ``np.mean`` of the per-run c-indices.
    """
    fit_options = dict(
        action='censor',
        events_only=True,
        events_weight=events_weight,
        censored_weight=censored_weight,
        kl_loss_weight=kl_loss_weight,
        c_index_lb_weight=c_index_lb_weight,
        epochs=epochs,
        patience=patience,
        batch_size=batch_size,
    )

    scores = []
    for run_id, pe in enumerate((0.20, 0.35, 0.50, 'full')):
        score, y_pred, y_test, e_test = surved_fit_change_censoring(i=run_id, pe=pe, **fit_options)
        scores.append(score)

        # Fill column by column, starting from an empty frame.
        frame = pd.DataFrame()
        for column, values in (('y_pred', y_pred), ('y_test', y_test), ('e_test', e_test)):
            frame[column] = values
        frame.to_csv(f'{currentdir}/surved_final_results_change_censoring_only_{pe}.csv', index=False)

    print(scores)
    return np.mean(scores)
def surved_change_censoring_and_size(events_weight=1, censored_weight=1, kl_loss_weight=0.0001, c_index_lb_weight=0, epochs=10000, patience=1000, batch_size=256):
    """Sweep ``pe`` with ``action='drop'`` and ``events_only=True``.

    Calls ``surved_fit_change_censoring`` once per ``pe`` value in
    ``(0.20, 0.35, 0.50, 'full')``, writes each run's ``y_pred``, ``y_test``
    and ``e_test`` to
    ``surved_final_results_change_censoring_and_size_{pe}.csv`` under the
    module-level ``currentdir``, prints the collected c-indices and returns
    their mean.

    Args:
        events_weight: Forwarded to ``surved_fit_change_censoring``.
        censored_weight: Forwarded to ``surved_fit_change_censoring``.
        kl_loss_weight: Forwarded to ``surved_fit_change_censoring``.
        c_index_lb_weight: Forwarded to ``surved_fit_change_censoring``.
        epochs: Forwarded to ``surved_fit_change_censoring``.
        patience: Forwarded to ``surved_fit_change_censoring``.
        batch_size: Forwarded to ``surved_fit_change_censoring``.

    Returns:
        ``np.mean`` of the per-run c-indices.
    """
    fit_options = dict(
        action='drop',
        events_only=True,
        events_weight=events_weight,
        censored_weight=censored_weight,
        kl_loss_weight=kl_loss_weight,
        c_index_lb_weight=c_index_lb_weight,
        epochs=epochs,
        patience=patience,
        batch_size=batch_size,
    )

    scores = []
    for run_id, pe in enumerate((0.20, 0.35, 0.50, 'full')):
        score, y_pred, y_test, e_test = surved_fit_change_censoring(i=run_id, pe=pe, **fit_options)
        scores.append(score)

        # Fill column by column, starting from an empty frame.
        frame = pd.DataFrame()
        for column, values in (('y_pred', y_pred), ('y_test', y_test), ('e_test', e_test)):
            frame[column] = values
        frame.to_csv(f'{currentdir}/surved_final_results_change_censoring_and_size_{pe}.csv', index=False)

    print(scores)
    return np.mean(scores)