-
Notifications
You must be signed in to change notification settings - Fork 0
/
model_template.py
173 lines (137 loc) · 7.38 KB
/
model_template.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
import numpy as np
import yaml
from FRETboard.helper_functions import numeric_timestamp, discrete2continuous
# FRETBOARD MODEL TEMPLATE
# Using this template you can create your own FRETboard model. Just follow the instructions listed in comments
# for each method.
# You are completely free to implement any model, however as FRETboard is meant to rapidly estimate and re-estimate models
# to react to the user's input we advise to keep your model implementations light, e.g. fully training a neural network
# from the ground up might work but will probably slow down your analysis a lot.
#
# If you have written a model that you think might be useful for others, please consider submitting it as a pull request
# on the project github page:
#
# https://github.com/cvdelannoy/FRETboard.git)
class Classifier(object):
""" A template class for creating new algorithms for FRETboard
"""
def __init__(self, nb_states, data, **kwargs):
"""
:param nb_states: number of states to detect
:param data: object of class MainTable
:param gui: Gui object [optional]
:param buffer: int, size of buffer area around regular classes [required if gui not given]
"""
# Required parameters for training
self.nb_states = nb_states
self.feature_list = kwargs['features'] # list names of features that should be used in training/inference
# These parameters are passed on from the GUI, usage optional
self.buffer = kwargs['buffer'] # int number of points used in calculating the sliding-window features
# Parameters necessary for proper functioning of algorithm. Do not change!
self.timestamp = numeric_timestamp()
self.data = data
self.trained = None
self.framerate = None
# --- training ---
def train(self, data_dict, supervision_influence=1.0):
"""
Generate trained classifier
"""
# Determine framerate for input data, leave as is
if self.framerate is None:
self.framerate = 1 / np.concatenate([data_dict[tr].time.iloc[1:].to_numpy() -
data_dict[tr].time.iloc[:-1].to_numpy() for tr in data_dict]).mean()
# Write your code for generating a classifier object here. The only restrictions are:
# - it should use the data in the data_dict argument
# - it should store the object in self.trained
# - the predict function of this class should be able to use it to run inference
self.trained = None # <-- classifier object goes here!
# Parameters necessary for proper functioning of algorithm. Do not change!
self.timestamp = numeric_timestamp()
def predict(self, trace_df):
"""
Predict labels for a trace
:trace_df: DataFrame containing all features of a single trace as its columns
:returns:
state_list:
logprob_list: list of floats of length len(idx) containing posterior log-probabilities
"""
# Write your code for predicting classification for the given traces here.
state_array = None # <- Redefine as numpy array of states, of which length equals len(trace_df)
mean_logprob = None # <- Redefine as mean log-probability for the prediction
return state_array, mean_logprob
# --- parameters and performance measures ---
def get_data_tm(self, trace_dict, out_labels, nb_bootstrap_iters):
"""
Calculate bootstrapped confidence intervals on data-derived transition matrix values and convert to
continous transition rates
"""
# This method produces a transition rate matrix from classified data, with bootstrapped CIs. It should
# work if predict is correctly implemented.
state_order_dict = {state.name: idx for idx, state in enumerate(self.trained.states)}
# actual estimate
trace_list = list(trace_dict.values())
nb_traces = len(trace_list)
actual_tm = self.tm_from_seq(trace_list)
# CIs
tm_array = []
for n in range(nb_bootstrap_iters):
bs_idx = np.random.choice(nb_traces, size=nb_traces)
tm = self.tm_from_seq([trace_list[it] for it in bs_idx])
tm_array.append(tm)
tm_mat = np.stack(tm_array, axis=-1)
sd_mat = np.std(tm_mat, axis=-1)
mu_mat = np.mean(tm_mat, axis=-1)
ci_mat = np.tile(np.expand_dims(mu_mat, -1), (1, 1, 2))
ci_mat[:, :, 0] -= sd_mat * 2
ci_mat[:, :, 1] += sd_mat * 2
return actual_tm, ci_mat
def tm_from_seq(self, seq_list):
# Necessary for get_data_tm(), leave as is
tm_out = np.zeros([self.nb_states, self.nb_states], dtype=int)
for seq in seq_list:
seq_pred = seq.predicted.astype(int)
for tr in zip(seq_pred[:-1], seq_pred[1:]):
tm_out[tr[0], tr[1]] += 1
return discrete2continuous(tm_out / np.expand_dims(tm_out.sum(axis=1), -1), self.framerate)
def get_mus(self, feature):
mu_list = None # <-- Return a list of mean peak values for all classes, for a feature of a given name
return mu_list
# --- saving/loading models ---
def get_params(self):
# Should return a representation of the trained model in text format, as a string. In principle,
# The only requirement is that load_params() can read it and produce the exact same model again, however the
# lines below give you a head start by defining some parameters. The load_params() method
# similarly contains some code to read these out again.
mod_txt = '' # <- store model representation as string here
some_other_stuff = 'foobar\n' # <- example variable to show how other information can be stored
feature_txt = '\n'.join(self.feature_list)
div = '\nSTART_NEW_SECTION\n'
out_txt = ('CoolModelName' # <- give your model a name (no spaces) so the load_params() function can immediately check if the model loaded is the correct one
+ div + mod_txt
+ div + some_other_stuff
+ div + feature_txt
+ div + f'nb_states: {str(self.nb_states)}\n'
f'dbscan_epsilon: {self.data.eps}\n'
f'supervision_influence: {self.supervision_influence}\n'
f'framerate: {self.framerate}')
return out_txt
def load_params(self, file_contents):
# Should load the model from a string as it is stored by get_params(). The lines below should give you a head
# start again.
(mod_check,
model_txt,
some_other_stuff, # <- example variable, see get_params()
feature_txt, # <- features
misc_txt) = file_contents.split('\nSTART_NEW_SECTION\n')
if mod_check != 'CoolModelName':
error_msg = '\nERROR: loaded model parameters are not for this type of model!'
raise ValueError(error_msg)
self.feature_list = feature_txt.split('\n')
misc_dict = yaml.load(misc_txt, Loader=yaml.SafeLoader)
if misc_dict['dbscan_epsilon'] == 'nan': misc_dict['dbscan_epsilon'] = np.nan
self.nb_states = misc_dict['nb_states']
self.data.eps = misc_dict['dbscan_epsilon']
self.supervision_influence = misc_dict['supervision_influence']
self.framerate = misc_dict['framerate']
self.timestamp = numeric_timestamp()