# multimodal_autoencoder.py
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
import sys
import copy
import os
import time
import math
CODE_PATH = os.path.dirname(os.getcwd())
sys.path.append(CODE_PATH)
DEFAULT_MAIN_DIRECTORY = '/Your/path/here/'
import data_funcs
def reload_files():
reload(data_funcs)
def weight_variable(shape, name, var_type='normal', const=1):
"""Initializes a tensorflow weight variable.
Args:
shape: An array representing shape of the weight variable
name: A string name given to the variable.
var_type: can be either 'normal', for weights following a Gaussian
distribution around 0, or 'xavier', for the Xavier method
const: Numeric value that controls the range of the weights within
the Xavier method.
Returns: Tensor variable for the weights
"""
if var_type == 'xavier':
""" Xavier initialization of network weights.
Taken from: https://gist.github.com/blackecho/3a6e4d512d3aa8aa6cf9
https://stackoverflow.com/questions/33640581/how-to-do-xavier-initialization-on-tensorflow
"""
assert len(shape) == 2
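        # Glorot/Bengio ("Xavier") uniform bounds: +/- const * sqrt(6 / (fan_in + fan_out)).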
low = -const * np.sqrt(6.0 / (shape[0] + shape[1]))
high = const * np.sqrt(6.0 / (shape[0] + shape[1]))
initial = tf.random_uniform((shape[0], shape[1]), minval=low, maxval=high)
else:
initial = tf.truncated_normal(shape, stddev=1.0 / math.sqrt(float(shape[0])), dtype=tf.float32)
return tf.Variable(initial, name=name)
def bias_variable(shape, name):
"""Initializes a tensorflow bias variable to a small constant value.
Args:
shape: An array representing shape of the weight variable
name: A string name given to the variable.
    Returns: a Tensor variable for the biases"""
initial = tf.constant(0.1, shape=shape, dtype=tf.float32)
return tf.Variable(initial, name=name)
class MultimodalAutoencoder:
def __init__(self, filename=None, layer_sizes=[128,64,32], variational=True,
tie_weights=True, batch_size=10, learning_rate=.0001,
dropout_prob=1.0, weight_penalty=0.0, activation_func='softsign',
loss_func='sigmoid_cross_entropy', decay=True, decay_steps=1000,
decay_rate=0.95, clip_gradients=True, classification_layer_sizes=None,
classification_filename=None, weight_initialization='xavier',
normalization='between_0_and_1', intelligent_noise=True,
num_modalities_to_drop=1,
subdivide_physiology=True, fill_missing_with=0.0, mask_with=-1.0,
checkpoint_dir=DEFAULT_MAIN_DIRECTORY + 'temp_saved_models/',
model_name='multimodal_autoencoder', extra_data_filename=None,
data_loader=None, classification_data_loader=None, verbose=True):
'''Initialize the class by loading the required datasets and building
the graph.
Args:
filename: A string file path containing data to load.
layer_sizes: A list of sizes of the neural network layers in the
encoding portion of the network. Will be mirrored for decoding
portion.
variational: A boolean that if True will build a Variational
Autoencoder model. If False, model will simply be a denoising
autoencoder.
tie_weights: A boolean. If True, the net will use the same weights
for the encoder and decoder.
batch_size: number of training examples in each training batch.
learning_rate: The initial learning rate used in stochastic
gradient descent.
dropout_prob: The probability that a node in the network will not
be dropped out during training. Set to < 1.0 to apply dropout,
1.0 to remove dropout.
weight_penalty: The coefficient of the L2 weight regularization
applied to the loss function. Set to > 0.0 to apply weight
regularization, 0.0 to remove.
activation_func: String representing the activation function used
on neurons. Could be 'relu', 'tanh', 'softsign', etc.
loss_func: Distance function that measures reconstruction error.
decay: A bool for whether or not to apply learning rate decay.
decay_steps: Number of training steps after which to decay the
learning rate.
decay_rate: Rate at which the learning rate decays.
clip_gradients: A bool indicating whether or not to clip gradients.
This is effective in preventing very large gradients from skewing
training, and preventing your loss from going to inf or nan.
classification_layer_sizes: A list of sizes of neural network layers
that will be attached to the embedding layer and designed to
perform classification. If None, the network will not perform
classification.
classification_filename: A file where classification data is located.
weight_initialization: If 'normal' will initialize weights using the
typical truncated normal distribution. If 'xavier' will use the
xavier method.
normalization: Method for normalizing the features. Can be 'z_score'
or 'between_0_and_1'.
subdivide_physiology: A boolean. If True, will break the physiology
modality into smaller pieces, each of which can be missing
independently.
intelligent_noise: If True, the denoising autoencoder will drop out
modalities using a distribution designed to match that of the
training data. Otherwise it will drop modalities uniformly at
random.
num_modalities_to_drop: If not using intelligent noise, how many
modalities to drop
fill_missing_with: Value to use for filling the missing entries
throughout the data array.
mask_with: Value to use for blanking out whole modalities.
            checkpoint_dir: The directory where the model will save checkpoints,
saved files containing trained network weights.
model_name: Name of the model being trained. Used in saving
model checkpoints.
            extra_data_filename: A string file path to additional data that has
                been genuinely corrupted with noise. If provided, it can be
                used for extra testing.
data_loader: A DataLoader class object which already has pre-loaded
data.
classification_data_loader: A DataLoader class object which already
has pre-loaded classification data.
verbose: Set to True to see output statements about model construction
and training.
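
        Example usage (illustrative; the file path and data format are
        assumptions, not supplied by this module):
            mmae = MultimodalAutoencoder(filename='/path/to/features.csv',
                                         layer_sizes=[128, 64, 32],
                                         variational=True)
            mmae.train(num_steps=10000)
            mmae.plot_training_progress()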
'''
# Hyperparameters
self.layer_sizes = layer_sizes
self.embedding_size = layer_sizes[-1]
self.tie_weights = tie_weights
self.variational = variational
self.batch_size = batch_size
self.learning_rate = learning_rate
self.dropout_prob = dropout_prob
self.weight_penalty = weight_penalty
self.weight_initialization = weight_initialization
self.classification_layer_sizes = classification_layer_sizes
self.classification_filename = classification_filename
self.normalization = normalization
self.fill_missing_with = fill_missing_with
self.mask_with = mask_with
self.clip_gradients = clip_gradients
self.activation_func = activation_func
self.loss_func = loss_func
self.decay = decay
self.decay_steps = decay_steps
self.decay_rate = decay_rate
self.optimizer = tf.train.AdamOptimizer
# Logistics
self.checkpoint_dir = checkpoint_dir
self.filename = filename
self.model_name = model_name
self.record_every_nth = 50
self.save_every_nth = 100000
self.subdivide_physiology = subdivide_physiology
self.intelligent_noise = intelligent_noise
self.num_modalities_to_drop = num_modalities_to_drop
self.extra_data_filename = extra_data_filename
self.verbose = verbose
# Override settings necessary for VAE
if self.variational:
if self.verbose: print "Building VAE. Will use 0-1 normalization, cross entropy loss, and will not tie weights.\n"
self.tie_weights = False
self.normalization = 'between_0_and_1'
self.loss_func = 'sigmoid_cross_entropy'
if self.normalization == 'z_score' and (loss_func == 'cross_entropy'
or loss_func == 'sigmoid_cross_entropy'):
print "ERROR! Cannot use cross entropy loss with z-score data. Changing normalization method to 0-1"
self.normalization = 'between_0_and_1'
# Extract the data from the filename
if data_loader is not None:
self.data_loader = data_loader
elif filename is not None:
self.data_loader = data_funcs.DataLoader(filename, supervised=False,
subdivide_physiology_features=subdivide_physiology,
normalize_and_fill=False,
normalization=self.normalization,
fill_missing_with=self.fill_missing_with)
else:
print "ERROR! Must set either filename or data_loader to a value so that MMAE has access to data."
return
self.extra_noisy_data_loader = None
if self.intelligent_noise:
print "Using intelligent noise"
self.noise_type_percentages = [ 0.64018104, 0.03168217, 0.25119437, 0.07694242]
self.noise_types = [[],
['call','sms','screen'],
['location'],
['location','call','sms','screen']]
if self.classification_layer_sizes is not None:
if self.verbose: print "Okay, preparing model to perform classification"
self.train_acc = []
self.val_acc = []
self.classification_train_loss = []
self.classification_val_loss = []
self.classification_learning_rate = .0001
self.classification_batch_size = 100
self.classification_dropout_prob = self.dropout_prob
self.classification_activation_func = self.activation_func
self.classification_weight_penalty = 0.0
self.classification_loss_func = 'sigmoid_cross_entropy'
if classification_data_loader is None:
self.classification_data_loader = data_funcs.DataLoader(self.classification_filename,
supervised=True,
subdivide_physiology_features=subdivide_physiology,
normalize_and_fill=False,
normalization=self.normalization,
fill_missing_with=self.fill_missing_with)
else:
self.classification_data_loader = classification_data_loader
# Set up tensorflow computation graph.
self.graph = tf.Graph()
self.build_graph()
# Set up and initialize tensorflow session.
self.initialize_session()
# Use for plotting evaluation.
self.train_loss = []
self.val_loss = []
def rebuild_reinitialize(self):
"""Resets the tensorflow graph to start training from scratch."""
# Set up tensorflow computation graph.
self.graph = tf.Graph()
self.build_graph()
# Set up and initialize tensorflow session.
self.initialize_session()
# Use for plotting evaluation.
self.train_loss = []
self.val_loss = []
def initialize_network_weights(self):
"""Constructs Tensorflow variables for the weights and biases
in each layer of the graph. These variables will be updated
as the network learns.
The number of layers and the sizes of each layer are defined
in the class's layer_sizes field.
"""
# Construct encoder and decoder layers.
enc_sizes = []
dec_sizes = []
self.encode_weights = []
self.decode_weights = []
self.encode_biases = []
self.decode_biases = []
for i in range(len(self.layer_sizes)):
if i==0:
input_len = self.data_loader.num_feats # X second dimension
else:
input_len = self.layer_sizes[i-1]
output_len = self.layer_sizes[i]
layer_weights = weight_variable([input_len, output_len],
name='weights' + str(i),
var_type=self.weight_initialization)
self.encode_weights.append(layer_weights)
if self.tie_weights:
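                # Tied weights: reuse the transpose of the encoder matrix as the
                # decoder matrix instead of learning a separate set of weights.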
self.decode_weights.append(tf.transpose(layer_weights))
else:
decode_weights_l = weight_variable([output_len, input_len],
name='decode_weights' + str(i),
var_type=self.weight_initialization)
self.decode_weights.append(decode_weights_l)
layer_biases_enc = bias_variable([output_len], name='encode_biases' + str(i))
layer_biases_dec = bias_variable([input_len], name='decode_biases' + str(i))
self.encode_biases.append(layer_biases_enc)
self.decode_biases.append(layer_biases_dec)
enc_sizes.append((str(input_len) + "x" + str(output_len), str(output_len)))
dec_sizes.append((str(output_len) + "x" + str(input_len), str(input_len)))
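        # For a VAE, add a parallel head off the penultimate layer that emits a
        # log-scale parameter per embedding dimension (assumes len(layer_sizes)
        # >= 2, since it reads layer_sizes[-2]).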
if self.variational:
self.variance_weights = weight_variable([self.layer_sizes[-2], self.embedding_size],
name='variance_weights',
var_type=self.weight_initialization)
self.variance_bias = bias_variable([self.embedding_size], name='variance_bias')
self.decode_weights.reverse()
self.decode_biases.reverse()
if self.verbose:
print("Okay, making a neural net with the following structure:")
dec_sizes.reverse()
print(enc_sizes + dec_sizes)
# Construct classification layers of the network if necessary.
if self.classification_layer_sizes is not None:
self.classification_weights = []
self.classification_biases = []
classif_sizes = []
for i in range(len(self.classification_layer_sizes)+1):
if i==0:
input_len = self.embedding_size
else:
input_len = self.classification_layer_sizes[i-1]
if i==len(self.classification_layer_sizes):
if self.classification_data_loader.num_labels is not None:
output_len = self.classification_data_loader.num_labels
else:
output_len = 2
else:
output_len = self.classification_layer_sizes[i]
layer_weights = weight_variable([input_len, output_len],
name='classification_weights' + str(i),
var_type=self.weight_initialization)
layer_biases = bias_variable([output_len], name='classification_biases' + str(i))
self.classification_weights.append(layer_weights)
self.classification_biases.append(layer_biases)
classif_sizes.append((str(input_len) + "x" + str(output_len), str(output_len)))
if self.verbose:
print("Okay, adding additional classification layers with the following structure:")
print(classif_sizes)
def build_graph(self):
"""Constructs the tensorflow computation graph containing all variables
that will be trained."""
if self.verbose: print '\nBuilding computation graph...'
with self.graph.as_default():
# Data placeholder
self.noisy_X = tf.placeholder(tf.float32, name="noisy_X")
self.true_X = tf.placeholder(tf.float32, name="true_X")
# Enhancements
self.tf_dropout_prob = tf.placeholder(tf.float32)
self.global_step = tf.Variable(0)
if self.decay:
self.tf_learning_rate = tf.train.exponential_decay(self.learning_rate, self.global_step,
self.decay_steps, self.decay_rate)
else:
self.tf_learning_rate = self.learning_rate
# Network weights/parameters that will be learned
self.initialize_network_weights()
# Pass data through the network to get an embedding
self.embedding = self.encode(self.noisy_X)
# If this is a Variational Autoencoder, need to sample embedding
# from Normal distribution with mean of the previous embedding and
# learned variance
if self.variational:
with tf.name_scope('sample_embedding'):
                    # Reparameterization trick: z = mu + sigma * eps with
                    # eps ~ N(0, I) and sigma = exp(log_var) (despite its name,
                    # log_var holds log-sigma, matching the KL term below).
                    # Keep the mean separately: the KL divergence is defined on
                    # mu, not on the sampled z.
                    self.mean_embedding = self.embedding
                    self.epsilon = tf.random_normal(tf.shape(self.log_var), 0, 1, name='epsilon')
                    self.embedding = self.mean_embedding + self.epsilon * tf.exp(self.log_var)
# Pass embedding through the decode portion of the network
self.decoded_X = self.decode(self.embedding)
# Compute the reconstruction loss.
with tf.name_scope('loss'):
if self.loss_func == 'mean_squared':
self.squared_errors = tf.square(self.decoded_X - self.true_X)
self.reconstruction_loss = tf.sqrt(tf.reduce_mean(self.squared_errors))
elif self.loss_func == 'cross_entropy':
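                    # NB: plain cross entropy assumes decoded_X already lies in
                    # (0, 1); with a linear decoder output, the
                    # sigmoid_cross_entropy branch below is the safer choice.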
self.reconstruction_loss = - tf.reduce_sum(self.true_X * tf.log(self.decoded_X))
elif self.loss_func == 'sigmoid_cross_entropy':
self.reconstruction_loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(
logits=self.decoded_X, labels=self.true_X))
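                    # The loss above consumed raw logits; squash them so that
                    # decoded_X exposes probabilities to downstream callers.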
self.decoded_X = tf.nn.sigmoid(self.decoded_X)
# Add weight decay regularization term to loss.
with tf.name_scope('weight_regularization'):
self.reg_loss = self.weight_penalty * sum([tf.nn.l2_loss(w) for w in self.encode_weights])
self.reg_loss += self.weight_penalty * sum([tf.nn.l2_loss(w) for w in self.decode_weights])
if self.variational:
self.reg_loss += self.weight_penalty * tf.nn.l2_loss(self.variance_weights)
# Add variation loss, if necessary.
if self.variational:
with tf.name_scope('variational_loss'):
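                    # Closed-form KL from N(mu, sigma^2) to N(0, I), per example:
                    # -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2), where
                    # log(sigma^2) = 2 * log_var because log_var stores log-sigma.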
                    self.kl_divergence = -0.5 * tf.reduce_sum(1 + 2 * self.log_var
                                                              - tf.pow(self.mean_embedding, 2)
                                                              - tf.exp(2 * self.log_var),
                                                              reduction_indices=1)
self.total_loss = tf.reduce_mean(self.reconstruction_loss + self.kl_divergence) + self.reg_loss
else:
self.total_loss = self.reconstruction_loss + self.reg_loss
            # Training step with optimizer. Pass global_step into the update so
            # that the exponential learning rate decay actually advances, and
            # optionally clip gradients to keep the loss from going to inf/nan
            # (the global-norm threshold of 5.0 is an assumed, conventional value).
            if self.clip_gradients:
                tvars = tf.trainable_variables()
                grads, _ = tf.clip_by_global_norm(tf.gradients(self.total_loss, tvars), 5.0)
                self.opt_step = self.optimizer(self.tf_learning_rate).apply_gradients(
                    zip(grads, tvars), global_step=self.global_step)
            else:
                self.opt_step = self.optimizer(self.tf_learning_rate).minimize(
                    self.total_loss, global_step=self.global_step)
# Additional classification layers
if self.classification_layer_sizes is not None:
self.build_classification_graph()
# Logistics
self.init = tf.global_variables_initializer()
def build_classification_graph(self):
"""Builds additional tensorflow graph layers to perform classification."""
with tf.name_scope('classification'):
# Data placeholders.
self.true_Y = tf.placeholder(tf.float32, name="true_Y")
self.int_true_Y = tf.cast(self.true_Y, dtype=tf.int32)
# Run classification portion of network from embedding.
self.logits = self.classify(self.embedding)
# Compute classification loss.
if self.classification_loss_func == 'sigmoid_cross_entropy':
self.classification_loss = tf.reduce_mean(
tf.nn.sigmoid_cross_entropy_with_logits(logits=self.logits, labels=self.true_Y))
else:
print "Using softmax CE loss for classification"
self.classification_loss = tf.reduce_mean(
tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, labels=self.int_true_Y))
# Add weight decay regularization term to loss
self.classification_loss += self.classification_weight_penalty * sum([tf.nn.l2_loss(w) for w in self.classification_weights])
self.classification_opt_step = self.optimizer(self.classification_learning_rate).minimize(self.classification_loss)
# Code for making predictions and evaluating them.
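            # Sigmoid is fine for picking the argmax (it is monotonic, so
            # argmax(sigmoid(logits)) == argmax(logits)), but for the softmax
            # branch these values are not calibrated class probabilities.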
self.class_probabilities = tf.nn.sigmoid(self.logits)
if self.classification_loss_func == 'sigmoid_cross_entropy':
self.predictions = tf.cast(tf.round(self.class_probabilities), dtype=tf.int32)
else:
self.predictions = tf.cast(tf.argmax(self.class_probabilities, axis=1), dtype=tf.int32)
self.correct_prediction = tf.equal(self.predictions, self.int_true_Y)
self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32))
def encode(self, X):
"""Runs data through the encoder portion of the network.
Args:
X: A tensor containing input data.
Returns: A tensor embedding
"""
hidden = X
for i in range(len(self.encode_weights)):
with tf.name_scope('enc_layer' + str(i)) as scope:
if self.variational and i == len(self.encode_weights) - 1:
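                    # At the final encoder layer, 'hidden' holds the penultimate
                    # activations; branch them into the log-sigma head.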
self.log_var = tf.matmul(hidden, self.variance_weights) + self.variance_bias
hidden = tf.matmul(hidden, self.encode_weights[i]) + self.encode_biases[i]
if i < len(self.encode_weights)-1:
# Apply activation function
hidden = self.apply_activation(hidden)
# Apply dropout
hidden = tf.nn.dropout(hidden, self.tf_dropout_prob)
return hidden
def apply_activation(self, X, activation_func=None):
"""Applies a particular tensorflow activation function based on
a string description.
Args:
X: A tensor to be passed through the activation function
activation_func: A string description. If None, will use the class
default.
"""
if activation_func is None:
activation_func = self.activation_func
if activation_func == 'relu':
return tf.nn.relu(X)
elif activation_func == 'tanh':
return tf.nn.tanh(X)
elif activation_func == 'softsign':
return tf.nn.softsign(X)
elif activation_func == 'softplus':
return tf.nn.softplus(X)
return X # linear
def decode(self, embedding):
"""Runs an embedding through the decoder portion of the network.
Args:
embedding: A tensor containing an embedding created by the encoder.
Returns: A tensor X', the decoded version of the embedding
"""
X = embedding
for i,w in enumerate(self.decode_weights):
with tf.name_scope('dec_layer' + str(i)) as scope:
# tf.matmul is a simple fully connected layer.
X = tf.matmul(X, w) + self.decode_biases[i]
if i < len(self.decode_weights)-1:
# Apply activation function
X = self.apply_activation(X)
# Apply dropout
X = tf.nn.dropout(X, self.tf_dropout_prob)
return X
def classify(self, embedding):
"""Runs an embedding vector through the classification layers of the
network.
Args:
embedding: A tensor embedding.
"""
X = embedding
for i,w in enumerate(self.classification_weights):
with tf.name_scope('classification_layer' + str(i)) as scope:
# tf.matmul is a simple fully connected layer.
X = tf.matmul(X, w) + self.classification_biases[i]
                if i < len(self.classification_weights)-1:
# Apply activation function
X = self.apply_activation(X,
activation_func=self.classification_activation_func)
# Apply dropout
X = tf.nn.dropout(X, self.tf_dropout_prob)
return X
def initialize_session(self):
"""Initializes a tensorflow session and saver before training the network."""
self.session = tf.Session(graph=self.graph)
self.session.run(self.init)
with self.graph.as_default():
self.saver = tf.train.Saver()
def train(self, num_steps=30000, record_every_nth=None, save_every_nth=None):
"""Trains using stochastic gradient descent (SGD).
Runs batches of training data through the model for a given
number of steps.
Args:
num_steps: The number of times a batch of training data will be used
to train the network.
record_every_nth: The number of steps before it will evaluate and save
the current training and validation loss.
save_every_nth: The number of steps before it will save a checkpoint of
the model.
"""
self.set_record_save(record_every_nth, save_every_nth)
with self.graph.as_default():
for step in range(num_steps):
# Grab a batch of data to feed into the placeholders in the graph.
X = self.data_loader.get_unsupervised_train_batch(self.batch_size)
noisy_X = self.add_noise_to_batch(X)
feed_dict = {self.noisy_X: noisy_X, self.true_X: X,
self.tf_dropout_prob: self.dropout_prob}
# Output/save the training and validation performance every few steps.
if step % self.record_every_nth == 0:
train_loss, val_loss = self.evaluate_performance(feed_dict)
self.train_loss.append(train_loss)
self.val_loss.append(val_loss)
if self.verbose:
print "Training iteration", step
print "\t Training loss", train_loss
print "\t Validation loss", val_loss
if step > 0 and step % self.save_every_nth == 0:
# Save a checkpoint of the model
self.save_model()
# Update parameters in the direction of the gradient computed by
# the optimizer.
_ = self.session.run([self.opt_step], feed_dict)
def set_record_save(self, record_every_nth, save_every_nth):
"""Set the number of steps before the model records and saves its progress.
Args:
record_every_nth: The number of steps before it will evaluate and save
the current training and validation loss.
save_every_nth: The number of steps before it will save a checkpoint of
the model.
"""
if record_every_nth is not None:
self.record_every_nth = record_every_nth
if save_every_nth is not None:
self.save_every_nth = save_every_nth
def train_classification(self, num_steps=30000, record_every_nth=None, save_every_nth=None):
"""Perform supervised training of the embedding and classification layers via training labels.
Args:
num_steps: The number of times a batch of data will be used for training.
record_every_nth: The number of steps before it will evaluate and save
the current training and validation loss.
save_every_nth: The number of steps before it will save a checkpoint of
the model.
"""
self.set_record_save(record_every_nth, save_every_nth)
with self.graph.as_default():
for step in range(num_steps):
# Get a batch of training data and associated labels
X, Y = self.classification_data_loader.get_supervised_train_batch(self.classification_batch_size)
noisy_X = self.add_noise_to_batch(X)
feed_dict = {self.noisy_X: noisy_X, self.true_Y: Y,
self.tf_dropout_prob: self.classification_dropout_prob}
# Output/save the training and validation performance every few steps.
if step % self.record_every_nth == 0:
train_loss, train_acc, val_loss, val_acc = self.evaluate_classification_performance(feed_dict)
self.train_acc.append(train_acc)
self.val_acc.append(val_acc)
self.classification_train_loss.append(train_loss)
self.classification_val_loss.append(val_loss)
if self.verbose:
print "Training iteration", step
print "\t Training loss", train_loss
print "\t Validation loss", val_loss
print "\t Training accuracy", train_acc
print "\t Validation accuracy", val_acc
if step > 0 and step % self.save_every_nth == 0:
# Save a checkpoint of the model
self.save_model()
# Update parameters in the direction of the gradient computed by
# the optimizer.
_ = self.session.run([self.classification_opt_step], feed_dict)
def mask_modality(self, X, row, mod_i):
"""Given a design matrix X, will mask all data from the set of features
associated with modality mod_i in a certain row.
Args:
X: A numpy matrix.
row: The index of the row in which the modality should be masked.
mod_i: The index of the modality to mask.
Returns:
The X matrix with the modality masked.
"""
# Computes the column indices for the masked modality
start_i = self.data_loader.modality_start_indices[mod_i]
end_i = self.data_loader.modality_start_indices[mod_i+1]
# Masks the modality with the class's mask value, self.mask_with
X[row,start_i:end_i] = self.mask_with * np.ones(end_i-start_i)
return X
def add_noise_to_batch(self, X, missing_modes=[]):
"""Alters a batch of data X so that it contains noise.
Args:
X: A 2D numpy array containing the batch of data.
missing_modes: A list of modality indices used to specify which
modalities can go missing.
Returns:
A new numpy array containing the noisy batch.
"""
new_X = copy.deepcopy(X)
num_feats = np.shape(new_X)[1]
for i in range(len(new_X)):
# randomly 0 out 5% of the data
idx = np.random.choice(num_feats, size=int(num_feats*.05))
new_X[i,idx] = 0
# drop out a modality or modalities
if self.intelligent_noise:
# Intelligent or structured noise is used to make modalities go missing in the
# same proportion that they are missing in the real noisy data.
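                # np.random.multinomial(1, pvals) returns a one-hot count
                # vector, so argmax recovers the sampled pattern's index.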
noise_i = np.argmax(np.random.multinomial(1, pvals=self.noise_type_percentages))
missing_modalities = self.noise_types[noise_i]
if len(missing_modes)>0:
missing_modalities = missing_modes
for m in missing_modalities:
mod_i = self.data_loader.modality_names.index(m)
new_X = self.mask_modality(new_X, i, mod_i)
else:
# If not using intelligent noise, just drop a modality randomly.
for _ in range(self.num_modalities_to_drop):
mod_i = np.random.randint(0, self.data_loader.num_modalities)
new_X = self.mask_modality(new_X, i, mod_i)
return new_X
def evaluate_performance(self, train_feed_dict=None):
"""Tests the reconstruction performance of the autoencoder on a batch of training
and validation data.
Args:
train_feed_dict: If a dictionary containing training data has already been
constructed in the caller function, can send it here for efficiency.
Returns:
2 floats: the training loss and the validation loss."""
# Get a batch of training data if one was not sent to the function.
if train_feed_dict is None:
X = self.data_loader.get_unsupervised_train_batch(self.batch_size)
train_feed_dict = {self.noisy_X: X, self.true_X: X, self.tf_dropout_prob: 1.0}
# Grab a batch of validation data too.
val_X = self.data_loader.get_unsupervised_val_batch(200)
noisy_val_X = self.add_noise_to_batch(val_X)
val_feed_dict = {self.noisy_X: noisy_val_X,
self.true_X: val_X,
self.tf_dropout_prob: 1.0} # no dropout during evaluation
# Compute the losses.
train_loss, step = self.session.run([self.reconstruction_loss,
self.global_step],
train_feed_dict)
val_loss = self.session.run(self.reconstruction_loss,
val_feed_dict)
# Normalize the loss by the size of the batch for comparison purposes
if 'entropy' in self.loss_func:
train_loss = train_loss/len(train_feed_dict[self.true_X])
val_loss = val_loss/len(val_X)
return train_loss, val_loss
def evaluate_classification_performance(self, train_feed_dict=None):
"""Tests the classification performance on training and validation data.
Args:
train_feed_dict: If a dictionary containing training data has already been
constructed in the caller function, can send it here for efficiency.
Returns:
4 floats: training loss, training accuracy, validation loss, validation
accuracy
"""
if train_feed_dict is None:
X, Y = self.classification_data_loader.get_supervised_train_batch(self.classification_batch_size)
train_feed_dict = {self.noisy_X: X, self.true_Y: Y,
                               self.tf_dropout_prob: 1.0} # no dropout during evaluation
val_X, val_Y = self.classification_data_loader.get_supervised_val_batch(200)
val_feed_dict = {self.noisy_X: val_X,
self.true_Y: val_Y,
self.tf_dropout_prob: 1.0} # no dropout during evaluation
train_loss, train_acc = self.session.run([self.classification_loss,
self.accuracy],train_feed_dict)
val_loss, val_acc = self.session.run([self.classification_loss,
self.accuracy], val_feed_dict)
return train_loss, train_acc, val_loss, val_acc
def save_model(self, file_name=None, directory=None):
"""Saves a checkpoint of the model and a .npz file with stored rewards.
Args:
file_name: String name to use for the checkpoint and rewards files.
Defaults to self.model_name if None is provided.
directory: Directory where the checkpoint will be saved. Defaults to
self.checkpoint_dir if None is provided.
"""
if self.verbose: print "Saving model..."
if file_name is None:
file_name = self.model_name
if directory is None:
directory = self.checkpoint_dir
else:
save_dir = directory + file_name
            if not os.path.exists(save_dir):
                os.mkdir(save_dir)
directory = save_dir + '/'
save_loc = os.path.join(directory, file_name + '.ckpt')
training_epochs = len(self.train_loss) * self.record_every_nth
self.saver.save(self.session, save_loc, global_step=training_epochs)
npz_name = os.path.join(directory,
file_name + '-' + str(training_epochs))
np.savez(npz_name,
train_loss=self.train_loss,
val_loss=self.val_loss,
layer_sizes=self.layer_sizes,
variational=self.variational,
dropout_prob=self.dropout_prob,
weight_penalty=self.weight_penalty,
activation_func=self.activation_func,
loss_func=self.loss_func,
weight_initialization=self.weight_initialization)
def load_saved_model(self, directory=None, checkpoint_name=None,
npz_file_name=None):
"""Restores this model from a saved checkpoint.
Args:
            directory: Path to directory where checkpoint is located. If
                None, defaults to self.checkpoint_dir.
checkpoint_name: The name of the checkpoint within the
directory.
npz_file_name: The name of the .npz file where the stored
rewards are saved. If None, will not attempt to load stored
rewards.
"""
print "-----Loading saved model-----"
        if directory is None:
            directory = self.checkpoint_dir
if checkpoint_name is not None:
checkpoint_file = os.path.join(directory, checkpoint_name)
else:
checkpoint_file = tf.train.latest_checkpoint(directory)
print "Looking for checkpoin in directory", directory
if checkpoint_file is None:
print "Error! Cannot locate checkpoint in the directory"
return
else:
print "Found checkpoint file:", checkpoint_file
if npz_file_name is not None:
npz_file_name = os.path.join(directory, npz_file_name)
print "Attempting to load saved reward values from file", npz_file_name
npz_file = np.load(npz_file_name)
self.train_loss = list(npz_file['train_loss'])
self.val_loss = list(npz_file['val_loss'])
if self._print_if_saved_setting_differs(self.layer_sizes, 'layer_sizes', npz_file):
self.layer_sizes = npz_file['layer_sizes']
if self._print_if_saved_setting_differs(self.variational, 'variational', npz_file):
self.variational = npz_file['variational']
if self._print_if_saved_setting_differs(self.dropout_prob, 'dropout_prob', npz_file):
self.dropout_prob = npz_file['dropout_prob']
if self._print_if_saved_setting_differs(self.weight_penalty, 'weight_penalty', npz_file):
self.weight_penalty = npz_file['weight_penalty']
if self._print_if_saved_setting_differs(self.activation_func, 'activation_func', npz_file):
self.activation_func = npz_file['activation_func']
if self._print_if_saved_setting_differs(self.loss_func, 'loss_func', npz_file):
self.loss_func = npz_file['loss_func']
if self._print_if_saved_setting_differs(self.weight_initialization, 'weight_initialization', npz_file):
self.weight_initialization = npz_file['weight_initialization']
# Re-initializes the tensorflow graph based on the hyperparameters loaded from the file
self.graph = tf.Graph()
self.build_graph()
self.initialize_session()
self.saver.restore(self.session, checkpoint_file)
def _print_if_saved_setting_differs(self, class_var, setting_name, npz_file):
"""If the saved hyperparameter in an npz_file is different than the current
class setting, will print an error message.
Args:
class_var: The variable containing the current setting for this
hyperparameter in the class.
setting_name: The string name of the setting, used to index the npz_file
            npz_file: A variable containing data loaded from an npz file
Returns:
A Boolean that will be true if the setting is different.
"""
if setting_name not in npz_file.keys():
print "ERROR! The setting", setting_name, "is not in the saved model file."
print "Using default value:", class_var
print ""
return False
equal = True
if type(class_var) is list:
if len(class_var) != len(npz_file[setting_name]):
equal = False
else:
for i in range(len(class_var)):
if class_var[i] != npz_file[setting_name][i]:
equal = False
elif class_var != npz_file[setting_name]:
equal = False
if not equal:
print "WARNING! Saved setting for", setting_name, "is different!"
print "\tModel's current value for", setting_name, "is", class_var
print "\tBut it was saved as", npz_file[setting_name]
print "Overwriting setting", setting_name, "with new value:", npz_file[setting_name]
print ""
return True
def set_classification_params(self, weight_penalty=None, learning_rate=None,
dropout_prob=None, activation_func=None, batch_size=None,
loss_func=None, suppress_warning=False):
"""Sets all of the model's classification hyperparameters if classification will be used. If
a hyperparameter is not included in the arguments will use the class default.
Note: if these settings are changed, the model we need to re-construct the computation graph,
erasing any learned weights that are unsaved.
Args:
weight_penalty: The strength of the L2 weight regularization penalty.
learning_rate: The initial learning rate used for training with classification.
dropout_prob: The probability that a node in the network will not
be dropped out during training. Set to < 1.0 to apply dropout,
1.0 to remove dropout.
activation_func: A string naming the activation function used in the classification
portion of the network.
batch_size: Number of samples in a classification training batch.
loss_func: The classification loss function to use.
suppress_warning: A Boolean which if true, will not print a statement warning that the graph
will be reset.
"""
self.classification_learning_rate = learning_rate if learning_rate is not None else self.classification_learning_rate
self.classification_dropout_prob = dropout_prob if dropout_prob is not None else self.classification_dropout_prob
self.classification_activation_func = activation_func if activation_func is not None else self.classification_activation_func
self.classification_weight_penalty = weight_penalty if weight_penalty is not None else self.classification_weight_penalty
self.classification_batch_size = batch_size if batch_size is not None else self.classification_batch_size
self.classification_loss_func = loss_func if loss_func is not None else self.classification_loss_func
if not suppress_warning:
print "In order for these changes to take effect, the model will now reconstruct the computation graph. Unsaved changes will be lost."
self.rebuild_reinitialize()
def predict(self, X):
"""Gets the autoencoder's reconstructed version of some data X
Args:
X: a matrix of data in the same format as the training
data.
Returns:
Reconstructed version of the data
"""
feed_dict = {self.noisy_X: X,
self.true_X: X,
self.tf_dropout_prob: 1.0} # no dropout during evaluation
reconstruction, loss = self.session.run([self.decoded_X,
self.reconstruction_loss],
feed_dict)
if 'entropy' in self.loss_func:
loss = loss / len(X)
return reconstruction, loss
def plot_training_progress(self):
"""Plots the training and validation performance as evaluated
throughout training.
"""
x = [self.record_every_nth * i for i in np.arange(len(self.train_loss))]
plt.figure()
plt.plot(x,self.train_loss)
plt.plot(x,self.val_loss)
plt.legend(['Train', 'Validation'], loc='best')
plt.xlabel('Training epoch')
plt.ylabel('Loss')
plt.show()
def plot_classification_training_progress(self):
"""Plots the training and validation progress as evaluated
throughout training the classification layers.
"""
x = [self.record_every_nth * i for i in np.arange(len(self.train_acc))]
plt.figure()
plt.plot(x,self.train_acc)
plt.plot(x,self.val_acc)
plt.legend(['Train', 'Validation'], loc='best')
plt.xlabel('Training epoch')
plt.ylabel('Accuracy')
plt.show()
x = [self.record_every_nth * i for i in np.arange(len(self.classification_train_loss))]
plt.figure()
plt.plot(x,self.classification_train_loss)
plt.plot(x,self.classification_val_loss)
plt.legend(['Train', 'Validation'], loc='best')
plt.xlabel('Training epoch')
plt.ylabel('Classification loss')
plt.show()
def test_on_validation(self):
"""Returns performance on the model's validation set.
Returns: Float loss"""
loss = self.get_performance_on_data(self.data_loader.val_X)
print "Final loss on validation data is:", loss
return loss
def test_on_test(self):
"""Returns performance on the model's test set.
Returns: Float loss"""
print "WARNING! Only test on the test set when you have finished choosing all of your hyperparameters!"
print "\tNever use the test set to choose hyperparameters!!!"