finger_resnet_batch.py
import numpy as np
import tensorflow as tf
import time
from data_loaders import FingerDataBatch
class ResNet:
def __init__(self, blocks, batchsize, epoch_num, dropout=.5, lr=.001, augment=True, normalize=True, debug=False):
"""
        Defines the hyperparameters, creates the data loader, and sets up the TensorFlow placeholders.
"""
self.start = time.time()
tf.reset_default_graph()
# Initialize the hyperparameters
self.batch_size = batchsize
self.learning_rate = lr
self.epochs = epoch_num
self.debug = debug
self.do_augment = augment
self.normalize = normalize
        self.blocks = blocks
        self.dropout = dropout  # keep-probability fed to tf.nn.dropout while training
# Batch normalization parameters
self.norm_beta = 0.0
self.norm_gamma = 1.0
self.norm_epsilon = 0.001
self.means = tf.Variable(tf.zeros([1,1,1,4]), dtype=tf.float32, trainable=False, name='means')
self.varis = tf.Variable(tf.zeros([1,1,1,4]), dtype=tf.float32, trainable=False, name='variances')
# Create the data class
if self.debug: print("Loading data...")
self.data = FingerDataBatch(self.batch_size)
        # Create a session that can be accessed throughout the class
self.session = tf.Session()
# Defines placeholders and overall network variables
self.images = tf.placeholder(tf.float32, [None, 300, 300, 3], name='images')
self.labels = tf.placeholder(tf.int32, [None], name='labels')
self.is_training = tf.placeholder_with_default(1, [], name='is_training')
self.dropout_rate = tf.placeholder(tf.float32, shape=[], name="dropout_rate")
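    # Note: is_training defaults to 1 (training mode), so any evaluation-time run must
    # explicitly feed 0 to switch batch normalization to the accumulated global statistics.
    # A minimal sketch (assumes the graph has already been built via inference/backprop):
    #   self.session.run(self.accuracy, feed_dict={self.images: x, self.labels: y,
    #                                              self.is_training: 0, self.dropout_rate: 1.0})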
def one_hot(self, lbls):
"""
Transforms labels into one hot vectors via TF node.
Args:
lbls: Labels to be converted.
Returns:
one_hots: One hot encoded labels.
"""
one_hots = tf.one_hot(lbls, 5)
return one_hots
def augment(self, inpt):
"""
        Augments the data with random left-right flips and brightness changes (random contrast is currently disabled).
Args:
inpt: Images to be augmented.
Returns:
bright: Augmented images to be passed on.
"""
flip = tf.map_fn(lambda img: tf.image.random_flip_left_right(img), inpt, name='flip')
bright = tf.map_fn(lambda img: tf.image.random_brightness(img, .25), flip, name='brightness')
# contrast = tf.map_fn(lambda img: tf.image.random_contrast(img, 0, .5), bright, name='contrast')
return bright
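    # The augmentation is flips + brightness only; the contrast step above is left disabled.
    # Its (0, .5) range would only ever reduce contrast; if it were re-enabled, a more
    # symmetric range is common (illustrative values, not from the original), e.g.:
    #   contrast = tf.map_fn(lambda img: tf.image.random_contrast(img, 0.5, 1.5), bright, name='contrast')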
def normalize_images(self, inpt):
"""
Normalizes images PER IMAGE by subtracting mean and dividing by standard deviation
Args:
inpt: Images to be normalized
Returns:
Normalized images
"""
return tf.map_fn(lambda img: tf.image.per_image_standardization(img), inpt, name='normalization')
def flatten(self, inpt):
"""
        Flattens each example into a single feature vector (shape [batch, features]). Used after the convolutional layers.
Args:
inpt: Tensor to be flattened.
Returns:
flat: Flattened tensor.
"""
shape = inpt.get_shape().as_list()
flat = tf.reshape(inpt, [-1, shape[1] * shape[2] * shape[3]], name='flatten')
return flat
def batch_normalize(self, inpt):
"""
        Normalizes the batch. During training it uses the current batch statistics; at test time it uses the
        global mean and variance accumulated over the course of training.
Args:
inpt: Images to be batch normalized.
Returns:
Batch normalized images.
"""
        def training(inpt):
            # Per-batch statistics; accumulate them into the running sums so the test-time
            # branch has usable global values. The control dependency ensures the assign
            # ops actually run as part of the forward pass.
            mean, var = tf.nn.moments(inpt, [1, 2, 3], keep_dims=True)
            update_mean = tf.assign(self.means, self.means + tf.reduce_mean(mean), name='update_mean')
            update_var = tf.assign(self.varis, self.varis + tf.reduce_mean(var), name='update_varis')
            with tf.control_dependencies([update_mean, update_var]):
                return tf.nn.batch_normalization(inpt, mean, var, self.norm_beta, self.norm_gamma, self.norm_epsilon, name='batchnorm')
def not_training(inpt):
mean = tf.reduce_mean(self.means, name='global_mean')
var = tf.reduce_mean(self.varis, name='global_var')
return tf.nn.batch_normalization(inpt, mean, var, self.norm_beta, self.norm_gamma, self.norm_epsilon, name='batchnorm_global')
return tf.cond(tf.equal(self.is_training, 1), lambda: training(inpt), lambda: not_training(inpt), name='batchnorm_cond')
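    # Caveat on the scheme above: the accumulators hold running *sums* of batch statistics
    # rather than a moving average, and one pair of accumulators is shared by every layer
    # that calls batch_normalize. This is a rough approximation of standard batch norm,
    # which would normally track per-layer, per-channel moving averages (e.g. with
    # tf.train.ExponentialMovingAverage); that alternative is a sketch, not the author's code.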
def max_pool(self, inpt):
"""
A 2x2 strided max pooling operation
Args:
inpt: Images to be max pooled
Returns:
Max pooled images
"""
return tf.nn.max_pool(inpt, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = "SAME", name='maxpool')
def convolution_layer(self, inpt, filters, in_depth=1):
"""
3x3 Convolutional layer.
Args:
inpt: Images to be convolved.
filters: Number of filter kernels to create
in_depth: The number of incoming filter maps/channels
Returns:
Convolved images.
"""
conv_weight = tf.get_variable("conv_weights", [3, 3, in_depth, filters],
initializer=tf.random_normal_initializer(stddev=.001))
conv_bias = tf.get_variable("conv_biases", filters, initializer=tf.constant_initializer(0.0))
        # No ReLU here: callers apply the activation themselves, because the last convolution of a residual block is only activated after the residual is added
return tf.nn.conv2d(inpt, conv_weight, strides=[1, 1, 1, 1], padding="SAME") + conv_bias
def convolution_layer_1x1(self, inpt):
"""
1x1 Convolutional layer. Changes filter map depth to 1.
Args:
inpt: Images to be convolved.
Returns:
Convolved images with 1 filter map.
"""
conv_weight = tf.get_variable("1x1_conv_weights", [1, 1, inpt.shape[3], 1],
initializer=tf.random_normal_initializer(stddev=.001))
conv_bias = tf.get_variable("1x1_conv_biases", 1, initializer=tf.constant_initializer(0.0))
return tf.nn.relu((tf.nn.conv2d(inpt, conv_weight, strides=[1, 1, 1, 1], padding="SAME") + conv_bias))
def residual_block(self, inpt, name):
"""
        The ResNet part: creates two convolutional layers and adds the input of the block to the output.
        Because the input has more than one channel, it is projected to a single channel with a 1x1 convolution before the addition.
Args:
inpt: Images to be convolved.
Returns:
Convolved images.
"""
# Incoming input will always have multiple channels
with tf.variable_scope("residual_conv"+name):
residual = self.convolution_layer_1x1(inpt)
residual = self.batch_normalize(residual)
with tf.variable_scope("conv{}-1".format(name)):
first_layer = self.convolution_layer(inpt, 16, inpt.shape[3])
first_layer = tf.nn.relu(first_layer)
first_layer = self.batch_normalize(first_layer)
with tf.variable_scope("conv{}-2".format(name)):
second_layer = self.convolution_layer(first_layer, 8, 16) # 32->8 due to OOM
second_layer = self.batch_normalize(second_layer)
res_connect = tf.add(second_layer, residual)
if self.debug: print("CONV BLOCK CREATED")
return tf.nn.relu(res_connect)
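    # Block structure (stride-1 SAME convolutions, so spatial size is preserved):
    #   shortcut: input --1x1 conv + ReLU--> 1 channel --batch norm--------------+
    #   main:     input --3x3 conv(16)--ReLU--BN--3x3 conv(8)--BN----------------+--add--ReLU
    # The add broadcasts the 1-channel shortcut across the 8 output channels, so every
    # block emits an 8-channel map of the same height and width as its input.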
def fully_connected(self, inpt, neurons):
"""
A fully connected layer with dropout.
Args:
inpt: Input to be fed forward.
neurons: How many neurons to create in the layer
Returns:
Output of layer.
"""
def keep(a): # Helper function to return activation for the tf.cond
return a
fc_weight = tf.get_variable("fc_weights", [inpt.shape[1], neurons],
initializer=tf.random_normal_initializer(stddev=0.001))
fc_bias = tf.get_variable("fc_biases", [neurons], initializer=tf.constant_initializer(0.0))
activation = tf.nn.relu((tf.matmul(inpt, fc_weight) + fc_bias))
# Only dropout when training
dropped = tf.cond(tf.equal(self.is_training, 1),
lambda: tf.nn.dropout(activation, self.dropout_rate),
lambda: keep(activation), name='dropout_cond')
return dropped
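    # Note: in TF 1.x the second positional argument of tf.nn.dropout is keep_prob, so the
    # dropout_rate placeholder actually holds the probability of *keeping* a unit
    # (0.5 keeps half the activations, 1.0 disables dropout).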
def output_layer(self, inpt):
"""
        The output layer, which returns the 5 class scores as logits for the softmax cross-entropy loss.
Args:
inpt: Input to be fed forward.
Returns:
Output of network in logits form.
"""
out_weight = tf.get_variable("out_weights", [inpt.shape[1], 5],
initializer=tf.random_normal_initializer(stddev=0.001))
out_bias = tf.get_variable("out_biases", [5], initializer=tf.constant_initializer(0.0))
activation = tf.matmul(inpt, out_weight) + out_bias
return activation
def inference(self):
"""
Feeds forward the images through the network.
Returns:
Output of network in logits form.
"""
        # Randomly flip the images and alter their brightness
if self.do_augment:
images = self.augment(self.images)
else:
images = self.images # for compatibility with the above code
if self.normalize:
images = self.normalize_images(images)
# First convolutional layer
with tf.variable_scope("conv"):
first_conv = self.convolution_layer(images, 32, 3)
throughput = self.max_pool(first_conv)
# Residual blocks
        for i in range(self.blocks):
with tf.variable_scope("conv_block{}".format(i + 1)):
throughput = self.residual_block(throughput, str(i))
# Fully connected layer
with tf.variable_scope("fc"):
flattened = self.flatten(throughput)
fc = self.fully_connected(flattened, 100)
# Output layer
with tf.variable_scope("output"):
output = self.output_layer(fc)
return output
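    # Rough shape walk-through, assuming the 300x300x3 inputs declared in __init__:
    #   conv(32) + 2x2 max pool   -> 150 x 150 x 32
    #   each residual block       -> 150 x 150 x 8   (no pooling inside blocks)
    #   flatten                   -> 150 * 150 * 8 = 180,000 features
    #   fully connected           -> 100 units, then 5 output logits
    # Most of the parameters therefore live in the 180,000 x 100 FC weight matrix.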
def loss_function(self, logits):
"""
Cross entropy loss
Args:
logits: output of the network, unactivated
Returns:
Mean loss
"""
return tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.one_hot_labels, logits=logits))
def backprop(self, output):
"""
        The backpropagation step with the Adam optimizer. Also computes the accuracy metric and creates summary statistics.
"""
self.one_hot_labels = self.one_hot(self.labels)
self.loss = self.loss_function(output)
tf.summary.scalar("Loss", self.loss)
with tf.variable_scope("optimizer"):
optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
self.minimize_loss = optimizer.minimize(self.loss)
with tf.variable_scope("accuracy"):
# Accuracy calculation: see if the position of the maximum value of the label is the same as the maximum value of the output
correct_bools = tf.equal(tf.argmax(output, 1), tf.argmax(self.one_hot_labels, 1))
# If so, it is a one, if not, it is a zero. Take the average of those 1s and 0s to get the accuracy
self.accuracy = tf.reduce_mean(tf.cast(correct_bools, tf.float32))
tf.summary.scalar("Accuracy", self.accuracy)
self.merged_summaries = tf.summary.merge_all()
def train(self, continue_training=False):
"""
Train the network with the training data.
Can be used to pick up training at a later date with continue_training.
Args:
            continue_training: Set to True to load the most recent checkpoint and continue training from it.
"""
print("Generating dataflow graph...")
output = self.inference()
self.backprop(output)
print("Dataflow graph complete.")
# name of the run e.g. cb4b100d5lr001train = training for 4 conv blocks, 100 batch size, dropout 50%, learning rate .001
        run = 'cb{}b{}d{}lr{}'.format(self.blocks, self.batch_size, str(self.dropout)[2:], str(self.learning_rate)[2:])
train_writer = tf.summary.FileWriter("./summaries/"+run+"train", tf.get_default_graph())
validation_writer = tf.summary.FileWriter("./summaries/"+run+"validation", tf.get_default_graph())
print("Data writers created.")
saver = tf.train.Saver(max_to_keep=5)
if continue_training:
print("Continuing training...")
            # Checkpoints in this file are written under ./checkpoints/, so restore from there
            saver.restore(self.session, tf.train.latest_checkpoint('./checkpoints'))
print("Weights restored...")
else:
self.session.run(tf.global_variables_initializer())
step = 0
prev_acc = 0
t_size, v_size, tt_size = self.data.get_sizes()
print("Ready for training...")
for e in range(self.epochs):
print("Epoch:", e+1)
for b in range(t_size // self.batch_size):
x, y = self.data.get_training_batch()
                summary, _acc, _loss, _ = self.session.run(
                    [self.merged_summaries, self.accuracy, self.loss, self.minimize_loss],
                    feed_dict={self.images: x,
                               self.labels: y,
                               self.is_training: 1,
                               self.dropout_rate: self.dropout})
train_writer.add_summary(summary, step)
if self.debug: print(_acc)
# Validate every 50 steps
# if step % 50 == 0:
# for bv in range(v_size // self.batch_size):
# v_x, v_y = self.data.get_validation_batch()
# # is_training is left on to allow us to use all available data to better find the real moments
# summary, v_acc, _loss = self.session.run([self.merged_summaries, self.accuracy, self.loss],
# feed_dict={self.images: v_x, self.labels: v_y,
# self.is_training: 1, self.dropout_rate: 1})
# validation_writer.add_summary(summary, step)
# print("Current STEP:", step)
# print("Validation accuracy:", v_acc)
# print("Validation loss:", _loss)
# if v_acc > prev_acc: #and step WHAT IS AND STEP?
# saver.save(self.session, "./checkpoints/fingers-{:.2f}-step".format(v_acc), global_step=step)
# prev_acc = v_acc
step += 1
        # Use the last training-batch accuracy in the checkpoint name (v_acc is unavailable while validation is commented out)
        saver.save(self.session, "./checkpoints/fingers-{:.2f}-end-step".format(_acc), global_step=step)
self.data.delete_temp_files()
end = time.time()
print("Finished.")
        print('\nTotal time elapsed: {:.2f} minutes'.format((end - self.start) / 60))
def test(self):
"""
Test on the test data. Only use when training is complete.
Returns:
test_acc: Test accuracy.
"""
raise NotImplementedError
# saver = tf.train.Saver()
# saver.restore(self.session, tf.train.latest_checkpoint('./checkpoints/train'))
# saver = tf.train.import_meta_graph(checkpoints_file_name + '.meta')
# saver.restore(sess, checkpoints_file_name)
if __name__ == "__main__":
model = ResNet(blocks=4, batchsize=64, epoch_num=10, dropout=.5, lr=.001, debug=False)
model.train()
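# Follow-on usage sketches (assumptions, not part of the original run):
# - Resume from the latest checkpoint written under ./checkpoints/:
#     model = ResNet(blocks=4, batchsize=64, epoch_num=10, dropout=.5, lr=.001)
#     model.train(continue_training=True)
# - Inspect the loss/accuracy curves logged above with TensorBoard:
#     tensorboard --logdir ./summaries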