-
Notifications
You must be signed in to change notification settings - Fork 395
/
layers.py
389 lines (317 loc) · 14.4 KB
/
layers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
# import keras
from tensorflow import keras
import tensorflow as tf
import numpy as np
class BatchNormalization(keras.layers.BatchNormalization):
    """
    keras.layers.BatchNormalization with an additional `freeze` switch.

    When `freeze` is truthy the layer is flagged as non-trainable and the parent `call`
    is always invoked with training=False, whatever `training` value the caller passes.
    """

    def __init__(self, freeze, *args, **kwargs):
        """
        Initializer for the freezable BatchNormalization layer.
        Args
            freeze: Truthy to freeze the layer, falsy to leave it trainable.
            *args, **kwargs: Forwarded unchanged to keras.layers.BatchNormalization.
        """
        self.freeze = freeze
        super(BatchNormalization, self).__init__(*args, **kwargs)
        # assigned after the parent constructor has run so that this value is the one that sticks
        self.trainable = not self.freeze

    def call(self, inputs, training=None, **kwargs):
        """
        Runs the parent batch normalization with a training flag adjusted for `freeze`.
        Args
            inputs: Input passed straight through to the parent `call`.
            training: Requested mode. Any falsy value (including None) selects training=False.
            **kwargs: Accepted for signature compatibility; not forwarded to the parent.
        Returns
            The result of keras.layers.BatchNormalization.call.
        """
        # training mode is only used when it was requested AND the layer is not frozen
        effective_training = (not self.freeze) if training else False
        return super(BatchNormalization, self).call(inputs, training=effective_training)

    def get_config(self):
        """
        Returns the parent configuration extended with the 'freeze' entry.
        """
        layer_config = super(BatchNormalization, self).get_config()
        layer_config['freeze'] = self.freeze
        return layer_config
class wBiFPNAdd(keras.layers.Layer):
    """
    Keras layer that merges several inputs with a learned weighted sum.

    One scalar weight is learned per input. The weights are passed through ReLU, each input
    is scaled by its weight, the scaled inputs are summed, and the sum is divided by
    (sum of the weights + epsilon).
    """

    def __init__(self, epsilon=1e-4, **kwargs):
        """
        Initializer for the wBiFPNAdd layer.
        Args
            epsilon: Constant added to the weight sum before dividing.
            **kwargs: Forwarded to keras.layers.Layer.
        """
        super(wBiFPNAdd, self).__init__(**kwargs)
        self.epsilon = epsilon

    def build(self, input_shape):
        """
        Creates the weight vector, one entry per element of `input_shape`.
        Args
            input_shape: Sequence with one entry per input; only its length is used.
        """
        num_in = len(input_shape)
        # every weight starts at 1 / num_in
        self.w = self.add_weight(
            name=self.name,
            shape=(num_in,),
            initializer=keras.initializers.constant(1 / num_in),
            trainable=True,
            dtype=tf.float32,
        )

    def call(self, inputs, **kwargs):
        """
        Computes the normalized weighted sum of `inputs`.
        Args
            inputs: Indexable collection of inputs, one per learned weight.
        Returns
            The weighted sum of the inputs divided by (sum of weights + epsilon).
        """
        # ReLU maps negative weights to zero before they are used
        weights = keras.activations.relu(self.w)
        scaled_inputs = [weights[idx] * inputs[idx] for idx in range(len(inputs))]
        fused = tf.reduce_sum(scaled_inputs, axis=0)
        normalizer = tf.reduce_sum(weights) + self.epsilon
        return fused / normalizer

    def compute_output_shape(self, input_shape):
        """
        Reports the shape of the first input as the output shape.
        """
        return input_shape[0]

    def get_config(self):
        """
        Returns the parent configuration extended with the 'epsilon' entry.
        """
        layer_config = super(wBiFPNAdd, self).get_config()
        layer_config.update({'epsilon': self.epsilon})
        return layer_config
def bbox_transform_inv(boxes, deltas, mean=None, std=None):
    """
    Applies deltas (usually regression results) to boxes (usually anchors).
    Each delta is first de-normalized as `delta * std + mean`, then scaled by the box width
    (for the x coordinates) or the box height (for the y coordinates) and added to the
    matching box coordinate.
    Args
        boxes: Rank-3 input indexed as [batch, box, 4], the last axis holding (x1, y1, x2, y2).
        deltas: Input indexed the same way as boxes, holding (d_x1, d_y1, d_x2, d_y2) as
            normalized factors of the box width/height.
        mean: Per-coordinate mean used when the deltas were normalized (defaults to [0, 0, 0, 0]).
        std: Per-coordinate standard deviation used when the deltas were normalized
            (defaults to [0.2, 0.2, 0.2, 0.2]).
    Returns
        The shifted coordinates (x1, y1, x2, y2) stacked along axis 2 with keras.backend.stack.
    """
    if mean is None:
        mean = [0, 0, 0, 0]
    if std is None:
        std = [0.2, 0.2, 0.2, 0.2]

    box_width = boxes[:, :, 2] - boxes[:, :, 0]
    box_height = boxes[:, :, 3] - boxes[:, :, 1]

    # x coordinates (indices 0 and 2) scale with the width, y coordinates (1 and 3) with the height
    extents = (box_width, box_height, box_width, box_height)
    shifted = [
        boxes[:, :, k] + (deltas[:, :, k] * std[k] + mean[k]) * extents[k]
        for k in range(4)
    ]
    return keras.backend.stack(shifted, axis=2)
class ClipBoxes(keras.layers.Layer):
    """
    Keras layer that clips box coordinates so they fall inside the image they belong to.
    """

    def call(self, inputs, **kwargs):
        """
        Clips the boxes against the dynamic shape of the image.
        Args
            inputs: Pair of (image, boxes). Axis 1 of the image shape is read as the height and
                axis 2 as the width; boxes is indexed as [batch, box, 4] with (x1, y1, x2, y2).
        Returns
            The clipped (x1, y1, x2, y2) values stacked along axis 2.
        """
        image, boxes = inputs
        image_shape = keras.backend.cast(keras.backend.shape(image), keras.backend.floatx())
        height = image_shape[1]
        width = image_shape[2]

        # x values are limited to [0, width - 1], y values to [0, height - 1]
        upper_bounds = (width - 1, height - 1, width - 1, height - 1)
        clipped = [
            tf.clip_by_value(boxes[:, :, k], 0, upper_bounds[k])
            for k in range(4)
        ]
        return keras.backend.stack(clipped, axis=2)

    def compute_output_shape(self, input_shape):
        """
        Reports the shape of the boxes input (second entry) as the output shape.
        """
        return input_shape[1]
class RegressBoxes(keras.layers.Layer):
    """
    Keras layer that applies regression deltas to boxes via bbox_transform_inv.
    """

    def __init__(self, mean=None, std=None, *args, **kwargs):
        """
        Initializer for the RegressBoxes layer.
        Args
            mean: Mean used to normalize the regression values; np.ndarray, list or tuple
                (defaults to float32 zeros of length 4).
            std: Standard deviation used to normalize the regression values; np.ndarray, list
                or tuple (defaults to float32 [0.2, 0.2, 0.2, 0.2]).
            *args, **kwargs: Forwarded to keras.layers.Layer.
        Raises
            ValueError: If mean or std is given but is not a np.ndarray, list or tuple.
        """
        # normalize `mean` to an ndarray: default, convert a sequence, or reject anything else
        if mean is None:
            mean = np.array([0, 0, 0, 0], dtype='float32')
        elif isinstance(mean, (list, tuple)):
            mean = np.array(mean)
        elif not isinstance(mean, np.ndarray):
            raise ValueError('Expected mean to be a np.ndarray, list or tuple. Received: {}'.format(type(mean)))

        # same treatment for `std`
        if std is None:
            std = np.array([0.2, 0.2, 0.2, 0.2], dtype='float32')
        elif isinstance(std, (list, tuple)):
            std = np.array(std)
        elif not isinstance(std, np.ndarray):
            raise ValueError('Expected std to be a np.ndarray, list or tuple. Received: {}'.format(type(std)))

        self.mean = mean
        self.std = std
        super(RegressBoxes, self).__init__(*args, **kwargs)

    def call(self, inputs, **kwargs):
        """
        Applies the regression values to the anchors.
        Args
            inputs: Pair of (anchors, regression).
        Returns
            The result of bbox_transform_inv using this layer's mean and std.
        """
        anchors, regression = inputs
        return bbox_transform_inv(anchors, regression, mean=self.mean, std=self.std)

    def compute_output_shape(self, input_shape):
        """
        Reports the shape of the anchors input (first entry) as the output shape.
        """
        return input_shape[0]

    def get_config(self):
        """
        Returns the parent configuration extended with 'mean' and 'std' as plain lists.
        """
        layer_config = super(RegressBoxes, self).get_config()
        layer_config.update({
            'mean': self.mean.tolist(),
            'std': self.std.tolist(),
        })
        return layer_config
def filter_detections(
        boxes,
        classification,
        class_specific_filter=True,
        nms=True,
        score_threshold=0.01,
        max_detections=300,
        nms_threshold=0.5
):
    """
    Filter detections using the boxes and classification values.
    Args
        boxes: Tensor of shape (num_boxes, 4) containing the boxes in (x1, y1, x2, y2) format.
        classification: Tensor of shape (num_boxes, num_classes) containing the classification scores.
            In per-class mode the class dimension is converted with int(), so it has to be
            statically known.
        class_specific_filter: Whether to perform filtering per class, or take the best scoring class and filter those.
        nms: Flag to enable/disable non maximum suppression.
        score_threshold: Threshold used to prefilter the boxes with (scores must be strictly greater).
        max_detections: Maximum number of detections to keep.
        nms_threshold: Threshold for the IoU value to determine when a box should be suppressed.
    Returns
        A list of [boxes, scores, labels].
        boxes is shaped (max_detections, 4) and contains the (x1, y1, x2, y2) of the kept boxes.
        scores is shaped (max_detections,) and contains the scores of the predicted class.
        labels is shaped (max_detections,), is cast to int32 and contains the predicted label.
        In case there are less than max_detections detections, the tensors are padded with -1's.
    """

    def _select_indices(candidate_scores, candidate_labels):
        # Returns (box index, label) pairs for the candidates that survive the score threshold
        # and, when enabled, NMS.

        # tf.where on a 1-D condition yields one row per hit: (num_score_keeps, 1)
        kept = tf.where(keras.backend.greater(candidate_scores, score_threshold))

        if nms:
            # (num_score_keeps, 4)
            kept_boxes = tf.gather_nd(boxes, kept)
            # gathering with (num_score_keeps, 1) indices yields (num_score_keeps, 1),
            # e.g. [[0.5], [0.7]]; the slice drops the trailing axis again
            kept_scores = keras.backend.gather(candidate_scores, kept)[:, 0]

            survivors = tf.image.non_max_suppression(
                kept_boxes,
                kept_scores,
                max_output_size=max_detections,
                iou_threshold=nms_threshold,
            )

            # restrict the kept rows to those NMS selected: (num_score_nms_keeps, 1)
            kept = keras.backend.gather(kept, survivors)

        # (num_score_nms_keeps, )
        kept_labels = tf.gather_nd(candidate_labels, kept)
        # pair every box index with its label: (num_score_nms_keeps, 2)
        return keras.backend.stack([kept[:, 0], kept_labels], axis=1)

    if class_specific_filter:
        # filter every class column independently
        num_classes = int(classification.shape[1])
        per_class_indices = []
        for class_id in range(num_classes):
            class_scores = classification[:, class_id]
            class_labels = class_id * tf.ones((keras.backend.shape(class_scores)[0],), dtype='int64')
            per_class_indices.append(_select_indices(class_scores, class_labels))

        # (concatenated_num_score_nms_keeps, 2)
        indices = keras.backend.concatenate(per_class_indices, axis=0)
    else:
        # filter once, using only the best scoring class of every box
        best_scores = keras.backend.max(classification, axis=1)
        best_labels = keras.backend.argmax(classification, axis=1)
        indices = _select_indices(best_scores, best_labels)

    # keep the top k scoring (box, label) pairs
    scores = tf.gather_nd(classification, indices)
    labels = indices[:, 1]
    num_kept = keras.backend.shape(scores)[0]
    scores, top_indices = tf.nn.top_k(scores, k=keras.backend.minimum(max_detections, num_kept))

    # translate the top-k positions back to box rows and labels
    box_rows = keras.backend.gather(indices[:, 0], top_indices)
    top_boxes = keras.backend.gather(boxes, box_rows)
    top_labels = keras.backend.gather(labels, top_indices)

    # pad every output with -1 up to max_detections entries
    pad_size = keras.backend.maximum(0, max_detections - keras.backend.shape(scores)[0])
    top_boxes = tf.pad(top_boxes, [[0, pad_size], [0, 0]], constant_values=-1)
    scores = tf.pad(scores, [[0, pad_size]], constant_values=-1)
    top_labels = tf.pad(top_labels, [[0, pad_size]], constant_values=-1)
    top_labels = keras.backend.cast(top_labels, 'int32')

    # the static shapes are known after padding, so record them
    top_boxes.set_shape([max_detections, 4])
    scores.set_shape([max_detections])
    top_labels.set_shape([max_detections])

    return [top_boxes, scores, top_labels]
class FilterDetections(keras.layers.Layer):
    """
    Keras layer that runs filter_detections (score threshold, NMS, top-k) on every batch item.
    """

    def __init__(
            self,
            nms=True,
            class_specific_filter=True,
            nms_threshold=0.5,
            score_threshold=0.01,
            max_detections=300,
            parallel_iterations=32,
            **kwargs
    ):
        """
        Filters detections using score threshold, NMS and selecting the top-k detections.
        Args
            nms: Flag to enable/disable NMS.
            class_specific_filter: Whether to perform filtering per class, or take the best scoring class and filter those.
            nms_threshold: Threshold for the IoU value to determine when a box should be suppressed.
            score_threshold: Threshold used to prefilter the boxes with.
            max_detections: Maximum number of detections to keep.
            parallel_iterations: Passed to tf.map_fn as its parallel_iterations argument.
            **kwargs: Forwarded to keras.layers.Layer.
        """
        self.nms = nms
        self.class_specific_filter = class_specific_filter
        self.nms_threshold = nms_threshold
        self.score_threshold = score_threshold
        self.max_detections = max_detections
        self.parallel_iterations = parallel_iterations
        super(FilterDetections, self).__init__(**kwargs)

    def call(self, inputs, **kwargs):
        """
        Constructs the NMS graph.
        Args
            inputs : Sequence whose first two entries are the boxes and classification tensors.
        Returns
            The tf.map_fn result: [boxes, scores, labels] with dtypes [floatx, floatx, int32].
        """
        batch_boxes = inputs[0]
        batch_classification = inputs[1]

        def _filter_single_item(args):
            # bind this layer's settings to filter_detections for one (boxes, classification) pair
            item_boxes, item_classification = args[0], args[1]
            return filter_detections(
                item_boxes,
                item_classification,
                nms=self.nms,
                class_specific_filter=self.class_specific_filter,
                score_threshold=self.score_threshold,
                max_detections=self.max_detections,
                nms_threshold=self.nms_threshold,
            )

        # call filter_detections on each batch item
        output_dtypes = [keras.backend.floatx(), keras.backend.floatx(), 'int32']
        return tf.map_fn(
            _filter_single_item,
            elems=[batch_boxes, batch_classification],
            dtype=output_dtypes,
            parallel_iterations=self.parallel_iterations
        )

    def compute_output_shape(self, input_shape):
        """
        Computes the output shapes given the input shapes.
        Args
            input_shape : List of input shapes [boxes, classification].
        Returns
            List of tuples representing the output shapes:
            [filtered_boxes.shape, filtered_scores.shape, filtered_labels.shape]
        """
        boxes_shape = input_shape[0]
        classification_shape = input_shape[1]
        return [
            (boxes_shape[0], self.max_detections, 4),
            (classification_shape[0], self.max_detections),
            (classification_shape[0], self.max_detections),
        ]

    def compute_mask(self, inputs, mask=None):
        """
        This is required in Keras when there is more than 1 output.
        Returns a list of None with one more entry than there are inputs.
        """
        return [None] * (len(inputs) + 1)

    def get_config(self):
        """
        Gets the configuration of this layer.
        Returns
            Dictionary containing the parameters of this layer.
        """
        layer_config = super(FilterDetections, self).get_config()
        layer_config.update({
            'nms': self.nms,
            'class_specific_filter': self.class_specific_filter,
            'nms_threshold': self.nms_threshold,
            'score_threshold': self.score_threshold,
            'max_detections': self.max_detections,
            'parallel_iterations': self.parallel_iterations,
        })
        return layer_config