-
Notifications
You must be signed in to change notification settings - Fork 5
/
net.py
205 lines (181 loc) · 6.81 KB
/
net.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
# -*- coding:utf-8 -*-
"""
The network of SSD. SSD includes 4 modules:
body network for extracting features.
3 down_sample for generating multi scale features.
5 classification modules.
5 box predicting modules.
Utlize gulon redefine the network.
use net.hybridize()! Base class is nn.HybridBlock, and use nn.HybridSequential() construct a network.
In nn.HybridBlock, we should implement the hybrid_forward() method!
Symbol use the function, ndarray must can use! The name of function must be same!
"""
from mxnet.gluon import nn
from mxnet import sym
from mxnet import nd
from mxnet.contrib.ndarray import MultiBoxPrior
# from mxnet.contrib.symbol import MultiBoxPrior
# import utlized functions.
from utils import class_predictor, box_predictor, flatten_prediction, concat_predictions
# Main network for extracting features. Could be ResNet.
class Residual(nn.Block):
def __init__(self, channels, same_shape=True, **kwargs):
super(Residual, self).__init__(**kwargs)
self.same_shape = same_shape
strides = 1 if same_shape else 2
self.conv1 = nn.Conv2D(channels, kernel_size=3, padding=1,
strides=strides)
self.bn1 = nn.BatchNorm()
self.conv2 = nn.Conv2D(channels, kernel_size=3, padding=1)
self.bn2 = nn.BatchNorm()
if not same_shape:
self.conv3 = nn.Conv2D(channels, kernel_size=1,
strides=strides)
def forward(self, x):
out = nd.relu(self.bn1(self.conv1(x)))
out = self.bn2(self.conv2(out))
if not self.same_shape:
x = self.conv3(x)
return nd.relu(out + x)
'''
def Residual(channels, same_shape=True):
strides = 1 if same_shape else 2
out = nn.Sequential()
out.add(
nn.Conv2D(channels=channels, kernel_size=3, padding=1, strides=strides),
nn.BatchNorm(),
nn.Conv2D(channels=channels, kernel_size=3, padding=1), # default stride=1.
nn.BatchNorm()
)
if not same_shape:
out.add(nn.Conv2D(channels=channels, kernel_size=1, strides=strides))
return out
'''
# Have problem, loss is nan! This problem might come from the dim.
def ResNet():
# block 1
b1 = nn.Conv2D(channels=64, kernel_size=7, strides=2)
# block 2
b2 = nn.Sequential()
b2.add(
nn.MaxPool2D(pool_size=2, strides=2),
Residual(64),
Residual(64)
)
# block 3
b3 = nn.Sequential()
b3.add(
Residual(128, same_shape=False),
Residual(128)
)
# block 4
b4 = nn.Sequential()
b4.add(
Residual(256, same_shape=False),
Residual(256)
)
# block 5
b5 = nn.Sequential()
b5.add(
Residual(512, same_shape=False),
Residual(512)
)
# chain all blocks together
out = nn.Sequential()
out.add(b1, b2, b3, b4)
return out
# Down sample.
'''
Define a conv block for generating multi scale features, including two Conv-BatchNorm-Relu
blocks and then a pooling layer to halve the feature size.
'''
def down_sample(num_filters):
"""stack two Conv-BatchNorm-Relu blocks and then a pooling layer to halve the feature size"""
out = nn.Sequential()
for _ in range(2):
out.add(nn.Conv2D(channels=num_filters, kernel_size=3, strides=1, padding=1),
nn.BatchNorm(),
nn.Activation("relu")
)
out.add(nn.MaxPool2D(pool_size=2, strides=2))
return out
def body():
out = nn.Sequential()
for nfilters in [16, 32, 64]:
out.add(down_sample(num_filters=nfilters))
return out
# Build a SSD model.
'''
SSD includes 4 modules:
body network for extracting features.
3 down_sample for generating multi scale features.
5 classification modules.
5 box predicting modules.
The predictions are applied on the main(body) network output, halved module output(down_sample),
and finally the global pooled layer, respectively.
'''
def ssd_model(num_anchors, num_classes):
downsamplers = nn.Sequential()
for _ in range(3):
downsamplers.add(down_sample(128))
class_predictors = nn.Sequential()
box_predictors = nn.Sequential()
for _ in range(5):
class_predictors.add(class_predictor(num_anchors, num_classes))
box_predictors.add(box_predictor(num_anchors))
model = nn.Sequential()
model.add(body(), downsamplers, class_predictors, box_predictors)
return model
# Conpute prediction.
def ssd_forward(x, model, sizes, ratios, verbose=False):
body, downsamplers, class_predictors, box_predictors = model
anchors, class_preds, box_preds = [], [], []
# anchor box, class prediction, box prediction.
# feature extraction
x = body(x)
for i in range(5):
# predict
anchors.append(MultiBoxPrior(x, sizes=sizes[i], ratios=ratios[i]))
class_preds.append(flatten_prediction(class_predictors[i](x)))
box_preds.append(flatten_prediction(box_predictors[i](x)))
if verbose:
print('Predict scale', i, x.shape, 'with', anchors[-1].shape[1], 'anchors')
# down sample
if i < 3:
x = downsamplers[i](x)
elif i == 3:
# nn.GlobalMaxPool2D()
x = nd.Pooling(x, global_pool=True, pool_type="max", kernel=(x.shape[2], x.shape[3]))
# concat date
return (concat_predictions(anchors),
concat_predictions(class_preds),
concat_predictions(box_preds))
# class SSD, whole model.
class SSD(nn.Block):
def __init__(self, num_classes, sizes, ratios, verbose=False, **kwargs):
super(SSD, self).__init__(**kwargs)
# anchor box sizes and ratios for 5 feature scales.
self.sizes = sizes
self.ratios = ratios
self.num_classes = num_classes
self.verbose = verbose
num_anchors = len(self.sizes[0]) + len(self.ratios[0]) - 1
# use name_scope to guard the names
with self.name_scope():
self.model = ssd_model(num_anchors, self.num_classes)
def forward(self, x):
anchors, class_preds, box_preds = ssd_forward(
x, self.model, self.sizes, self.ratios, verbose=self.verbose)
# it is better to have class predictions reshaped for softmax computation
class_preds = class_preds.reshape(shape=(0, -1, self.num_classes+1))
return anchors, class_preds, box_preds
if __name__ == '__main__':
from mxnet import nd
# test network
sizes = [[.2, .272], [.37, .447], [.54, .619],
[.71, .79], [.88, .961]]
ratios = [[1, 2, .5]] * 5
net = SSD(num_classes=20, sizes=sizes, ratios=ratios)
net.initialize()
x = nd.random.uniform(shape=(1, 3, 300, 300))
y = net(x)