# train.py
import os

import numpy as np

num_hidden_units = 300        # number of nodes in the hidden layer
minibatch_size = 100          # number of examples in each mini-batch
regularization_rate = 0.01    # coefficient for L2 regularization
learning_rate = 0.001         # step size for gradient descent
# ReLU (Rectified Linear Unit) activation, applied element-wise.
# The dimension arguments are kept for compatibility with the existing call
# sites; np.maximum already handles any input shape.
def relu_function(matrix_content, matrix_dim_x, matrix_dim_y):
    return np.maximum(matrix_content, 0)
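# Illustrative check (not from the original file): negative entries are
# clamped to zero, positive entries pass through unchanged, e.g.
#   relu_function(np.array([[-1., 2.]]), 1, 2)  ->  array([[0., 2.]])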
# Gradient of ReLU: 1 where the pre-activation is positive, 0 elsewhere.
def grad_relu(matrix_content, matrix_dim_x, matrix_dim_y):
    return (matrix_content > 0).astype(float)
# Softmax, computed column-wise; the max is subtracted before exponentiating
# for numerical stability (this does not change the result).
def softmax_function(vector_content):
    shifted = np.exp(vector_content - np.max(vector_content))
    return shifted / np.sum(shifted, axis=0)
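# Sanity-check sketch (hypothetical input, not from the original file): for a
# (10, k) score matrix, every column of the output is a probability
# distribution, i.e.
#   softmax_function(np.random.randn(10, 2)).sum(axis=0)  ->  [1., 1.]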
# Generator yielding (inputs, targets) mini-batches while training the MLP model.
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert inputs.shape[0] == targets.shape[0]
    if shuffle:
        indices = np.arange(inputs.shape[0])
        np.random.shuffle(indices)
    for start_idx in range(0, inputs.shape[0] - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]
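# Usage sketch (assumed data sizes, not from the original file): with 60000
# MNIST-style examples and minibatch_size = 100, this yields 600 batches per
# epoch; any final partial batch is dropped by the range() step above.
#   for x_batch, y_batch in iterate_minibatches(trainX, trainY, minibatch_size, shuffle=True):
#       ...  # x_batch: (100, 784) after the reshape in train(), y_batch: (100, 1)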
# Trains the MLP (784 -> num_hidden_units -> 10) with mini-batch gradient descent.
def train(trainX, trainY):
    # He-style initialization of the parameters
    w1_mat = np.random.randn(num_hidden_units, 28*28) * np.sqrt(2. / (num_hidden_units + 28*28))
    w2_mat = np.random.randn(10, num_hidden_units) * np.sqrt(2. / (10 + num_hidden_units))
    b1_vec = np.zeros((num_hidden_units, 1))
    b2_vec = np.zeros((10, 1))
    trainX = np.reshape(trainX, (trainX.shape[0], 28*28))
    trainY = np.reshape(trainY, (trainY.shape[0], 1))
    for num_epochs in range(25):
        if num_epochs % 2 == 0:
            print("Current epoch number:", num_epochs)
        for batch in iterate_minibatches(trainX, trainY, minibatch_size, shuffle=True):
            x_batch, y_batch = batch
            x_batch = x_batch.T
            y_batch = y_batch.T
            # forward propagation to get the intermediate values and the final output
            z1 = np.dot(w1_mat, x_batch) + b1_vec
            a1 = relu_function(z1, num_hidden_units, minibatch_size)
            z2 = np.dot(w2_mat, a1) + b2_vec
            a2_softmax = softmax_function(z2)
            # one-hot ground truth used for the cross-entropy error
            gt_vector = np.zeros((10, minibatch_size))
            for example_num in range(minibatch_size):
                gt_vector[int(y_batch[0, example_num]), example_num] = 1
            # L2 regularization term, to keep the magnitude of the weights within a limit
            d_w2_mat = regularization_rate * w2_mat
            d_w1_mat = regularization_rate * w1_mat
            # backpropagation: for softmax + cross-entropy, dL/dz2 = a2 - y
            delta_2 = a2_softmax - gt_vector
            d_w2_mat = d_w2_mat + np.dot(delta_2, a1.T)
            d_b2_vec = np.sum(delta_2, axis=1, keepdims=True)
            # dL/dz1 = (W2^T delta_2) elementwise-multiplied by relu'(z1)
            delta_1 = np.dot(w2_mat.T, delta_2) * grad_relu(z1, num_hidden_units, minibatch_size)
            d_w1_mat = d_w1_mat + np.dot(delta_1, x_batch.T)
            d_b1_vec = np.sum(delta_1, axis=1, keepdims=True)
            # average the gradients over the mini-batch
            d_w2_mat = d_w2_mat / minibatch_size
            d_w1_mat = d_w1_mat / minibatch_size
            d_b2_vec = d_b2_vec / minibatch_size
            d_b1_vec = d_b1_vec / minibatch_size
            # gradient-descent parameter update
            w2_mat = w2_mat - learning_rate * d_w2_mat
            b2_vec = b2_vec - learning_rate * d_b2_vec
            w1_mat = w1_mat - learning_rate * d_w1_mat
            b1_vec = b1_vec - learning_rate * d_b1_vec
    # write the final parameter values to the folder "weights"
    params_dir = "./weights"
    os.makedirs(params_dir, exist_ok=True)
    # tofile writes raw binary, so the files must be opened in binary mode
    fd_w1 = open(os.path.join(params_dir, 'w1_values'), "wb")
    fd_b1 = open(os.path.join(params_dir, 'b1_values'), "wb")
    fd_w2 = open(os.path.join(params_dir, 'w2_values'), "wb")
    fd_b2 = open(os.path.join(params_dir, 'b2_values'), "wb")
    w1_mat.tofile(fd_w1)
    b1_vec.tofile(fd_b1)
    w2_mat.tofile(fd_w2)
    b2_vec.tofile(fd_b2)
    fd_w1.close()
    fd_b1.close()
    fd_w2.close()
    fd_b2.close()
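# Note: numpy's tofile/fromfile store raw float64 bytes with no shape or dtype
# header, so test() below must reload each array with exactly the same dtype
# and dimensions that were used when writing the files above.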
# Runs the trained MLP on test data and returns the predicted labels.
def test(testX):
    output_labels = np.zeros(testX.shape[0])
    num_examples = testX.shape[0]
    testX = np.reshape(testX, (num_examples, 28*28))
    testX = testX.T
    # read the parameter values (raw float64 binaries written by train())
    params_dir = "./weights"
    fd_w1 = open(os.path.join(params_dir, 'w1_values'), "rb")
    fd_b1 = open(os.path.join(params_dir, 'b1_values'), "rb")
    fd_w2 = open(os.path.join(params_dir, 'w2_values'), "rb")
    fd_b2 = open(os.path.join(params_dir, 'b2_values'), "rb")
    loaded = np.fromfile(file=fd_w1, dtype=np.float64)
    w1_mat = loaded.reshape((num_hidden_units, 28*28))
    loaded = np.fromfile(file=fd_b1, dtype=np.float64)
    b1_vec = loaded.reshape((num_hidden_units, 1))
    loaded = np.fromfile(file=fd_w2, dtype=np.float64)
    w2_mat = loaded.reshape((10, num_hidden_units))
    loaded = np.fromfile(file=fd_b2, dtype=np.float64)
    b2_vec = loaded.reshape((10, 1))
    # forward propagation to get the predicted values
    z1 = np.dot(w1_mat, testX) + b1_vec    # b1_vec broadcasts: (num_hidden_units, 1)
    a1 = relu_function(z1, num_hidden_units, num_examples)
    z2 = np.dot(w2_mat, a1) + b2_vec
    a2_softmax = softmax_function(z2)
    # predicted label = index of the largest softmax probability in each column
    for i in range(num_examples):
        output_labels[i] = np.argmax(a2_softmax[:, i])
    return output_labels
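
# Minimal usage sketch. `load_mnist` is purely hypothetical and not part of
# this file; substitute your own loader returning images of shape (N, 28, 28)
# and integer labels of shape (N,):
#
#   if __name__ == "__main__":
#       trainX, trainY = load_mnist("train")   # hypothetical loader
#       testX, testY = load_mnist("test")      # hypothetical loader
#       train(trainX, trainY)
#       predictions = test(testX)
#       print("Accuracy:", np.mean(predictions == testY))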