-
Notifications
You must be signed in to change notification settings - Fork 1
/
multiclass.py
349 lines (272 loc) · 11.2 KB
/
multiclass.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize
from submission import Submission
from logreg import sigmoid
from logreg import hypothesis
from utils import add_intercept
class MultiClassGrader(Submission):
X = np.stack([np.ones(20),
np.exp(1) * np.sin(np.arange(1, 21)),
np.exp(0.5) * np.cos(np.arange(1, 21))], axis=1)
y = (np.sin(X[:, 0] + X[:, 1]) > 0).astype(float)
Xm = np.array([[-1, -1],
[-1, -2],
[-2, -1],
[-2, -2],
[1, 1],
[1, 2],
[2, 1],
[2, 2],
[-1, 1],
[-1, 2],
[-2, 1],
[-2, 2],
[1, -1],
[1, -2],
[-2, -1],
[-2, -2]])
ym = np.array([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3])
t1 = np.sin(np.reshape(np.arange(1, 25, 2), (4, 3), order='F'))
t2 = np.cos(np.reshape(np.arange(1, 41, 2), (4, 5), order='F'))
def __init__(self):
part_names = ['Regularized Logistic Regression',
'One-vs-All Classifier Training',
'One-vs-All Classifier Prediction',
'Neural Network Prediction Function']
super().__init__('multi-class-classification-and-neural-networks', part_names)
def __iter__(self):
for part_id in range(1, 5):
try:
func = self.functions[part_id]
# Each part has different expected arguments/different function
if part_id == 1:
res = func(np.array([0.25, 0.5, -0.5]), self.X, self.y, 0.1)
res = np.hstack(res).tolist()
elif part_id == 2:
res = func(self.Xm, self.ym, 4, 0.1)
elif part_id == 3:
res = func(self.t1, self.Xm) + 1
elif part_id == 4:
res = func(self.t1, self.t2, self.Xm) + 1
else:
raise KeyError
yield part_id, res
except KeyError:
yield part_id, 0
def lr_cost_function(theta, X, y, lambda_):
"""
Computes the cost of using theta as the parameter for regularized
logistic regression and the gradient of the cost w.r.t. to the parameters.
Parameters
----------
theta : array_like
Logistic regression parameters. A vector with shape (n, ). n is
the number of features including any intercept.
X : array_like
The data set with shape (m x n). m is the number of examples, and
n is the number of features (including intercept).
y : array_like
The data labels. A vector with shape (m, ).
lambda_ : float
The regularization parameter.
Returns
-------
J : float
The computed value for the regularized cost function.
grad : array_like
A vector of shape (n, ) which is the gradient of the cost
function with respect to theta, at the current values of theta.
Instructions
------------
Compute the cost of a particular choice of theta. You should set J to the cost.
Compute the partial derivatives and set grad to the partial
derivatives of the cost w.r.t. each parameter in theta
Hint 1
------
The computation of the cost function and gradients can be efficiently
vectorized. For example, consider the computation
sigmoid(X * theta)
Each row of the resulting matrix will contain the value of the prediction
for that example. You can make use of this to vectorize the cost function
and gradient computations.
Hint 2
------
When computing the gradient of the regularized cost function, there are
many possible vectorized solutions, but one solution looks like:
grad = (unregularized gradient for logistic regression)
temp = theta
temp[0] = 0 # because we don't add anything for j = 0
grad = grad + YOUR_CODE_HERE (using the temp variable)
Hint 3
------
We have provided the implementatation of the sigmoid function within
the file `utils.py`. At the start of the notebook, we imported this file
as a module. Thus to access the sigmoid function within that file, you can
do the following: `utils.sigmoid(z)`.
"""
# Initialize some useful values
m = y.size
# convert labels to ints if their type is bool
if y.dtype == bool:
y = y.astype(int)
# You need to return the following variables correctly
J = 0
grad = np.zeros(theta.shape)
# ====================== YOUR CODE HERE ======================
# ============================================================
return J, grad
def one_vs_all(X, y, num_labels, lambda_):
"""
Trains num_labels logistic regression classifiers and returns
each of these classifiers in a matrix all_theta, where the i-th
row of all_theta corresponds to the classifier for label i.
Parameters
----------
X : array_like
The input dataset of shape (m x n). m is the number of
data points, and n is the number of features. Note that we
do not assume that the intercept term (or bias) is in X, however
we provide the code below to add the bias term to X.
y : array_like
The data labels. A vector of shape (m, ).
num_labels : int
Number of possible labels.
lambda_ : float
The logistic regularization parameter.
Returns
-------
all_theta : array_like
The trained parameters for logistic regression for each class.
This is a matrix of shape (K x n+1) where K is number of classes
(ie. `numlabels`) and n is number of features without the bias.
Instructions
------------
You should complete the following code to train `num_labels`
logistic regression classifiers with regularization parameter `lambda_`.
Hint
----
You can use y == c to obtain a vector of 1's and 0's that tell you
whether the ground truth is true/false for this class.
Note
----
For this assignment, we recommend using `scipy.optimize.minimize(method='CG')`
to optimize the cost function. It is okay to use a for-loop
(`for c in range(num_labels):`) to loop over the different classes.
Example Code
------------
# Set Initial theta
initial_theta = np.zeros(n + 1)
# Set options for minimize
options = {'maxiter': 50}
# Run minimize to obtain the optimal theta. This function will
# return a class object where theta is in `res.x` and cost in `res.fun`
res = optimize.minimize(lr_cost_function,
initial_theta,
(X, (y == c), lambda_),
jac=True,
method='TNC',
options=options)
"""
# Some useful variables
m, n = X.shape
# You need to return the following variables correctly
all_theta = np.zeros((num_labels, n + 1))
# Add ones to the X data matrix
X = np.concatenate([np.ones((m, 1)), X], axis=1)
# ====================== YOUR CODE HERE ======================
# ============================================================
return all_theta
def predict_one_vs_all(all_theta, X):
"""
Return a vector of predictions for each example in the matrix X.
Note that X contains the examples in rows. all_theta is a matrix where
the i-th row is a trained logistic regression theta vector for the
i-th class. You should set p to a vector of values from 0..K-1
(e.g., p = [0, 2, 0, 1] predicts classes 0, 2, 0, 1 for 4 examples) .
Parameters
----------
all_theta : array_like
The trained parameters for logistic regression for each class.
This is a matrix of shape (K x n+1) where K is number of classes
and n is number of features without the bias.
X : array_like
Data points to predict their labels. This is a matrix of shape
(m x n) where m is number of data points to predict, and n is number
of features without the bias term. Note we add the bias term for X in
this function.
Returns
-------
p : array_like
The predictions for each data point in X. This is a vector of shape (m, ).
Instructions
------------
Complete the following code to make predictions using your learned logistic
regression parameters (one-vs-all). You should set p to a vector of predictions
(from 0 to num_labels-1).
Hint
----
This code can be done all vectorized using the numpy argmax function.
In particular, the argmax function returns the index of the max element,
for more information see '?np.argmax' or search online. If your examples
are in rows, then, you can use np.argmax(A, axis=1) to obtain the index
of the max for each row.
"""
m = X.shape[0]
num_labels = all_theta.shape[0]
# You need to return the following variables correctly
p = np.zeros(m)
# Add ones to the X data matrix
X = np.concatenate([np.ones((m, 1)), X], axis=1)
# ====================== YOUR CODE HERE ======================
# ============================================================
return p
def predict(Theta1, Theta2, X):
"""
Predict the label of an input given a trained neural network.
Parameters
----------
Theta1 : array_like
Weights for the first layer in the neural network.
It has shape (2nd hidden layer size x input size)
Theta2: array_like
Weights for the second layer in the neural network.
It has shape (output layer size x 2nd hidden layer size)
X : array_like
The image inputs having shape (number of examples x image dimensions).
Return
------
p : array_like
Predictions vector containing the predicted label for each example.
It has a length equal to the number of examples.
Instructions
------------
Complete the following code to make predictions using your learned neural
network. You should set p to a vector containing labels
between 0 to (num_labels-1).
Hint
----
This code can be done all vectorized using the numpy argmax function.
In particular, the argmax function returns the index of the max element,
for more information see '?np.argmax' or search online. If your examples
are in rows, then, you can use np.argmax(A, axis=1) to obtain the index
of the max for each row.
"""
# Make sure the input has two dimensions
if X.ndim == 1:
X = X[None] # promote to 2-dimensions
# useful variables
m = X.shape[0]
num_labels = Theta2.shape[0]
# You need to return the following variables correctly
p = np.zeros(X.shape[0])
# ====================== YOUR CODE HERE ======================
# =============================================================
return p
if __name__ == '__main__':
grader = MultiClassGrader()
grader[1] = lr_cost_function
grader[2] = one_vs_all
grader[3] = predict_one_vs_all
grader[4] = predict
grader.grade()