import numpy as np
import scipy.sparse
import scipy.optimize


def softmax_cost(theta, num_classes, input_size, lambda_, data, labels):
"""
:param theta:
:param num_classes: the number of classes
:param input_size: the size N of input vector
:param lambda_: weight decay parameter
:param data: the N x M input matrix, where each column corresponds
a single test set
:param labels: an M x 1 matrix containing the labels for the input data
"""
    m = data.shape[1]
    theta = theta.reshape(num_classes, input_size)
    theta_data = theta.dot(data)
    # Subtract the per-example maximum before exponentiating for numerical
    # stability; softmax is invariant to a constant shift within each column.
    theta_data = theta_data - np.max(theta_data, axis=0)
    prob_data = np.exp(theta_data) / np.sum(np.exp(theta_data), axis=0)
    # Build the num_classes x M one-hot indicator matrix from the labels;
    # fixing the shape guards against classes absent from this batch.
    indicator = scipy.sparse.csr_matrix(
        (np.ones(m), (np.asarray(labels).flatten(), np.arange(m))),
        shape=(num_classes, m))
    indicator = indicator.toarray()
    # Cross-entropy cost plus the L2 weight decay term.
    cost = (-1 / m) * np.sum(indicator * np.log(prob_data)) + (lambda_ / 2) * np.sum(theta * theta)
    grad = (-1 / m) * (indicator - prob_data).dot(data.transpose()) + lambda_ * theta
    return cost, grad.flatten()
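

# A minimal gradient sanity check on small synthetic data; the helper below
# is an illustrative sketch, not part of the original exercise.
# scipy.optimize.check_grad compares the analytic gradient from softmax_cost
# against a finite-difference approximation and returns the norm of the
# difference, which should be around 1e-6 or smaller.
def _check_softmax_gradient(num_classes=3, input_size=4, m=10, lambda_=1e-4):
    rng = np.random.RandomState(0)
    data = rng.randn(input_size, m)
    labels = rng.randint(num_classes, size=m)
    theta0 = 0.005 * rng.randn(num_classes * input_size)
    f = lambda t: softmax_cost(t, num_classes, input_size, lambda_, data, labels)[0]
    g = lambda t: softmax_cost(t, num_classes, input_size, lambda_, data, labels)[1]
    return scipy.optimize.check_grad(f, g, theta0)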


def softmax_predict(model, data):
    """Predict labels with a model trained by softmax_train.

    :param model: the (opt_theta, input_size, num_classes) tuple returned
                  by softmax_train
    :param data: the N x M input matrix, where each column data[:, i]
                 corresponds to a single test example

    Returns the prediction vector pred, where pred[i] is
    argmax_c P(y = c | x[:, i]).
    """
    opt_theta, input_size, num_classes = model
    opt_theta = opt_theta.reshape(num_classes, input_size)
    prod = opt_theta.dot(data)
    # The softmax is monotonic in the class scores, so the argmax of the
    # linear scores equals the argmax of the probabilities (and avoids
    # potential overflow in np.exp).
    pred = prod.argmax(axis=0)
    return pred


def softmax_train(input_size, num_classes, lambda_, data, labels,
                  options={'maxiter': 400, 'disp': True}):
    """Train a softmax model with the given parameters on the given data.

    Returns (opt_theta, input_size, num_classes), where opt_theta is a
    vector containing the trained parameters for the model.

    :param input_size: the size of an input vector x^(i)
    :param num_classes: the number of classes
    :param lambda_: weight decay parameter
    :param data: an N x M matrix containing the input data, such that
                 data[:, i] is the ith input
    :param labels: an M x 1 vector containing the class labels for the
                   corresponding inputs; labels[i] is the class label
                   for the ith input
    :param options: options passed to scipy.optimize.minimize;
                    options['maxiter'] is the number of iterations
                    to train for
    """
    # Initialize theta randomly
theta = 0.005 * np.random.randn(num_classes * input_size)
    # softmax_cost returns (cost, grad), so pass jac=True to minimize.
    J = lambda x: softmax_cost(x, num_classes, input_size, lambda_, data, labels)
    result = scipy.optimize.minimize(J, theta, method='L-BFGS-B', jac=True, options=options)
    print(result)
# Return optimum theta, input size & num classes
opt_theta = result.x
return opt_theta, input_size, num_classes
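

# A minimal end-to-end sketch on synthetic data; the shapes, seed, and
# hyperparameters below are illustrative assumptions, not values from the
# original exercise.
if __name__ == '__main__':
    rng = np.random.RandomState(42)
    input_size, num_classes, m = 8, 4, 500
    # Draw random inputs and label them with a random linear scorer so the
    # classes are learnable.
    true_theta = rng.randn(num_classes, input_size)
    data = rng.randn(input_size, m)
    labels = true_theta.dot(data).argmax(axis=0)
    model = softmax_train(input_size, num_classes, 1e-4, data, labels,
                          options={'maxiter': 100, 'disp': False})
    pred = softmax_predict(model, data)
    print('training accuracy: %.3f' % np.mean(pred == labels))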