forked from mazefeng/ml
-
Notifications
You must be signed in to change notification settings - Fork 0
/
softmax.py
executable file
·125 lines (85 loc) · 3.07 KB
/
softmax.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# coding=utf-8
import sys
import numpy as np
import math
import random
from common import read_dense_data
from common import sigmoid
from common import align
random.seed(1024 * 1024)
from cg import CG
from gd import SGDOption
from gd import SGD
class SoftmaxRegression:
    """Multinomial logistic (softmax) regression with L2 regularization.

    Weights are stored as an (O, n) matrix, where O is the number of
    classes and n the number of features. Training minimizes the
    regularized cross-entropy via the external CG optimizer.
    """

    def __init__(self):
        self.w = None  # learned (O, n) weight matrix; None until train()
        self.c = 0     # count of cost-function evaluations (diagnostic)

    def train(self, X, Y, lamb = 1.0):
        """Fit weights on design matrix X (m, n) and integer labels Y (m, 1).

        lamb is the L2 regularization strength. Sets self.w in place.
        """
        # Number of distinct class labels present in Y.
        O = len(set([v[0] for v in Y.tolist()]))
        m, n = X.shape
        # Small random init, flattened to a column vector for the optimizer.
        w = np.matrix(0.005 * np.random.random([O, n])).reshape(-1, 1)
        opt = SGDOption()
        opt.max_iter = 50
        opt.mini_batch_size = 5000
        # w_opt = SGD(self.cost, w, X, Y, opt, lamb = lamb, O = O)
        w_opt = CG(self.cost, w, 80, X = X, Y = Y, lamb = lamb, O = O)
        self.w = w_opt.reshape(O, n)
        print('c = ', self.c, file=sys.stderr)

    def cost(self, w, X, Y, lamb, O):
        """Regularized cross-entropy cost and gradient at flattened weights w.

        w arrives as an (O*n, 1) column vector; returns (J, grad) with grad
        flattened back to a column vector for the optimizer.
        """
        m, n = X.shape
        w = w.reshape(O, n)
        I = Y.T
        # One-hot encode labels: Y[k, i] = 1 iff example i has class k.
        # Plain integer-array indices (not np.matrix) for modern-NumPy safety.
        Y = np.matrix(np.zeros((O, m)))
        Y[np.asarray(I).ravel(), np.arange(m)] = 1
        # Softmax with max-subtraction so exp() cannot overflow; the shift
        # cancels in the normalization, leaving probabilities unchanged.
        S = w * X.T
        P = np.exp(S - S.max(0))
        P = P / P.sum(0)
        L = - (1.0 / m) * np.multiply(Y, np.log(P)).sum()
        R = (lamb / 2.0) * np.square(w).sum()
        J = L + R
        grad = - (1.0 / m) * (Y - P) * X + lamb * w
        grad = grad.reshape(-1, 1)
        self.c += 1
        return J, grad

    def predict(self, X):
        """Return the most probable class index for each row of X as (m, 1)."""
        S = self.w * X.T
        P = np.exp(S - S.max(0))  # overflow-safe softmax (shift cancels)
        P = P / P.sum(0)
        return np.argmax(P, 0).T

    def test(self, X, Y):
        """Print to stderr and return classification accuracy on (X, Y)."""
        Y_pred = self.predict(X)
        P = np.matrix(np.zeros(Y.shape))
        P[np.where(Y_pred == Y)] = 1
        acc = 1.0 * P.sum() / len(Y)
        print('Accuracy %lf%% (%d/%d)' % (100.0 * acc, P.sum(), len(Y)), file=sys.stderr)
        return 1.0 * P.sum() / len(Y)
if __name__ == '__main__':
    train_path = 'data/mnist.train'
    test_path = 'data/mnist.test'

    # Load the training set; `with` closes the file handle deterministically.
    with open(train_path) as f:
        X_train, Y_train = read_dense_data(f)
    print('read training data done.', file=sys.stderr)
    X_train = np.matrix(X_train)
    Y_train = np.matrix([int(y) for y in Y_train]).T  # (m, 1) label column
    print('create training matrix done.', file=sys.stderr)

    with open(test_path) as f:
        X_test, Y_test = read_dense_data(f)
    print('read test data done', file=sys.stderr)
    X_test = np.matrix(X_test)
    Y_test = np.matrix([int(y) for y in Y_test]).T
    print('create test matrix done.', file=sys.stderr)

    # NOTE(review): presumably pads the two matrices to a common feature
    # dimension -- confirm against common.align.
    X_train, X_test = align(X_train, X_test)

    clf = SoftmaxRegression()
    clf.train(X_train, Y_train)
    acc_train = clf.test(X_train, Y_train)
    acc_test = clf.test(X_test, Y_test)
    print('Training accuracy for Softmax Regression : %lf%%' % (100.0 * acc_train), file=sys.stderr)
    print('Test accuracy for Softmax Regression : %lf%%' % (100.0 * acc_test), file=sys.stderr)