'''
Created on Aug 22, 2013
@author: tiago
'''
import numpy as np
import theano
import theano.tensor as T
import Network as net
import Integrate as ig


class SGDOptimizer(object):
    '''
    Fit a network with minibatch stochastic gradient descent
    (batch size must be set to one for now due to a limitation in the network)
    '''

    def __init__(self, n, integ, inSamples, outSamples, c0,
                 learning_rate=0.1, L1_reg=0, L2_reg=0, batch_size=20):
        '''
        Creates the cost function and gradient expressions, and compiles the model.

        Keyword arguments:
        n -- the Network whose parameters will be fit
        integ -- the Integrate object that builds the cost and state updates
        inSamples -- the control variables to be fed in the integration loop (n_steps, n_samples, n_input_dims)
        outSamples -- the desired output (n_unit_steps, n_samples, n_output_dims)
        c0 -- initial state of the system (n_samples, n_output_dims)
        learning_rate -- multiplicative constant for the gradient
        L1_reg -- L1 regularization penalty
        L2_reg -- L2 regularization penalty
        batch_size -- number of samples to be evaluated simultaneously
        '''
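        # Symbolic minibatch index and placeholders for the sequences; the
        # actual data is substituted from shared variables via 'givens' below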
        index = T.lscalar()
        inputSequence = T.ftensor3("is")
        outputSequence = T.ftensor3("os")
        initialState = T.fmatrix("c_i")
        integ.buildModel(inputSequence, outputSequence, initialState)
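        # Total cost: the integrator's mean error plus the weight penalties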
        cost = integ.mean + L1_reg * n.L1 + L2_reg * n.L2_sqr
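        # Symbolic gradient of the cost with respect to every network parameter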
        gparams = []
        for param in n.params:
            gparam = T.grad(cost, param)
            gparams.append(gparam)
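        # Vanilla SGD update rule: param <- param - learning_rate * gradient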
        updates = []
        for param, gparam in zip(n.params, gparams):
            updates.append((param, param - learning_rate * gparam))
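        # Compile the training function: one call performs a single SGD step
        # on minibatch 'index', slicing the shared data along the sample axis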
print "Compiling model..."
self.model = theano.function(inputs=[index],
outputs=cost,
updates=updates + integ.updates,
givens={
inputSequence: inSamples[:,index * batch_size:(index + 1) * batch_size,:],
outputSequence: outSamples[:,index * batch_size:(index + 1) * batch_size,:],
initialState: c0[index * batch_size:(index + 1) * batch_size]})
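        # Evaluation function: returns the integrated unit activities for a
        # minibatch without applying any parameter updates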
        self.eval = theano.function(
            inputs=[index],
            outputs=integ.cUnits,
            givens={
                inputSequence: inSamples[:, index * batch_size:(index + 1) * batch_size, :],
                outputSequence: outSamples[:, index * batch_size:(index + 1) * batch_size, :],
                initialState: c0[index * batch_size:(index + 1) * batch_size]})
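

# Smoke test: fit the network to a 2-dimensional target sequence driven by
# 5 random control inputs, integrated with the plain Euler step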
def simpleTest():
    rng = np.random.RandomState(1234)
    iSeq = np.array(rng.rand(300, 5), dtype='float32')
    oSeq = np.array([[1, 1], [0.2, 0.3], [0.4, 0.4]], dtype='float32')
    c0 = np.array([0.1, 0.1], dtype='float32')
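    # Tile the sequences across 50 identical samples; rollaxis puts them in
    # the (n_steps, n_samples, dims) layout the optimizer expects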
    iSamples = theano.shared(np.rollaxis(np.tile(iSeq, (50, 1, 1)), 0, 2))
    oSamples = theano.shared(np.rollaxis(np.tile(oSeq, (50, 1, 1)), 0, 2))
    ci = theano.shared(np.tile(c0, (50, 1)))
    print(iSamples.shape.eval(), oSamples.shape.eval(), ci.shape.eval())
    n = net.Network(rng, [8, 2], 5)
    o = ig.ODESolver(n)
    integ = ig.Integrate(o.eulerStep)
    bs = 1
    sgd = SGDOptimizer(n, integ, iSamples, oSamples, ci, batch_size=bs)
    print("Dry run!")
    for _ in range(50):
        for index in range(50 // bs):
            score = sgd.model(index)
            print(score)
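

# Same smoke test using the combined Euler step; only 3 control inputs here,
# presumably because the combined step feeds the 2 state variables back in
# alongside the controls to fill the network's 5 inputs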
def combinedTest():
    rng = np.random.RandomState(1234)
    iSeq = np.array(rng.rand(300, 3), dtype='float32')
    oSeq = np.array([[1, 1], [0.2, 0.3], [0.4, 0.4]], dtype='float32')
    c0 = np.array([0.1, 0.1], dtype='float32')
    iSamples = theano.shared(np.rollaxis(np.tile(iSeq, (50, 1, 1)), 0, 2))
    oSamples = theano.shared(np.rollaxis(np.tile(oSeq, (50, 1, 1)), 0, 2))
    ci = theano.shared(np.tile(c0, (50, 1)))
    print(iSamples.shape.eval(), oSamples.shape.eval(), ci.shape.eval())
    n = net.Network(rng, [8, 2], 5)
    o = ig.ODESolver(n)
    integ = ig.Integrate(o.combinedEulerStep)
    bs = 1
    sgd = SGDOptimizer(n, integ, iSamples, oSamples, ci, batch_size=bs)
    print("Dry run!")
    for _ in range(50):
        for index in range(50 // bs):
            score = sgd.model(index)
            print(score)


if __name__ == '__main__':
    combinedTest()