# sgd.py
# Created by Etzion Harari
# https://github.com/EtzionR
# Load libraries:
import matplotlib.pyplot as plt
import numpy as np
# Define useful functions:
linear_gradient = lambda error, ary: -2 * np.mean(ary * error)
rmse = lambda errors: np.sqrt(np.mean(np.power(errors, 2)))
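# Note (added commentary, not in the original file): for the MSE loss
# L = mean((y - X @ betas) ** 2), the partial derivative with respect to
# beta_j is -2 * mean(x_j * error), which is exactly what linear_gradient
# computes for a single feature column.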
# Define Adam Optimizer:
class ADAM:
"""
Adaptive Moment Optimization
"""
    def __init__(self, lr, b1=.9, b2=.999, eps=1e-9):
        """
        object initialization
        :param lr: learning rate (step size)
        :param b1: beta 1 (default: .9)
        :param b2: beta 2 (default: .999)
        :param eps: epsilon (default: 1e-9)
        """
self.t = 1
self.m = 0
self.v = 0
self.b1 = b1
self.b2 = b2
self.lr = lr
self.eps = eps
    def optimize(self, grd):
        """
        compute the Adam update step for a given gradient
        :param grd: gradient value
        :return: required change in the weight
        """
        # update the biased first and second moment estimates
        self.m = (self.b1 * self.m) + ((1 - self.b1) * grd)
        self.v = (self.b2 * self.v) + ((1 - self.b2) * grd * grd)
        # bias-correct the moments, then advance the timestep
        m = self.m / (1 - (self.b1 ** self.t))
        v = self.v / (1 - (self.b2 ** self.t))
        self.t += 1
        return (self.lr * m) / ((v ** .5) + self.eps)
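# A minimal usage sketch (not part of the original file): minimizing the
# one-dimensional quadratic f(w) = (w - 3) ** 2 with the ADAM object above.
# The helper name _demo_adam is hypothetical and only illustrative.
def _demo_adam(steps=500):
    opt = ADAM(lr=0.1)
    w = 0.0
    for _ in range(steps):
        grad = 2 * (w - 3)       # analytic gradient of (w - 3) ** 2
        w -= opt.optimize(grad)  # Adam step instead of a plain lr * grad step
    return w                     # approaches the minimizer w = 3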
# Define Linear SGD Object:
class LinearSGD:
"""
    Linear SGD Regressor Calculator
    Built to estimate the betas vector
    using the stochastic gradient descent method
"""
    def __init__(self, lr=1e-2, iters=100, sample_rate=0.5, adam=True):
        """
        initialize the Linear SGD object
        :param lr: float, the required learning rate (default: 1e-2)
        :param iters: int, the required number of algorithm iterations (default: 100)
        :param sample_rate: float, the % of samples for gradient calculation
                            in each iteration (default: .5)
        :param adam: bool, whether to use the Adam optimizer (default: True)
        """
# model variables
self.iters = iters
self.adam = adam
self.loss = []
self.sr = sample_rate
self.lr = lr
# fitting variables
self.adams = None
self.betas = None
self.n = None
self.m = None
def create_matrix(self, x):
"""
create x matrix from given input (vector/matrix)
:param x: input features data
:return: x matrix
"""
x = np.array(x)
if len(x.shape) == 1:
return np.array([np.ones(x.shape[0]), x]).T
else:
return np.array([np.ones(x.shape[0])] + [xi for xi in x.T]).T
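    # Shape note for create_matrix (added commentary): a 1-D input of n
    # samples becomes an (n, 2) design matrix, e.g. [1, 2, 3] ->
    # [[1, 1], [1, 2], [1, 3]], while a 2-D (n, k) input becomes an
    # (n, k + 1) matrix; the leading column of ones is the intercept term.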
def update_weights(self, subset_x, errors):
"""
update the betas values by using the sgd
:param subset_x: subset of x matrix
:param errors: vector of error between prediction and y values
"""
        self.loss.append(rmse(errors))
        for j in range(self.m):
            if self.adam:
                self.betas[j] -= self.adams[j].optimize(linear_gradient(errors, subset_x[:, j]))
            else:
                self.betas[j] -= self.lr * linear_gradient(errors, subset_x[:, j])
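    # Update rule in update_weights (added commentary): each beta_j moves
    # against its gradient, beta_j <- beta_j - lr * dL/dbeta_j; with adam=True
    # the raw lr * gradient step is replaced by the bias-corrected step
    # returned by ADAM.optimize.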
def fit(self, x, y):
"""
fitting the betas values using sgd
:param x: input x values
:param y: input y values
"""
        # prepare the data for fitting
x = self.create_matrix(x)
y = np.array(y)
self.n = x.shape[0]
self.m = x.shape[1]
self.betas = np.random.uniform(0, 1, self.m)
        # if the Adam optimizer is used, initialize one ADAM object per weight
if self.adam:
self.adams = [ADAM(self.lr) for _ in range(self.m)]
# fitting the betas to the data
for i in range(self.iters):
# select the subset of x & y and calculate the errors
selection = np.random.random(self.n) < self.sr
subset_x = x[selection, :]
errors = y[selection] - np.dot(subset_x, self.betas)
# update the betas using the loss
self.update_weights(subset_x, errors)
return self
def predict(self, x):
"""
predict the y value using the calculated betas
:param x: input x features
:return: y - output prediction
"""
return np.dot(self.create_matrix(x), self.betas)
def plot_loss(self, size=10):
"""
plot the loss by iterations
:param size: plot size
"""
plt.figure(figsize=(size, size * .6))
        plt.title('Prediction Loss by Number of Iterations', fontsize=15)
plt.plot(np.arange(self.iters), self.loss, color='r', label='loss')
plt.xlabel('Iterations', fontsize=14)
plt.ylabel('RMSE', fontsize=14)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.legend(fontsize=14)
plt.ylim(0, self.loss[0] * 1.05)
plt.show()
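# A minimal end-to-end sketch (not part of the original file): fitting the
# LinearSGD model on synthetic data y = 1 + 2x + noise. The helper name
# _demo_linear_sgd is hypothetical; since fit returns self, calls can chain.
def _demo_linear_sgd(n=200, seed=0):
    rng = np.random.default_rng(seed)
    x = rng.uniform(0, 10, n)
    y = 1 + 2 * x + rng.normal(0, 0.5, n)  # true betas: [1, 2]
    model = LinearSGD(lr=0.05, iters=300).fit(x, y)
    preds = model.predict(x)               # fitted betas live in model.betas
    return model.betas, rmse(y - preds)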
# License
# MIT © Etzion Harari