sgd_linear_regression_multivariable.py

from numpy import loadtxt, zeros, ones, array, linspace, logspace, mean, std
from pylab import scatter, show, title, xlabel, ylabel, plot, contour


def feature_normalize(X):
    '''
    Returns a normalized version of X where the mean value of each
    feature is 0 and the standard deviation is 1. This is often a good
    preprocessing step when working with learning algorithms.
    '''
    mean_r = []
    std_r = []
    X_norm = X.copy()
    n_c = X.shape[1]
    for i in range(n_c):
        m = mean(X[:, i])
        s = std(X[:, i])
        mean_r.append(m)
        std_r.append(s)
        X_norm[:, i] = (X_norm[:, i] - m) / s
    return X_norm, mean_r, std_r
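
# Note (usage sketch, not part of the original flow): feature_normalize is
# defined above but never called below, since this script fits only a single
# feature. For a genuinely multivariable dataset one would normalize the raw
# feature columns before adding the intercept column, along these lines
# (the names data_multi / X_multi / ex1data2.txt are hypothetical):
#
#   data_multi = loadtxt('ex1data2.txt', delimiter=',')
#   X_multi, mu, sigma = feature_normalize(data_multi[:, :-1])
#   y_multi = data_multi[:, -1]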


def compute_cost(X, y, theta):
    '''
    Compute the cost for linear regression.
    '''
    # Number of training samples
    m = y.size
    predictions = X.dot(theta)
    sqErrors = (predictions - y)
    J = (1.0 / (2 * m)) * sqErrors.T.dot(sqErrors)
    # sqErrors is an (m, 1) column, so J comes back as a 1x1 array;
    # return it as a plain scalar.
    return J[0, 0]
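
# For reference, the vectorized expression above computes the usual
# squared-error cost
#   J(theta) = (1 / (2m)) * sum_i (x_i . theta - y_i)^2
# where x_i is the i-th row of X (with the leading 1 for the intercept).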


def gradient_descent(X, y, theta, alpha, num_iters):
    '''
    Performs gradient descent to learn theta by taking num_iters
    gradient steps with learning rate alpha.
    '''
    m = y.size
    for i in range(num_iters):
        predictions = X.dot(theta)
        theta_size = theta.size
        for it in range(theta_size):
            # Column of feature `it`, reshaped to (m, 1) to match predictions
            temp = X[:, it]
            temp.shape = (m, 1)
            errors_x1 = (predictions - y) * temp
            theta[it][0] = theta[it][0] - alpha * (1.0 / m) * errors_x1.sum()
    return theta
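
# For reference, each inner-loop pass above is the standard batch
# gradient-descent update for one component of theta:
#   theta_j := theta_j - (alpha / m) * sum_i (x_i . theta - y_i) * x_ij
# predictions is computed once per outer iteration, so every component is
# updated from the same set of residuals.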


# Load the dataset
data = loadtxt('ex1data1.txt', delimiter=',')

# Plot the data
scatter(data[:, 0], data[:, 1], marker='o', c='b')
title('Profits distribution')
xlabel('Population of City in 10,000s')
ylabel('Profit in $10,000s')
#show()

X = data[:, 0]
y = data[:, 1]

# Number of training samples
m = y.size
# Reshape y into a column vector so it matches the (m, 1) predictions
y = y.reshape(m, 1)

# Add a column of ones to X (intercept term)
it = ones(shape=(m, 2))
it[:, 1] = X

# Initialize theta parameters
theta = zeros(shape=(2, 1))

# Some gradient descent settings
iterations = 1500
alpha = 0.01

# Compute and display the initial cost
cost = compute_cost(it, y, theta)
print(cost)

theta = gradient_descent(it, y, theta, alpha, iterations)
print(theta)

# Predict values for population sizes of 35,000 and 70,000
predict1 = array([1, 3.5]).dot(theta).flatten()
print('For population = 35,000, we predict a profit of %f' % (predict1[0] * 10000))
predict2 = array([1, 7.0]).dot(theta).flatten()
print('For population = 70,000, we predict a profit of %f' % (predict2[0] * 10000))
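
# Each prediction above is the dot product of a feature vector
# [1, population / 10000] with the learned theta; since profits are stored
# in units of $10,000, the result is scaled by 10000 for display.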

# Plot the fitted line over the training data
result = it.dot(theta).flatten()
plot(data[:, 0], result)
show()

# Grid over which we will calculate J
theta0_vals = linspace(-10, 10, 100)
theta1_vals = linspace(-1, 4, 100)

# Initialize J_vals to a matrix of 0's
J_vals = zeros(shape=(theta0_vals.size, theta1_vals.size))

# Fill out J_vals
for t1, element in enumerate(theta0_vals):
    for t2, element2 in enumerate(theta1_vals):
        thetaT = zeros(shape=(2, 1))
        thetaT[0][0] = element
        thetaT[1][0] = element2
        J_vals[t1, t2] = compute_cost(it, y, thetaT)

# Contour plot of the cost surface
J_vals = J_vals.T

# Plot J_vals as 20 contours spaced logarithmically between 0.01 and 1000
contour(theta0_vals, theta1_vals, J_vals, logspace(-2, 3, 20))
xlabel('theta_0')
ylabel('theta_1')
scatter(theta[0][0], theta[1][0])
show()