-
Notifications
You must be signed in to change notification settings - Fork 0
/
linear_regression.ipynb.url
82 lines (63 loc) · 2.07 KB
/
linear_regression.ipynb.url
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
[InternetShortcut]
URL=https://colab.research.google.com/drive/1lMIopiyFAQ4zBcyVJP7kxPuVK5pGIU1g#
def mean(values):
    """Return the arithmetic mean of ``values`` as a float.

    Args:
        values: A sized collection of numbers.

    Returns:
        The sum of the values divided by how many there are.

    Raises:
        ZeroDivisionError: If ``values`` is empty.
    """
    total = sum(values)
    count = float(len(values))
    return total / count
def var(values, mean):
    """Return the sum of squared deviations of ``values`` from ``mean``.

    The result is NOT divided by the number of values, so it is the
    un-normalised spread around ``mean`` rather than a per-sample variance.

    Args:
        values: Iterable of numbers.
        mean: The centre to measure deviations from.

    Returns:
        The total of ``(value - mean) ** 2`` over all values (0 when empty).
    """
    squared_deviations = ((value - mean) ** 2 for value in values)
    return sum(squared_deviations)
# Toy data set: every row is an [x, y] pair.
dataset = [[1, 1], [2, 3], [4, 3], [3, 2], [5, 5]]
x = [row[0] for row in dataset]
y = [row[1] for row in dataset]
mean_x, mean_y = mean(x), mean(y)
var_x, var_y = var(x, mean_x), var(y, mean_y)
print('x stats: means=%.3f variance=%.3f' % (mean_x, var_x))
# This line reports the y column, so it is labelled "y stats".
print('y stats: means=%.3f variance=%.3f' % (mean_y, var_y))
def covariance(x, mean_x, y, mean_y):
    """Return the summed product of paired deviations of ``x`` and ``y``.

    The total is not divided by the number of samples.

    Args:
        x: Sequence of numbers; its length drives the iteration.
        mean_x: Centre used for the ``x`` deviations.
        y: Sequence of numbers, indexed in step with ``x``.
        mean_y: Centre used for the ``y`` deviations.

    Returns:
        The accumulated float total (``0.0`` when ``x`` is empty).

    Raises:
        IndexError: If ``y`` has fewer elements than ``x``.
    """
    total = 0.0
    for position, x_value in enumerate(x):
        x_dev = x_value - mean_x
        y_dev = y[position] - mean_y
        total += x_dev * y_dev
    return total
# Summed co-deviation of the x and y columns around their means.
covar = covariance(x, mean_x, y, mean_y)
print('Covariance: %.3f' % covar)
def coeff(dataset):
    """Estimate the coefficients of the line ``y = b0 + b1 * x``.

    Args:
        dataset: Sequence of rows where column 0 is the input ``x`` and
            column 1 is the target ``y``.

    Returns:
        A two-element list ``[b0, b1]``: intercept first, then slope.

    Raises:
        ZeroDivisionError: If ``dataset`` is empty, or if every ``x`` value
            is identical (the spread of ``x`` is then zero).
    """
    x = [row[0] for row in dataset]
    y = [row[1] for row in dataset]
    x_mean, y_mean = mean(x), mean(y)
    # The spread helper in this file is named `var`. Both it and
    # `covariance` return un-normalised sums, so their ratio is the slope.
    b1 = covariance(x, x_mean, y, y_mean) / var(x, x_mean)
    b0 = y_mean - b1 * x_mean
    return [b0, b1]
# Fit the line on the full data set and show intercept (B0) and slope (B1).
b0, b1 = coeff(dataset)
print('Coefficients: B0=%.3f, B1=%.3f' % (b0, b1))
def linear_reg(train, test):
    """Fit a line on ``train`` and predict a value for each row of ``test``.

    Args:
        train: Rows passed to ``coeff`` to obtain intercept and slope.
        test: Rows whose first element is the input to predict for.

    Returns:
        A list with one prediction per row of ``test``, in order.
    """
    intercept, slope = coeff(train)
    return [intercept + slope * row[0] for row in test]
from math import sqrt
def rmse_metric(actual, predicted):
    """Return the root-mean-squared error between two sequences.

    Args:
        actual: Sequence of true values; its length sets how many pairs are
            compared and is the divisor for the mean.
        predicted: Sequence of predictions, indexed in step with ``actual``.

    Returns:
        The square root of the mean squared difference.

    Raises:
        ZeroDivisionError: If ``actual`` is empty.
        IndexError: If ``predicted`` is shorter than ``actual``.
    """
    squared_total = 0.0
    for position, truth in enumerate(actual):
        difference = predicted[position] - truth
        squared_total += difference ** 2
    sample_count = float(len(actual))
    return sqrt(squared_total / sample_count)
def eval_algo(dataset, algo):
    """Score ``algo`` on ``dataset`` using RMSE, training and testing on the same rows.

    A copy of every row with its last element replaced by ``None`` is handed
    to ``algo`` as the test set, so the algorithm cannot read the targets
    from the test rows.

    Args:
        dataset: Sequence of rows; the last element of each row is the target.
        algo: Callable invoked as ``algo(dataset, test_set)`` that returns
            one prediction per test row.

    Returns:
        The value of ``rmse_metric`` for the targets versus the predictions.
    """
    def _hide_target(row):
        # Copy first so the caller's rows are never modified.
        masked = list(row)
        masked[-1] = None
        return masked

    test_set = [_hide_target(row) for row in dataset]
    predictions = algo(dataset, test_set)
    targets = [row[-1] for row in dataset]
    return rmse_metric(targets, predictions)
# Evaluate the simple linear regression on the data it was fitted on.
rmse = eval_algo(dataset, linear_reg)
print('RMSE: ', rmse)
from numpy import cov
from scipy.stats import spearmanr
# Second toy data set of [x, y] pairs, used for a rank-correlation check.
dataset = [[1, 1.99], [2, 1.99], [4, 3.59], [3, 2.8], [5, 4.39]]
# Transpose the rows into two column lists.
x, y = (list(column) for column in zip(*dataset))
# spearmanr returns the coefficient first; the second value is not used here.
corr, _ = spearmanr(x, y)
print('Spearmans correlation: %.3f' % (corr,))