linear_regression.ipynb.url

[InternetShortcut]
URL=https://colab.research.google.com/drive/1lMIopiyFAQ4zBcyVJP7kxPuVK5pGIU1g#

def mean(values):
  """Return the arithmetic mean of ``values`` as a float.

  ``values`` must support both ``sum()`` and ``len()``. An empty
  sequence raises ``ZeroDivisionError``.
  """
  total = sum(values)
  count = len(values)
  return total / float(count)

def var(values,mean):
  """Return the sum of squared deviations of ``values`` from ``mean``.

  The total is NOT divided by the number of values, so this is the
  unnormalised sum of squares rather than a per-sample variance.
  """
  squared_deviations = ((value - mean) ** 2 for value in values)
  return sum(squared_deviations)

# Sample (x, y) observations used by the statements that follow.
dataset = [[1,1],[2,3],[4,3],[3,2],[5,5]]
x = [row[0] for row in dataset]
y = [row[1] for row in dataset]

# Summary statistics per column. var() returns the sum of squared
# deviations (it does not divide by the number of samples).
mean_x, mean_y = mean(x), mean(y)
var_x, var_y = var(x, mean_x), var(y, mean_y)
print('x stats: means=%.3f variance=%.3f' % (mean_x, var_x))
# This line reports the y column, so it is labelled 'y stats'.
print('y stats: means=%.3f variance=%.3f' % (mean_y, var_y))


def covariance(x,mean_x,y,mean_y):
  """Return the sum of products of paired deviations of ``x`` and ``y``.

  For every position in ``x`` the deviation of ``x`` from ``mean_x`` is
  multiplied by the deviation of the same-index element of ``y`` from
  ``mean_y``; the products are accumulated starting from ``0.0``. The
  total is not divided by the number of samples.
  """
  total = 0.0
  for idx, x_value in enumerate(x):
    # y is indexed (not zipped) so a too-short y still raises IndexError.
    total += (x_value - mean_x) * (y[idx] - mean_y)
  return total

# Unnormalised covariance of the x and y columns computed above.
covar = covariance(x, mean_x, y, mean_y)
print('Covariance: %.3f' % covar)

def coeff(dataset):
  """Estimate simple linear regression coefficients for ``dataset``.

  Each row supplies the input in column 0 and the target in column 1.

  Returns:
    ``[b0, b1]`` where ``b1`` is the slope and ``b0`` the intercept of
    the fitted line ``y = b0 + b1 * x``.

  Raises:
    ZeroDivisionError: if ``dataset`` is empty or every x value is
      identical (the sum of squared deviations of x is zero).
  """
  x = [row[0] for row in dataset]
  y = [row[1] for row in dataset]

  x_mean, y_mean = mean(x), mean(y)
  # The sum-of-squares helper in this file is `var`; there is no `variance`.
  # Both covariance() and var() return unnormalised sums, so their ratio
  # is the least-squares slope.
  b1 = covariance(x, x_mean, y, y_mean) / var(x, x_mean)
  b0 = y_mean - b1 * x_mean
  return [b0, b1]

# Fit the line on the sample data and report intercept (B0) and slope (B1).
b0, b1 = coeff(dataset)
print('Coefficients: B0=%.3f, B1=%.3f' % (b0, b1))

def linear_reg(train, test):
  """Fit a line on ``train`` and return predictions for each row of ``test``.

  Coefficients come from ``coeff(train)``; only column 0 of each test
  row is read. The result is a list with one prediction per test row,
  in the same order.
  """
  intercept, slope = coeff(train)
  return [intercept + slope * row[0] for row in test]

from math import sqrt

def rmse_metric(actual, predicted):
  """Return the root mean squared error between two sequences.

  Iterates over the positions of ``actual`` and reads the matching index
  of ``predicted``. An empty ``actual`` raises ``ZeroDivisionError``.
  """
  squared_total = 0.0
  for idx, truth in enumerate(actual):
    residual = predicted[idx] - truth
    squared_total += residual ** 2
  return sqrt(squared_total / float(len(actual)))

def eval_algo(dataset, algo):
  """Score ``algo`` on ``dataset`` and return its RMSE.

  A copy of every row with its last column replaced by ``None`` is
  passed to ``algo(dataset, test_set)`` as the test set, so the
  algorithm trains and predicts on the same rows. Its predictions are
  compared against the last column of ``dataset`` with ``rmse_metric``.
  """
  def _hide_target(row):
    # Copy first so the caller's rows are never modified.
    masked = list(row)
    masked[-1] = None
    return masked

  test_set = [_hide_target(row) for row in dataset]
  predicted = algo(dataset, test_set)

  # Targets are read after the algorithm has run, as in the original flow.
  actual = [row[-1] for row in dataset]
  return rmse_metric(actual, predicted)


# Evaluate the simple linear regression on the sample data.
rmse = eval_algo(dataset, linear_reg)
print('RMSE: ', rmse)

from numpy import cov
from scipy.stats import spearmanr

# Second sample: rank correlation between the two columns.
dataset = [[1, 1.99], [2, 1.99], [4, 3.59], [3, 2.8], [5, 4.39]]
# Transpose the rows into one list per column.
x, y = (list(column) for column in zip(*dataset))
# spearmanr returns the correlation first; the second value is discarded.
corr, _ = spearmanr(x, y)
print('Spearmans correlation: %.3f' % corr)