-
Notifications
You must be signed in to change notification settings - Fork 0
/
part11.py
160 lines (123 loc) · 4.97 KB
/
part11.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Seed NumPy's global random generator so any random draws repeat between runs.
np.random.seed(12345)
# Print arrays with two decimals and without scientific notation.
np.set_printoptions(suppress=True, precision=2)
def lstsq_norm_e(A, b):
    """Solve the least-squares problem ``min ||A x - b||`` via the normal equations.

    The Gram matrix ``A^T A`` is Cholesky-factored as ``L L^T`` and the system
    ``A^T A x = A^T b`` is then solved in two stages: first ``L w = A^T b``,
    then ``L^T x = w``.

    Returns the coefficient vector (or matrix, when ``b`` has several columns).
    """
    lower = np.linalg.cholesky(np.dot(A.T, A))
    rhs = np.dot(A.T, b)
    # Stage 1 uses the lower factor, stage 2 its transpose.
    intermediate = np.linalg.solve(lower, rhs)
    return np.linalg.solve(lower.T, intermediate)
def lstsq_qr(A, b):
    """Solve the least-squares problem ``min ||A x - b||`` via a reduced QR factorisation.

    With ``A = Q R`` the solution satisfies ``R x = Q^T b``; that system is
    handed to ``np.linalg.solve``.
    """
    q_factor, r_factor = np.linalg.qr(A, mode='reduced')
    return np.linalg.solve(r_factor, np.dot(q_factor.T, b))
def lstsq_svd(A, b):
    """Solve the least-squares problem ``min ||A x - b||`` via the thin SVD.

    With ``A = U diag(S) V^T`` the solution is ``x = V diag(S)^-1 U^T b``.

    Parameters:
        A: 2-D coefficient matrix.
        b: right-hand side, 1-D or 2-D (one column per problem).

    Returns:
        The coefficient vector, or a matrix with one column per column of ``b``.

    Raises:
        numpy.linalg.LinAlgError: if a singular value is exactly zero, matching
            the error the previous dense ``solve(diag(S), ...)`` call raised.
    """
    U, S, Vt = np.linalg.svd(A, full_matrices=False)
    U_tb = np.dot(U.T, b)
    if np.any(S == 0):
        raise np.linalg.LinAlgError("Singular matrix")
    # Dividing by the singular values is the whole "solve" for a diagonal
    # system; reshape S so it broadcasts down the rows of a 1-D or 2-D U_tb.
    scale = S.reshape((-1,) + (1,) * (U_tb.ndim - 1))
    w = U_tb / scale
    return np.dot(Vt.T, w)
def basis_regression(sales_data, test_data=None):
    """Forecast every item, cross-compare the solvers, and optionally score the forecasts.

    Returns a 4-tuple ``(future_months, future_sales, method_comparison,
    gt_errors)``. ``gt_errors`` is ``None`` when no ``test_data`` is given.
    """
    future_months, future_sales = p11_basis_regression_all_items(sales_data)
    method_comparison = p11_compare_methods_all_items(future_sales)
    if test_data is None:
        return future_months, future_sales, method_comparison, None
    gt_errors = p11_gt_all_items(future_sales, test_data)
    return future_months, future_sales, method_comparison, gt_errors
# 3 : Test against ground truth
def p11_gt_all_items(sales_estimated, test_data):
    """Compute and print the per-method forecast error of every item.

    Parameters:
        sales_estimated: array indexed ``[item, method, month]``.
        test_data: array indexed ``[item, month]`` holding the true sales.

    Returns:
        Array indexed ``[item, method]`` with the error norms returned by
        ``p11_gt_one_item``.
    """
    num_item = sales_estimated.shape[0]
    # Take the method count from the input instead of hard-coding 3.
    num_method = sales_estimated.shape[1]
    gt_error = np.zeros([num_item, num_method])  # which item, which method?
    for i in range(num_item):
        gt_error_one_item = p11_gt_one_item(sales_estimated[i, :], test_data[i, :])
        # Single-argument print(...) emits the same bytes on Python 2 and 3;
        # '%s \n' reproduces the old "value, space, blank line" layout.
        print('Item %d Error compared to Groud Truth' % i)
        print('%s \n' % (gt_error_one_item,))
        gt_error[i, :] = gt_error_one_item
    return gt_error
def p11_gt_one_item(sales_estimated_for_item, test_data_for_item):  # (3,6) matrix
    """Return, for each row of the estimates, the norm of its deviation from the test data.

    The subtraction broadcasts ``test_data_for_item`` against every row of
    ``sales_estimated_for_item``; the norm is taken along axis 1.
    """
    deviation = sales_estimated_for_item - test_data_for_item
    return np.linalg.norm(deviation, ord=None, axis=1)
# 2: Compare Methods
def p11_compare_methods_all_items(sales_estimated):
    """Compute and print the pairwise method-vs-method relative errors of every item.

    Parameters:
        sales_estimated: array indexed ``[item, method, month]``.

    Returns:
        Array indexed ``[item, method, reference method]`` holding the matrices
        returned by ``p11_compare_methods_one_item``.
    """
    num_item = sales_estimated.shape[0]
    # Take the method count from the input instead of hard-coding 3.
    num_method = sales_estimated.shape[1]
    # which item, which method, compared to which method?
    comparison = np.zeros([num_item, num_method, num_method])
    for i in range(num_item):
        comparison_one_item = p11_compare_methods_one_item(sales_estimated[i, :, :])
        # Single-argument print(...) emits the same bytes on Python 2 and 3;
        # '%s \n' reproduces the old "value, space, blank line" layout.
        print('Item %d Comparison of Methods' % i)
        print('%s \n' % (comparison_one_item,))
        comparison[i, :, :] = comparison_one_item
    return comparison
def p11_compare_methods_one_item(sales_estimated_for_item):  # (3,6) matrix
    """Return the matrix of pairwise relative differences between the rows.

    Entry ``[i, j]`` is ``||row_i - row_j|| / ||row_j||``, so the matrix is not
    symmetric and its diagonal is zero. A row with zero norm makes its column
    ``inf``/``nan``, because the division is not guarded.

    Parameters:
        sales_estimated_for_item: 2-D array with one row per method.

    Returns:
        Square array with one row and one column per input row.
    """
    # Size the result from the input instead of hard-coding 3 methods.
    num_method = sales_estimated_for_item.shape[0]
    # The reference norm depends only on j, so compute each one once.
    reference_norms = [np.linalg.norm(sales_estimated_for_item[j, :])
                       for j in range(num_method)]
    relative_error = np.zeros([num_method, num_method])
    for i in range(num_method):
        x = sales_estimated_for_item[i, :]
        for j in range(num_method):
            y = sales_estimated_for_item[j, :]
            relative_error[i, j] = np.linalg.norm(x - y) / reference_norms[j]
    return relative_error
# 1: Estimate
def p11_basis_regression_all_items(sales_data):
    """Forecast the next six months of sales for every item and print the forecasts.

    Parameters:
        sales_data: 2-D array with one row per item; each row is passed to
            ``p11_remove_nans`` before fitting.

    Returns:
        ``(months, sales)`` where ``months`` is indexed ``[item, month]`` and
        ``sales`` is indexed ``[item, method, month]``.
    """
    num_item = sales_data.shape[0]
    sales = np.zeros([num_item, 3, 6])  # which item, which method, which month?
    months = np.zeros([num_item, 6])
    for i in range(num_item):
        months_current_item, sales_current_item = p11_remove_nans(sales_data[i, :])
        future_months, future_sales = p11_basis_regression_one_item(
            months_current_item, sales_current_item)
        # Single-argument print(...) emits the same bytes on Python 2 and 3;
        # '%s \n' reproduces the old "value, space, blank line" layout.
        print('Item %d Predictions' % i)
        print('%s \n' % (future_sales,))
        sales[i] = future_sales
        months[i] = future_months
    return months, sales
def p11_basis_regression_one_item(months, sales):
    """Fit one item's sales with three solvers and forecast the six following months.

    Returns ``(next_six_months, next_six_sales)``. Rows of ``next_six_sales``
    are, in order, the normal-equations, QR and SVD forecasts.
    """
    design = p11_form_A(months)
    # Solver order fixes the row order of the result.
    coefficients = (
        lstsq_norm_e(design, sales),
        lstsq_qr(design, sales),
        lstsq_svd(design, sales),
    )

    last_month = months[-1]
    next_six_months = np.arange(last_month + 1, last_month + 7)
    design_future = p11_form_A(next_six_months)

    next_six_sales = np.zeros([3, 6])  # 3 methods, 6 months estimated
    for row, weights in enumerate(coefficients):
        next_six_sales[row, :] = np.dot(design_future, weights)
    return next_six_months, next_six_sales
def p11_form_A(months):
    """Build the regression design matrix for the given month indices.

    Each row holds eight basis values for one month ``m``:
    ``sin`` and ``cos`` at angular frequencies ``pi/12``, ``pi/6`` and ``pi/2``
    (periods of 24, 12 and 4 months), then the linear term ``m``, then the
    constant ``1``.

    Parameters:
        months: 1-D array-like of month indices (ndarray, list or tuple).

    Returns:
        Float array of shape ``(len(months), 8)``.
    """
    m = np.asarray(months, dtype=float)
    pi = np.pi
    A = np.zeros([m.shape[0], 8])
    # Fill whole columns at once instead of building one row per Python-loop
    # iteration; the per-element arithmetic is unchanged.
    A[:, 0] = np.sin(pi * (1.0/12) * m)
    A[:, 1] = np.cos(pi * (1.0/12) * m)
    A[:, 2] = np.sin(pi * (1.0/6) * m)
    A[:, 3] = np.cos(pi * (1.0/6) * m)
    A[:, 4] = np.sin(pi * (1.0/2) * m)
    A[:, 5] = np.cos(pi * (1.0/2) * m)
    A[:, 6] = m
    A[:, 7] = 1
    return A
def p11_remove_nans(row_vector):
    """Split a row into the positions of its non-NaN entries and the entries themselves.

    Returns ``(months, sales)``: ``months`` are the indices where ``row_vector``
    is not NaN, and ``sales`` are the values at those indices.
    """
    is_observed = ~np.isnan(row_vector)
    months = is_observed.nonzero()[0]
    return months, row_vector[months]
def project_part1_1(train, test=None):
    """Run the regression pipeline, plot history and forecasts, and return the results.

    For every item the observed sales are drawn as a dashed line with circles
    and the first method's six-month forecast as triangles in the same colour.

    Parameters:
        train: 2-D array with one row per item; NaN entries are skipped.
        test: optional array of true future sales, indexed ``[item, month]``.

    Returns:
        ``(future_months, future_sales, errors)``. ``errors`` is the signed
        difference ``test - forecast`` for the first method, or ``None`` when
        ``test`` is not given.
    """
    future_months, future_sales, comparison, gt_error = basis_regression(train, test)
    plt.figure()
    colors = ['r','k','g','b','m','y','c']
    for i in range(train.shape[0]):
        # Cycle through the palette so more than len(colors) items no longer
        # raise IndexError.
        color = colors[i % len(colors)]
        past_months, past_sales = p11_remove_nans(train[i, :])
        plt.plot(past_months, past_sales, '--o', color=color, lw=0.7, ms=6)
        plt.plot(future_months[i,:], future_sales[i,0,:], '^', color=color, ms=8)
    plt.show()
    errors = None
    if test is not None:
        errors = test - future_sales[:,0,:]
    return future_months, future_sales, errors
# Load the comma-separated sales table as floats.
R_true = np.genfromtxt(
    'dataset12.csv',
    dtype=float,
    delimiter=',',
)
# Fit on every column; no test set is passed below, so no ground-truth
# errors are computed.
train = R_true.copy()
# Alternative: hold out the columns from index 18 onward as a test set.
# train = R_true[:,:18].copy()
# test = R_true[:,18:].copy()
project_part1_1(train)