# risk_adj.py
# Forked from Bradleydi/Risk-Models.
import math
import numpy as np
import sklearn
import time
import scipy.stats as sps
import pandas as pd
import datetime
import pickle
import csv
from scipy import linalg
def Covariance_NW(data_cov, lambd, delay=2):
    """Return the Newey-West estimate of the covariance of the columns.

    Rows are weighted with exponentially decaying weights: the last row
    gets weight proportional to ``lambd**0``, the row before it
    ``lambd**1`` and so on, normalised to sum to 1.  The weighted lag-0
    covariance is then corrected for autocorrelation by adding, for every
    lag ``d`` in ``1..delay``, the symmetrised weighted lag-``d``
    cross-covariance scaled by ``1 - d / (delay + 1)``.

    No modification for the time period is made, i.e. if the input data is
    daily, the output is daily.

    Parameters
    ----------
    data_cov : array-like, shape (Tn, Fn)
        One row per frame, one column per factor.
    lambd : float
        Decay factor of the exponential weights.
    delay : int, optional
        Largest lag included in the autocorrelation correction.

    Returns
    -------
    numpy.ndarray, shape (Fn, Fn)
        The Newey-West adjusted covariance matrix.

    Raises
    ------
    ValueError
        If ``delay`` is not smaller than the number of rows; the lagged
        terms would then be computed from empty slices (0 / 0).
    """
    data_cov = np.asarray(data_cov, dtype=float)
    Tn, Fn = data_cov.shape
    if delay >= Tn:
        raise ValueError('delay must be smaller than the number of rows')

    # Exponentially decaying weights, largest weight on the last row.
    w = lambd ** np.arange(Tn - 1, -1, -1, dtype=float)
    w = w / w.sum()

    # Deviation of every factor from its weighted average.
    centered = data_cov - np.average(data_cov, axis=0, weights=w)
    weighted = centered * w[:, None]

    # Lag-0 weighted covariance: entry (i, j) is
    # sum_t centered[t, i] * centered[t, j] * w[t].
    # The accumulator starts from this matrix (the original code never
    # initialised it before adding the lagged terms).
    F_NW = weighted.T @ centered

    # NW modification for autocorrelation: entry (i, j) of the lag-d term is
    # sum_t centered[t, i] * centered[t + d, j] * w[t + d] / sum(w[d:]).
    for d in range(1, delay + 1):
        cov_lag = (centered[:-d].T @ weighted[d:]) / w[d:].sum()
        F_NW += (1 - d / (delay + 1.)) * (cov_lag + cov_lag.T)
    return F_NW
def NW_adjusted(data, tau=90, length=100, n_start=100, n_forward=21, NW=True):
    '''
    Estimate a covariance matrix on a window of factor returns, scale it to
    the forward horizon and express the realised forward return in its
    eigenbasis.

    Input:
        data        The pandas dataframe of factor returns, one row per frame
                    and one column per factor.  Every column is used in the
                    numerical computation, so all columns must be numeric.
        tau         Half life of the exponential weights
                    (decay factor = 0.5**(1/tau)); only used when NW is True
        length      How many frames are used to calculate the covariance matrix
                    The length must be greater than Fn (the No. of factors)
        n_start     'n_start-length' to 'n_start' frames are used to get the covariance
        n_forward   The period ahead over which returns are compounded; the
                    covariance is multiplied by n_forward
        NW          If True use Covariance_NW, otherwise the plain np.cov

            n_start-length         n_start         n_start+n_forward
                    |---------------------|-------------------------|
                             NW_cov                  Bias

    Output (None if the arguments are rejected or the decomposition check fails):
        data_cov    The window of data used for the covariance, as an array
        U           Eigenvectors of F_NW (columns), from linalg.eigh
        F_NW        n_forward * estimated covariance matrix
        R_eigen     U.T applied to the compounded return over the forward window
        Std         Square root of the eigenvalues of F_NW

    Other parameters:
        Tn          No. of frames
        Fn          No. of factors
    '''
    if n_start < length:
        print('ERROR: n_start should be greater than length')
        return None
    elif n_start + n_forward >= data.shape[0]:
        # The forward window must end before the last available frame.
        print('ERROR: n_start + n_forward should be less than No. of total frames')
        return None

    # DataFrame.as_matrix() no longer exists in current pandas; .values
    # gives the same ndarray on old and new versions.
    data_cov = data.iloc[n_start - length:n_start, :].values
    lambd = 0.5 ** (1. / tau)

    # Covariance of the estimation window, scaled to the forward horizon.
    if NW:
        F_NW = n_forward * Covariance_NW(data_cov, lambd)
    else:
        F_NW = n_forward * np.cov(data_cov.T)

    # Eigen-decomposition of the (symmetric) covariance matrix.
    s, U = linalg.eigh(F_NW)

    # Compounded return of every factor over the forward window.
    forward = data.iloc[n_start:n_start + n_forward, :]
    r = (forward + 1).cumprod().iloc[-1, :] - 1
    R_eigen = np.dot(U.T, r)
    Var_eigen = s

    # Sanity check: the decomposition must reproduce F_NW.
    if not np.allclose(F_NW, U @ np.diag(s) @ U.T):
        print('ERROR in eigh')
        return None
    return data_cov, U, F_NW, R_eigen, np.sqrt(Var_eigen)
def Eigen_adjusted(F_NW, U, Std_i, length=252, N_mc=1000):
    """Monte-Carlo estimate of a per-eigenvalue bias statistic.

    For each of ``N_mc`` simulations the function
      1. draws ``length`` independent normal samples (mean 0) for every
         entry ``std_`` of ``Std_i``,
      2. rotates them with ``U``,
      3. eigen-decomposes the sample covariance of the rotated series,
      4. evaluates ``diag(U_mc.T @ F_NW @ U_mc) / s`` with the simulated
         eigenvectors ``U_mc`` and eigenvalues ``s``.
    The square root of the average of step 4 over all simulations is
    returned.

    NOTE(review): this looks like the simulated-bias step of an
    eigenfactor risk adjustment, with ``U`` and ``Std_i`` taken from the
    eigen-decomposition of ``F_NW`` -- confirm against the caller.

    Parameters
    ----------
    F_NW : numpy.ndarray, shape (Fn, Fn)
        Reference covariance matrix.
    U : numpy.ndarray, shape (Fn, Fn)
        Rotation applied to the simulated series.
    Std_i : sequence of float, length Fn
        Standard deviation of each simulated series.
    length : int, optional
        Number of samples per simulated series.
    N_mc : int, optional
        Number of Monte-Carlo simulations.

    Returns
    -------
    numpy.ndarray, shape (Fn,)

    Raises
    ------
    ValueError
        If ``N_mc`` is smaller than 1 (there would be nothing to average).
    """
    if N_mc < 1:
        raise ValueError('N_mc must be at least 1')

    stat = 0.
    for i in range(N_mc):
        # Progress indicator every 200 simulations.
        if i % 200 == 0:
            print(i)
        # One row of simulated returns per entry of Std_i.
        r_mc = np.array([np.random.normal(0, std_, length) for std_ in Std_i])
        r_mc = np.dot(U, r_mc)
        F_mc = np.cov(r_mc)
        s, U_mc = linalg.eigh(F_mc)
        # Variance of F_NW along the simulated eigenvectors.
        q = U_mc.T @ F_NW @ U_mc
        stat = stat + np.diagonal(q) / s
    return np.sqrt(stat / N_mc)