-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathAUDCAD_unequal_mod.py
184 lines (150 loc) · 7.78 KB
/
AUDCAD_unequal_mod.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
# Example 5.1: Pair Trading USD.AUD vs USD.CAD Using the Johansen Eigenvector
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# import statsmodels.formula.api as sm
# import statsmodels.tsa.stattools as ts
import statsmodels.tsa.vector_ar.vecm as vm
from model import LSTM_train, prediction
from seq_norm import seq_and_norm
from evaluation import position_returns, eval_metrics
from check_mean_reversion import *
# Use the ggplot theme for every matplotlib figure drawn by this script.
plt.style.use('ggplot')


def generate_plot(xaxis, trace, trace_label, title, ylabel, xlabel):
    """Draw several series on one new figure and display it.

    Args:
        xaxis: Accepted for interface compatibility only; it is not used,
            so every series is plotted without explicit x values.
        trace: Sequence of array-likes, one per line to draw.
        trace_label: Legend labels, indexed in step with ``trace``.
        title: Figure title.
        ylabel: Label of the y axis.
        xlabel: Label of the x axis.
    """
    plt.figure()
    for position, series in enumerate(trace):
        plt.plot(series, label=trace_label[position])
    # Decorations are independent of each other; only show() must come last.
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.xticks(rotation=45)
    plt.legend()
    plt.show()
# If you trade currencies both must end in the same currency, here USD
def _load_rates(price_column):
    """Load one daily price column for CAD/USD and AUD/USD, joined on date.

    Reads 'CADUSD=X.csv' and 'AUDUSD=X.csv', keeps 'Date' plus
    ``price_column`` from each file, renames that column to the currency
    code, inner-joins both files on 'Date' and drops rows with missing
    values.

    Args:
        price_column: Name of the CSV price column to keep, e.g. 'Close'
            or 'Open'.

    Returns:
        DataFrame indexed by 'Date' with the columns 'CAD' and 'AUD'
        (in that order).

    Raises:
        KeyError: If one of the expected CSV columns is missing.
    """
    csv_columns = ('Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume')
    unused = [name for name in csv_columns if name != price_column]

    frames = []
    for path, currency in (('CADUSD=X.csv', 'CAD'), ('AUDUSD=X.csv', 'AUD')):
        quotes = pd.read_csv(path)
        # `infer_datetime_format` is deprecated since pandas 2.0 (a consistent
        # format is inferred by default), so it is no longer passed.
        quotes['Date'] = pd.to_datetime(quotes['Date'])
        # To restrict the sample period, filter here, e.g.
        # quotes = quotes[(quotes['Date'] > '2009-08-20') & (quotes['Date'] < '2014-11-10')]
        quotes = quotes.drop(columns=unused).rename(columns={price_column: currency})
        frames.append(quotes)

    rates = pd.merge(frames[0], frames[1], how='inner', on='Date')
    rates.set_index('Date', inplace=True)
    rates.dropna(inplace=True)
    return rates


# Closing prices drive the cointegration test and the trading signal;
# opening prices are loaded separately and handed to the evaluation step.
df = _load_rates('Close')
df_opens = _load_rates('Open')
print(df_opens.shape)
print(df.shape)
# Check mean reversion on the closing prices.
# NOTE(review): plot_scatter / adf_test1 presumably come from the
# `check_mean_reversion` star import - confirm.
plot_scatter(df)
adf_test1(df)

# Plot the raw series. Each call passes the index of the frame it actually
# plots (the closing-price plot previously passed the opening-price index).
generate_plot(df.index, [df.loc[:, "AUD"], df.loc[:, "CAD"]], ["AUD-USD", "CAD-USD"],
              "Conversion Rate (Closing Price)", "Price", "Time Period")
generate_plot(df_opens.index, [df_opens.loc[:, "AUD"], df_opens.loc[:, "CAD"]], ["AUD-USD", "CAD-USD"],
              "Conversion Rate (Opening Price)", "Price", "Time Period")
trainlen = 250  # number of rows in each rolling Johansen estimation window
lookback = 60  # number of rows used for the signal's mean / standard deviation
print("Lookback", lookback)
print(df.head(5))

# np.nan is used instead of the np.NaN alias, which was removed in NumPy 2.0.
hedgeRatio = np.full(df.shape, np.nan)
numUnits = np.full(df.shape[0], np.nan)

# The per-step results are collected in plain lists and turned into a frame
# once after the loop: DataFrame.append was removed in pandas 2.0 and copied
# the whole frame on every iteration.
signal_dates = []
signal_values = []
for t in range(trainlen + 1, df.shape[0]):
    # Johansen cointegration test on the `trainlen` rows that end just before
    # row t - 1; the first column of `evec` is used as the hedge ratio.
    result = vm.coint_johansen(df.values[(t - trainlen - 1):t - 1], det_order=0, k_ar_diff=1)
    weights = result.evec[:, 0]
    hedgeRatio[t, :] = weights
    # hedgeRatio[t,:]=result.evec[:,0]/result.evec[:, 0][0] #almost the same

    # Dot product of the last `lookback` rows of closes with the hedge ratio:
    # the net market value of the unit portfolio (one value per row).
    spread = np.dot(df.values[(t - lookback):t], weights)
    latest = spread[-1]

    # Number of units of the unit portfolio to hold. Note the negative sign:
    # this is a mean-reversion bet. ddof=1 matches the sample standard
    # deviation that pandas' .std() computed here before.
    numUnits[t] = -(latest - spread.mean()) / spread.std(ddof=1)

    # The latest portfolio value is the signal, labelled with the date of the
    # last row in the lookback window.
    signal_dates.append(df.index[t - 1])
    signal_values.append(latest)

# One 'signal' column indexed by date, consumed by the model-data preparation.
target = pd.DataFrame({'signal': signal_values}, index=signal_dates)
# Build the LSTM data set.
# Model inputs: the signal together with the CAD/USD and AUD/USD closing rates.
# Model target (per the original author's note): the next day's signal.
# The signal rows are matched to the closing prices through their indexes.
joined = pd.merge(target, df, left_on=target.index, right_on=df.index, how="inner")
# Row index of the merged frame; slices of it are passed to the plotting calls.
index = joined.index
data_to_model = joined.reset_index(drop=True)[['signal', 'CAD', 'AUD']]

# Chronological split: the first 80% of the rows train the model, the rest test it.
train_size = int(0.80 * len(data_to_model))
train, test = data_to_model.iloc[:train_size, :], data_to_model.iloc[train_size:, :]
index_train, index_test = index[:train_size], index[train_size:]
batch_size = 16
print("Batch Size", batch_size)

# Convert the data into sequences of length batch_size; per the original
# author's note each sequence/window is normalised.
# NOTE(review): the third return value is used below as the actual target
# (column 0) and is handed to prediction() - presumably for the inverse
# transformation; confirm against seq_norm.seq_and_norm.
train_x, train_y, norm_y_train = seq_and_norm(train, batch_size)
test_x, test_y, norm_y_test = seq_and_norm(test, batch_size)

# Fit the model and show the learning curves.
train_loss, val_loss, model = LSTM_train(train_x, train_y, batch_size)
plt.plot(train_loss, label="Training Loss")
plt.plot(val_loss, label='Validation Loss')
plt.legend()
plt.show()

# Predictions on train and test + inverse transformation
train_pred = prediction(train_x, model, norm_y_train)
test_pred = prediction(test_x, model, norm_y_test)
test_rmspe = eval_metrics(norm_y_test[:, 0], test_pred)
train_rmspe = eval_metrics(norm_y_train[:, 0], train_pred)
print("Test Set RMSPE: {rmspe}%".format(rmspe=test_rmspe))
print("Train Set RMSPE: {rmspe}%".format(rmspe=train_rmspe))

# Plot predicted against actual trading signal. The test-set plot is given
# the test index (it was previously handed the train index).
generate_plot(index_train, [train_pred, norm_y_train[:, 0]], ["Predicted Train", "Actual Train"],
              "Trading Signal RMSPE {}%".format(train_rmspe), "Trading Signal", "Time Period")
generate_plot(index_test, [test_pred, norm_y_test[:, 0]], ["Predicted Test", "Actual Test"],
              "Trading Signal RMSPE {}%".format(test_rmspe), "Trading Signal", "Time Period")
# Positions, returns and evaluation - train set.
# NOTE(review): start/end here are row offsets of the model data
# (batch_size .. train_size), while the test-set section derives them from
# df.shape[0]. The first signal only exists after the Johansen warm-up, so
# the two coordinate systems differ; the original inline note on `start`
# read "270". Confirm which rows evaluation.position_returns expects.
start = batch_size
end = train_size
print("+++++++++Results for Prediction on Train Set+++++++++")
positions_train, pnl_train, returns_train = position_returns(train_pred, hedgeRatio, df, df_opens, start, end)
print("+++++++++Results on Actual Train Set+++++++++")
# The actual signal skips the first batch_size rows so that it lines up with
# the predictions.
positions_train_ac, pnl_train_ac, returns_train_ac = position_returns(np.array(train.loc[batch_size:, 'signal']),
                                                                      hedgeRatio, df, df_opens, start, end)

# Predicted vs. actual: positions, profit/loss and cumulative returns.
generate_plot(index_train, [positions_train, positions_train_ac], ["Predicted", "Actual"], "Positions on Train Set",
              "Positions", "Time Period")
# Axis label typo "Proft/Loss" corrected.
generate_plot(index_train, [pnl_train, pnl_train_ac], ["Predicted", "Actual"], "Profit/Loss on Train Set", "Profit/Loss",
              "Time Period")
generate_plot(index_train, [returns_train, returns_train_ac], ['Predicted', "Actual"], "Return on Train Set",
              "Cumulative Returns", "Time Period")
# Positions, returns and evaluation - test set.
# The evaluation window is the last test_pred.shape[0] rows of df.
start = df.shape[0] - test_pred.shape[0]
end = df.shape[0]
print("+++++++++Results for Prediction on Test Set+++++++++")
positions_test, pnl_test, returns_test = position_returns(test_pred, hedgeRatio, df, df_opens, start, end)
print("+++++++++Results on Actual Test Set+++++++++")
# `test` keeps the row labels of the full model data, so its first label is
# train_size; skipping batch_size more rows lines the actual signal up with
# the predictions.
positions_test_ac, pnl_test_ac, returns_test_ac = position_returns(
    np.array(test.loc[train_size + batch_size:, 'signal']), hedgeRatio, df, df_opens, start, end)

# Predicted vs. actual: positions, profit/loss and cumulative returns.
generate_plot(index_test, [positions_test, positions_test_ac], ["Predicted", "Actual"], "Positions on Test Set",
              "Positions", "Time Period")
# Title and axis label typo "Proft/Loss" corrected.
generate_plot(index_test, [pnl_test, pnl_test_ac], ["Predicted", "Actual"], "Profit/Loss on Test Set", "Profit/Loss",
              "Time Period")
generate_plot(index_test, [returns_test, returns_test_ac], ["Predicted", "Actual"], "Cumulative Returns on Test Set",
              "Cumulative Returns", "Time Period")