-
Notifications
You must be signed in to change notification settings - Fork 3
/
data_processing.py
111 lines (103 loc) · 3.55 KB
/
data_processing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats
def extract_prices(filename):
    """Collect the "Price" records of a ';'-separated log file, grouped by field 1.

    Every line whose first field is exactly "Price" contributes two integers:
    field 6 and field 4 (presumably the time stamp and the traded price --
    confirm against the log writer).

    Returns a dict mapping field 1 (presumably the asset name) to a tuple
    ``(field_6_values, field_4_values)`` of two parallel lists, in file order.
    """
    series_by_asset = {}
    with open(filename, 'r') as handle:
        for raw in handle:
            fields = raw.split(';')
            if fields[0] != "Price":
                continue
            asset = fields[1]
            # int() tolerates the trailing newline carried by the last field.
            stamp = int(fields[6])
            price = int(fields[4])
            stamps, prices = series_by_asset.setdefault(asset, ([], []))
            stamps.append(stamp)
            prices.append(price)
    return series_by_asset
def extract_qties(filename):
    """Sum field 5 of the "Price" records between consecutive "Tick" records.

    The file is read as ';'-separated lines. Field 5 of a "Price" line is added
    to a running total unless it is the string "None". Each "Tick" line emits
    the running total accumulated since the previous "Tick" line (0 for the
    first one if no "Price" line precedes it) and resets it.

    Returns ``(ticks, quantities)``: ``ticks`` is ``[1, 2, ...]``, one entry per
    "Tick" line, and ``quantities`` holds the matching totals. Anything
    accumulated after the last "Tick" line is not reported.
    """
    ticks, quantities = [], []
    pending = 0
    with open(filename, 'r') as handle:
        for raw in handle:
            fields = raw.split(';')
            kind = fields[0]
            if kind == "Tick":
                # Ticks are simply numbered 1, 2, 3, ... in order of appearance.
                ticks.append(len(ticks) + 1)
                quantities.append(pending)
                pending = 0
            elif kind == "Price" and fields[5] != "None":
                pending += int(fields[5])
    return ticks, quantities
def extract_wealths(filename):
    """Collect the "AgentWealth" records of a ';'-separated log file per agent.

    Every line whose first field is exactly "AgentWealth" contributes two
    integers: field 3 and field 2 (presumably the time stamp and the wealth --
    confirm against the log writer).

    Returns a dict mapping field 1 (presumably the agent name) to a tuple
    ``(field_3_values, field_2_values)`` of two parallel lists, in file order.
    """
    history = {}
    with open(filename, 'r') as stream:
        for record in stream:
            parts = record.split(';')
            if parts[0] == "AgentWealth":
                agent = parts[1]
                stamp, wealth = int(parts[3]), int(parts[2])
                if agent not in history:
                    history[agent] = ([], [])
                stamps, wealths = history[agent]
                stamps.append(stamp)
                wealths.append(wealth)
    return history
def extract_cash(filename):
    """Collect the "Agent" records of a ';'-separated log file per agent.

    Every line whose first field is exactly "Agent" contributes two integers:
    the last field and field 2 (presumably the time stamp and the cash
    balance -- confirm against the log writer).

    Returns a dict mapping field 1 (presumably the agent name) to a tuple
    ``(last_field_values, field_2_values)`` of two parallel lists, in file
    order.
    """
    balances = {}
    with open(filename, 'r') as source:
        for row in source:
            cells = row.split(';')
            if cells[0] != "Agent":
                continue
            agent = cells[1]
            # The stamp is taken from the end of the line, whatever its length.
            stamp = int(cells[-1])
            amount = int(cells[2])
            try:
                stamps, amounts = balances[agent]
            except KeyError:
                stamps, amounts = [], []
                balances[agent] = (stamps, amounts)
            stamps.append(stamp)
            amounts.append(amount)
    return balances
def extract_limit_orders(filename):
    """Collect the "LimitOrder" records of a ';'-separated log file per field 2.

    Every line whose first field is exactly "LimitOrder" contributes the last
    field converted to int (presumably a time stamp) and field 3 kept as a raw
    string (its meaning is not visible here -- confirm against the log writer).

    Returns a dict mapping field 2 (presumably the asset name) to a tuple
    ``(last_field_values, field_3_values)`` of two parallel lists, in file
    order.
    """
    orders = {}
    with open(filename, 'r') as handle:
        for entry in handle:
            tokens = entry.split(';')
            if tokens[0] != "LimitOrder":
                continue
            key = tokens[2]
            stamp = int(tokens[-1])
            detail = tokens[3]  # deliberately left unparsed
            stamps, details = orders.setdefault(key, ([], []))
            stamps.append(stamp)
            details.append(detail)
    return orders
def draw_returns_hist(filename, asset, nb_pts, tau=1):
    """Plot the empirical density of an asset's log-returns against a normal PDF.

    The price series of ``asset`` is read from ``filename`` with
    ``extract_prices``; log-returns over a lag of ``tau`` observations are
    binned into ``nb_pts`` equal-width bins and drawn on a semi-log scale,
    together with the normal density that has the same mean and standard
    deviation. The figure is displayed with ``plt.show()``.

    Parameters:
        filename: path of the log file to read.
        asset: key of the asset in the dict returned by ``extract_prices``.
        nb_pts: number of histogram bins (the normal curve uses twice as many
            sample points).
        tau: lag, in number of price observations, used to compute returns.

    Raises:
        ValueError: if ``tau`` is smaller than 1.
        KeyError: if ``asset`` has no recorded price in the file.
    """
    if tau < 1:
        # A zero or negative lag would slice the series into misaligned or
        # empty pieces and produce meaningless "returns".
        raise ValueError("tau must be a positive integer, got %r" % (tau,))

    prices = np.array(extract_prices(filename)[asset][1])
    returns = np.log(prices[tau:]) - np.log(prices[:-tau])

    # np.histogram bins the data without drawing anything. density=True divides
    # each count by (total count * bin width), so the points integrate to 1 and
    # are directly comparable with the normal PDF below.
    density, edges = np.histogram(returns, bins=nb_pts, density=True)
    centres = (edges[1:] + edges[:-1]) / 2  # abscissa of the middle of each bin

    # Normal law with the same mean and (population) standard deviation as the
    # returns, sampled on a symmetric grid around zero.
    mu = np.mean(returns)
    sigma = np.sqrt(np.mean((returns - mu) ** 2))
    half_width = np.max(np.abs(centres))
    grid = np.linspace(-half_width, half_width, nb_pts * 2)
    normal_pdf = scipy.stats.norm.pdf(grid, mu, sigma)

    # Start from an empty current figure so earlier artists are not overlaid.
    plt.clf()
    # scipy.stats.kurtosis returns the excess kurtosis; adding 3 gives the
    # plain kurtosis (3 for a normal law).
    plt.semilogy(centres, density, 'o', label='Returns for tau = %i. Kurtosis = %.2f' % (tau, 3+scipy.stats.kurtosis(returns)))
    plt.semilogy(grid, normal_pdf, '--', label='Normal PDF')
    plt.xlabel('Returns')
    plt.ylabel('Density')
    plt.legend(loc='best')
    plt.title('Distribution of returns')
    x_limit = half_width * 1.05  # small horizontal margin around the data
    plt.axis([-x_limit, x_limit, 10**-3, np.max(density) * 2])
    plt.grid()
    plt.show()
def smooth(lst, p):
    """Return the centred moving average of ``lst`` with half-window ``p``.

    Element ``i`` of the result is the mean of ``lst[i-p : i+p+1]``; near both
    ends the window is clipped to the sequence, so fewer values are averaged
    there. The result is a new list with the same length as ``lst``.
    """
    size = len(lst)
    # (start, stop) slice bounds of the clipped window around each index.
    windows = ((max(0, i - p), min(size, i + p + 1)) for i in range(size))
    return [sum(lst[start:stop]) / (stop - start) for start, stop in windows]