-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcorrelation_plots.py
116 lines (92 loc) · 3.53 KB
/
correlation_plots.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#!/usr/bin/env python
import pandas as pd
import matplotlib.pyplot as plt
def calculate_correlation_stats(x, y):
    """Summarize the agreement between paired values as a block of text.

    ``x`` is treated as the reference: every error is ``y - x`` and the
    percentage errors are taken relative to ``x``.

    Parameters
    ----------
    x, y : list-like or pandas.Series
        Paired numeric observations. Both are placed in one DataFrame,
        so Series inputs are matched up by index.

    Returns
    -------
    str
        Three paragraphs separated by blank lines, every value rounded to
        3 decimals: Pearson's r and RMSE; the mean-based stats (MAE, MAPE,
        mean bias); the median-based stats (DAE, DAPE, median bias).
    """
    df = pd.DataFrame({'x': x, 'y': y})

    # coefficient of correlation -- looked up by label, because integer
    # keys on a string-labelled Series are deprecated positional access
    pearsons_r = df.corr().loc['y', 'x']

    # other stats building blocks
    error = df['y'] - df['x']
    abs_error = error.abs()
    # relative to x, so a zero in x gives a non-finite percentage here
    abs_percent_error = 100 * (error / df['x']).abs()

    # error and bias stats, grouped the way they are laid out in the text
    paragraphs = [
        [("Pearson's r", pearsons_r),
         ('RMSE', (error ** 2).mean() ** 0.5)],
        [('MAE', abs_error.mean()),
         ('MAPE', abs_percent_error.mean()),
         ('mean bias', error.mean())],
        [('DAE', abs_error.median()),
         ('DAPE', abs_percent_error.median()),
         ('median bias', error.median())],
    ]

    # create text block: one "label: value" line per stat, blank line
    # between paragraphs
    return '\n\n'.join(
        '\n'.join(label + ': ' + str(round(value, 3))
                  for label, value in paragraph)
        for paragraph in paragraphs
    )
def correlation_scatter(x, y,
                        group=None,
                        marker_size=8,
                        xlab='please add x-axis label',
                        ylab='please add y-axis label',
                        title='',
                        stats=False):
    """Draw a square x-vs-y scatter plot with a one-to-one line and show it.

    Parameters
    ----------
    x, y : list-like or pandas.Series
        Paired values to plot; both axes share the same limits.
    group : pandas.Series, optional
        Group label for each point (must provide ``.unique()``). When it
        holds between 2 and 20 distinct values, each group is drawn in its
        own "tab20" color and a legend is added. Otherwise all points are
        drawn in one color without a legend. ``None`` means no grouping.
    marker_size : number
        Passed to ``plt.scatter`` as ``s``.
    xlab, ylab, title : str
        Axis labels and plot title.
    stats : bool
        When true, the text from ``calculate_correlation_stats(x, y)`` is
        written onto the figure.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # A fresh empty Series per call instead of one shared default object.
    if group is None:
        group = pd.Series(dtype=object)

    # plot groups as different colors if there are between 2 and 20 groups.
    # A chained comparison is required: `n > 1 & n < 21` would be parsed as
    # `n > (1 & n) < 21`, which never enforces the upper bound.
    num_grps = len(group.unique())
    use_groups = 1 < num_grps < 21

    if use_groups:
        grps = sorted(group.unique())
        # "tab20" supplies one color per group, hence the cap of 20
        cmap = plt.get_cmap("tab20")
        colmap = dict(zip(grps, cmap.colors))
        df = pd.DataFrame({'x': x, 'y': y, 'group': group})
        for grp in grps:
            subdf = df[df['group'] == grp]
            plt.scatter(subdf['x'], subdf['y'],
                        s=marker_size, color=colmap[grp], label=grp)
    else:
        # don't use groups; just plot them the same color without a legend
        plt.scatter(x, y, s=marker_size)

    # figure out the axis limits: common to both axes, padded by 5% of the
    # overall data range
    smallest = min(min(x), min(y))
    largest = max(max(x), max(y))
    margin = (largest - smallest) * 0.05
    low = smallest - margin
    high = largest + margin

    # one-to-one line
    plt.plot([low, high], [low, high], color='k', linestyle='-', linewidth=1)

    # axes config and title
    plt.xlim(low, high)
    plt.ylim(low, high)
    plt.xlabel(xlab)
    plt.ylabel(ylab)
    plt.title(title)

    # legend outside, anchored to the right of the axes
    if use_groups:
        plt.legend(loc='upper left', bbox_to_anchor=(1.6, 1))

    # statistics
    if stats:
        plt.figtext(0.8, 0.85,
                    calculate_correlation_stats(x, y),
                    va='top')

    # make it a square plot
    plt.gca().set_aspect('equal', adjustable='box')
    plt.show()
def correlation_scatter_with_stats(x, y,
                                   group=None,
                                   marker_size=8,
                                   xlab='please add x-axis label',
                                   ylab='please add y-axis label',
                                   title=''):
    """Call ``correlation_scatter`` with the statistics text switched on.

    Parameters
    ----------
    x, y : list-like or pandas.Series
        Paired values to plot.
    group : pandas.Series, optional
        Group label for each point; ``None`` means no grouping.
    marker_size, xlab, ylab, title
        Forwarded unchanged to ``correlation_scatter``.

    Returns
    -------
    None
    """
    # A fresh empty Series per call instead of one shared default object;
    # an actual Series is handed on because the callee calls `.unique()`.
    if group is None:
        group = pd.Series(dtype=object)

    # Keyword arguments keep the forwarding explicit, in particular the
    # trailing stats flag.
    correlation_scatter(x, y,
                        group=group,
                        marker_size=marker_size,
                        xlab=xlab,
                        ylab=ylab,
                        title=title,
                        stats=True)