-
Notifications
You must be signed in to change notification settings - Fork 2
/
modelselection_BIC.py
91 lines (73 loc) · 2.44 KB
/
modelselection_BIC.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
from __future__ import division
import numpy as np
from modelselection import *
def get_pis(x, gap):
x_normalized = x / np.sum(x)
pi = np.mean(x_normalized[:gap])
_pi = np.mean(x_normalized[gap:])
assert np.abs(pi * gap + _pi * (x.size - gap) - 1) < 1e-10, 'sum of probs must be 1'
return (pi, _pi)
def loglikelihood_step(x, gap, pis=None, summed=True):
assert gap >= 1, 'gap must be >= 1'
if pis is None:
pis = get_pis(x, gap)
pi_vec = np.array([pis[0]] * gap + [pis[1]] * (x.size - gap))
llh = x * np.log(pi_vec)
if summed:
llh = np.sum(llh)
return llh
def get_LR(x, xmin_max=None):
"""
compute log-likelihood ratio
note that xmin of powerlaw is assume to be 0 to compute proper likelihood ratio
"""
N = np.sum(x)
x_normalized = x / N
if xmin_max is None:
xmin_max = x.size - 1
xmin = 1#get_xmin(x, xmin_max=70)
alpha = get_scaleparam(x, xmin)
power = loglikelihood(x_normalized, alpha, xmin, summed=False)
# adjustment term to absorb the difference of parameter dimension
power_adj = - 0.5 * np.log(N)
#step_adj = - 0.5 * 2 * np.log(N)
step_adj = - 0.5 * np.log(N)
LR = np.zeros(xmin_max)
for gap in xrange(1, xmin_max + 1):
step = loglikelihood_step(x_normalized, gap, summed=False)
diff = step - power
diff_adj = step_adj - power_adj
omega2 = np.sum(x * diff ** 2) / N \
- np.sum(x * diff / N) ** 2
LR[gap - 1] = (np.sum(x * diff) + diff_adj) / np.sqrt(N * omega2)
return LR
def get_BIC(x, xmin_max=None):
"""
compute BIC of powerlaw and 2-step distribution
"""
if xmin_max is None:
xmin_max = x.size
N = np.sum(x)
BICs = np.zeros(xmin_max)
xmin = get_xmin(x, xmin_max=70)
alpha = get_scaleparam(x, xmin)
BICs[0] = loglikelihood(x, alpha, xmin) - 0.5 * np.log(N)
for gap in xrange(1, xmin_max):
BICs[gap] = loglikelihood_step(x, gap) - 0.5 * 2 * np.log(N)
return BICs
if __name__ == "__main__":
import sys
import matplotlib.pyplot as plt
U = np.loadtxt(sys.argv[1])
K = U.shape[1]
#for k in xrange(K):
# bic = get_BIC(U[:, k])
# plt.plot(bic)
# print (k + 1, np.argmax(bic))
sd = 1.96
for k in xrange(K):
LR = get_LR(U[:, k])
plt.plot(LR)
print (k + 1, np.where(LR > sd))
plt.xlim(-1, 20)
plt.show()