-
Notifications
You must be signed in to change notification settings - Fork 0
/
Abrahams-2009aa-results.R
109 lines (103 loc) · 6.16 KB
/
Abrahams-2009aa-results.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
library( "ggplot2" )
the.results <- read.csv( "Abrahams-2009aa-results.csv" )
# Those results had not been made with a set threshold so the Profillic calls are not right. Looking at it, a threshold of 20 works well to match their multiplicity count.
pdf( "Abrahams-2009aa-results_pHMM_entropy_hist.pdf" )
ggplot( data=data.frame( the.results ), aes ( the.results$pEntropy ) ) + geom_histogram( breaks=seq(0, 100, by = 1), col = "black", fill = "blue", alpha = .2 ) + scale_x_discrete(breaks = number_ticks(n = 10 ) ) + labs(x="Entropy of Profile HMM", y="Count") + geom_vline(xintercept=c(20,20), col = "blue")
dev.off();
the.results[ , "Profillic" ] <- ifelse( as.numeric( the.results[ , "pEntropy" ] ) < 20, "1", the.results[ , "Profillic_nent" ] );
#write.csv( the.results, file = "Abrahams-2009aa-results-withCorrectProfillicCalls.csv", row.names = FALSE )
print( xtable( the.results ), include.rownames = FALSE );
# % latex table generated in R 3.1.2 by xtable 1.7-4 package
# % Thu Oct 22 10:42:32 2015
# \begin{table}[ht]
# \centering
# \begin{tabular}{llrrrrrlr}
# \hline
# Participant & fibig & nSeq & nHyper & nRecombined & pEntropy & Profillic\_nent & Profillic & Abrahams \\
# \hline
# 0089 & 5 & 22 & 0 & 0 & 6.76 & 3 & 1 & 1 \\
# 0114 & 4 & 27 & 0 & 4 & 65.50 & 3 & 3 & 3 \\
# 0334 & 1-2 & 22 & 0 & 0 & 5.02 & 2 & 1 & 1 \\
# 0393 & 4 & 22 & 0 & 0 & 5.57 & 2 & 1 & 1 \\
# 0478 & 1-2 & 23 & 0 & 3 & 70.71 & 2 & 2 & 3 \\
# 0595 & 4 & 28 & 0 & 0 & 6.93 & 6 & 1 & 1 \\
# 0626 & 4 & 24 & 0 & 0 & 5.57 & 2 & 1 & 1 \\
# 0665 & 4 & 20 & 0 & 0 & 6.86 & 2 & 1 & 1 \\
# 0682 & 1-2 & 22 & 0 & 0 & 4.73 & 2 & 1 & 1 \\
# 0985 & 1-2 & 23 & 0 & 0 & 4.61 & 2 & 1 & 1 \\
# 1086 & 1-2 & 24 & 0 & 0 & 8.49 & 2 & 1 & 1 \\
# 1172 & 1-2 & 20 & 0 & 0 & 4.90 & 2 & 1 & 1 \\
# 1176 & 1-2 & 21 & 0 & 0 & 9.27 & 3 & 1 & 3 \\
# 1196 & 1-2 & 23 & 2 & 0 & 32.78 & 2 & 2 & 3 \\
# 1335 & 4 & 21 & 0 & 1 & 85.85 & 2 & 2 & 3 \\
# 1373 & 1-2 & 22 & 0 & 0 & 6.36 & 3 & 1 & 1 \\
# 1394 & 1-2 & 20 & 0 & 0 & 6.40 & 3 & 1 & 1 \\
# 2010 & 4 & 23 & 0 & 0 & 6.78 & 2 & 1 & 1 \\
# 2052 & 1-2 & 23 & 0 & 0 & 5.46 & 2 & 1 & 1 \\
# 2060 & 1-2 & 22 & 0 & 0 & 5.55 & 2 & 1 & 1 \\
# 2103 & 1-2 & 20 & 0 & 0 & 4.83 & 2 & 1 & 1 \\
# 703010010 & 2 & 22 & 0 & 0 & 21.40 & 3 & 3 & 3 \\
# 703010054 & 5-6 & 27 & 0 & 0 & 15.91 & 2 & 1 & 1 \\
# 703010131 & 4 & 22 & 0 & 0 & 6.10 & 2 & 1 & 1 \\
# 703010159 & 2 & 20 & 0 & 0 & 6.89 & 5 & 1 & 1 \\
# 703010193 & 4 & 24 & 1 & 0 & 6.75 & 2 & 1 & 1 \\
# 703010200 & 4 & 18 & 0 & 8 & 57.09 & 3 & 3 & 3 \\
# 703010217 & 5-6 & 25 & 0 & 0 & 6.63 & 2 & 1 & 1 \\
# 703010228 & 4 & 28 & 0 & 3 & 53.28 & 2 & 2 & 2 \\
# 704010017 & 5-6 & 25 & 0 & 0 & 14.24 & 3 & 1 & 1 \\
# 704010042 & 4 & 42 & 0 & 0 & 7.80 & 2 & 1 & 1 \\
# 704010056 & 5-6 & 21 & 0 & 1 & 17.73 & 4 & 1 & 1 \\
# 704010069 & 4 & 23 & 0 & 0 & 9.71 & 2 & 1 & 1 \\
# 704010083 & 2 & 24 & 0 & 0 & 6.15 & 2 & 1 & 1 \\
# 704809221 & 1-2 & 28 & 0 & 0 & 6.85 & 2 & 1 & 1 \\
# 704810053 & 1-2 & 20 & 0 & 0 & 7.61 & 2 & 1 & 1 \\
# 704810015 & x & 23 & 0 & 0 & 9.84 & 3 & 1 & 1 \\
# 705010026 & x & 23 & 0 & 0 & 7.79 & 3 & 1 & 1 \\
# 705010078 & 4 & 26 & 0 & 0 & 5.99 & 3 & 1 & 1 \\
# 705010110 & x & 20 & 0 & 0 & 8.98 & 6 & 1 & 1 \\
# 706010018 & 4 & 23 & 0 & 0 & 15.52 & 2 & 1 & 1 \\
# 706010151 & 6 & 15 & 0 & 7 & 53.90 & 2 & 2 & 2 \\
# 706010164 & 4 & 19 & 0 & 0 & 5.23 & 2 & 1 & 1 \\
# CAP129 & 4 & 19 & 0 & 0 & 5.20 & 2 & 1 & 1 \\
# CAP136 & 5 & 16 & 0 & 2 & 28.86 & 2 & 2 & 2 \\
# CAP174 & 5 & 21 & 0 & 0 & 6.37 & 2 & 1 & 1 \\
# CAP177 & 1-2 & 20 & 0 & 0 & 4.97 & 2 & 1 & 1 \\
# CAP188 & 1-2 & 22 & 0 & 0 & 5.51 & 2 & 1 & 1 \\
# CAP200 & 5 & 18 & 0 & 0 & 5.77 & 2 & 1 & 1 \\
# CAP206 & 5 & 21 & 0 & 0 & 6.30 & 2 & 1 & 1 \\
# CAP210 & 1-2 & 21 & 0 & 0 & 4.35 & 2 & 1 & 1 \\
# CAP217 & 5 & 20 & 0 & 0 & 4.88 & 2 & 1 & 1 \\
# CAP220 & 1-2 & 15 & 0 & 0 & 7.61 & 2 & 1 & 1 \\
# CAP221 & 1-2 & 21 & 0 & 0 & 7.10 & 2 & 1 & 1 \\
# CAP222 & 1-2 & 21 & 0 & 0 & 54.08 & 3 & 3 & 3 \\
# CAP224 & 5 & 19 & 0 & 0 & 21.18 & 2 & 2 & 2 \\
# CAP225 & 3 & 20 & 0 & 0 & 6.85 & 2 & 1 & 1 \\
# CAP237 & 3 & 22 & 0 & 0 & 5.87 & 2 & 1 & 1 \\
# CAP239 & 5 & 24 & 0 & 0 & 6.49 & 3 & 1 & 1 \\
# CAP260 & 5 & 18 & 0 & 5 & 47.68 & 2 & 2 & 2 \\
# CAP269 & 6 & 18 & 0 & 0 & 16.37 & 3 & 1 & 1 \\
# CAP37 & 4 & 20 & 0 & 2 & 67.29 & 3 & 3 & 3 \\
# CAP40 & 6 & 22 & 1 & 0 & 12.04 & 2 & 1 & 1 \\
# CAP45 & 1-2 & 16 & 0 & 0 & 4.81 & 2 & 1 & 1 \\
# CAP63 & 3 & 19 & 1 & 0 & 5.93 & 2 & 1 & 1 \\
# CAP69 & 1-2 & 20 & 0 & 9 & 61.61 & 3 & 3 & 5 \\
# CAP8 & 5 & 62 & 0 & 1 & 13.93 & 3 & 1 & 1 \\
# CAP84 & 4 & 22 & 1 & 0 & 5.67 & 1 & 1 & 1 \\
# CAP85 & 6 & 21 & 2 & 0 & 11.89 & 2 & 1 & 1 \\
# \hline
# \end{tabular}
# \end{table}
sum( ( the.results[ , "Abrahams" ] > 1 ) & ( the.results[ , "Abrahams" ] == the.results[ , "Profillic" ] )
+ )
# [1] 10
the.results[ ( the.results[ , "Abrahams" ] > 1 ) & ( the.results[ , "Profillic" ] > 1 ) & ( the.results[ , "Abrahams" ] != the.results[ , "Profillic" ] ), ]
# Participant fibig nSeq nHyper nRecombined pEntropy Profillic_nent Profillic
# 5 0478 1-2 23 0 3 70.71 2 2
# 14 1196 1-2 23 2 0 32.78 2 2
# 15 1335 4 21 0 1 85.85 2 2
# 66 CAP69 1-2 20 0 9 61.61 3 3
# Abrahams
# 5 3
# 14 3
# 15 3
# 66 5