-
Notifications
You must be signed in to change notification settings - Fork 1
/
R_demo_blogpost4.R
148 lines (135 loc) · 4.88 KB
/
R_demo_blogpost4.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#Demonstration of p-values
#D.V.M. Bishop, started 17th Dec 2017
#Simulate repeated sampling from population with null effect, plot distribution of obtained effect sizes
#NB this script is not an efficient way to do this - designed so each step makes sense for beginners
require(plotrix) #for boxed.labels function - text with white background
options(scipen = 999)#turn off scientific notation
set.seed(12) #to ensure same values generated on each run - change to any other number for different results
#-----------------------------------------------------------------------
#Pvalue demo: simulated data with no effect for 10000 runs
#-----------------------------------------------------------------------
#Plots histograms for effect size with N = 10 and N = 80
#NB bin width needs careful crafting to ensure both plots are similar
#See histo help: this explains that if you try to set binwidth it is treated as 'suggestion'
set.seed(17)
mypop <- rnorm(100000, 2, 1) #population data
#set up to save plots
plotsave <- 1 #if you just want plots in plot window, set to zero
if (plotsave == 1) {
pngname <- 'Fig4_1_hist_effsize_freq.png'
png(pngname, width = 650, height = 300)
}
par(mfrow = c(1, 2)) #plot in one row and 2 columns
myn <- c(20, 160) #two possible values of N (both groups combined)
for (j in 1:2) {
#run through twice: once with 10 per group and once with 80 per group
mybreaks <-
seq(-1.7, 1.7, .15) #have to handcraft to get both histos similar scale
htextloc <- .7 #horiz location of 'Percent' label
if (j == 2)
{
mybreaks <- seq(-.5, .5, .05)
htextloc <- .2
}
d.all <- vector() #initialise vector to save all the effect sizes from this run
rangea <- 1:(myn[j] / 2)
rangeb <- (1 + myn[j] / 2):myn[j]
mygroup <- c(rep('A', myn[j] / 2), rep('B', myn[j] / 2))
for (i in 1:10000) {
temp <- sample(mypop, myn[j])
myvar <- c(var(temp[rangea]), var(temp[rangeb]))
poolsd <-
sqrt(mean(myvar)) #formula for pooled SD if there is equal N per group
d.all <-
c(d.all, (mean(temp[rangea]) - mean(temp[rangeb])) / poolsd) #compute d and stick on end of list
}
myprobs <- c(.95, .99, .999)
myprobtext <- c('5%', '1%', '0.1%')
d.all <-
d.all[d.all < max(mybreaks)] #remove out of range values so hist will plot
d.all <- d.all[d.all > min(mybreaks)]
hist(
d.all,
breaks = mybreaks,
main = paste0('N per group = ', myn[j] / 2),
xlab = 'Effect size, d',
cex.axis = .8,
las = 1
) #cex.axis determines size of axis text, las rotates text
myquants <- quantile(d.all, probs = myprobs)
myquants <- round(myquants, 2)
boxed.labels(htextloc, 1300, ' Percent', bg = "white", border = NA) #add text to the graph
for (q in 1:length(myquants)) {
boxed.labels(
myquants[q],
1200,
myprobtext[q],
cex = .8,
bg = "white",
border = NA
) #add text to the graph
abline(v = myquants[q], col = 'red', lty = 3) #draw a line for each cutoff
}
}
if (plotsave == 1) {
dev.off()
} #stop writing to png
#-----------------------------------------------------------------------
# Pvalue demo: simulated data with no effect, small effect and medium effect
# Demonstrates use of rnorm to sample from population in one step
#-----------------------------------------------------------------------
plotsave <- 1 #if you just want plots in plot window, set to zero
if (plotsave == 1) {
pngname <- 'Fig4_2_three_effsize_probdensity.png'
png(pngname, width = 500, height = 300)
}
myn <- 20
rangea <- 1:(myn / 2)
rangeb <- (1 + myn / 2):myn
myeff <-
c(0, .2, .7) #we will assume we can multiply by 10 to get wt change in lb
myprobs <- c(.95, .99, .999)
mypill <- c('A', 'B', 'C')
for (j in 1:3) {
d.all <- vector() #variable to save all the effect sizes from this run
for (i in 1:50000) {
temp <- rnorm(myn, mean = 0, sd = 1)
temp[rangea] <- temp[rangea] + myeff[j]
#NB alternatively, could use temp[rangea] <- rnorm (myn/2, mean=myeff, sd =1)
myvar <- c(var(temp[rangea]), var(temp[rangeb]))
poolsd <-
sqrt(mean(myvar)) #formula for pooled SD if there is equal N per group
d.all <-
c(d.all, 10 * (mean(temp[rangea]) - mean(temp[rangeb])) / poolsd) #compute d and stick on end of list
}
# Kernel Density Plot
myd <- density(d.all) # returns the density data
if (j == 1) {
plot(
myd,
main = paste0('N per group = ', myn[j] / 2),
xlab = 'Wt difference in lb',
ylab = 'Probability',
cex.axis = .8,
las = 1,
ylim = c(0, .1),
xlim = c(-20, 20)
) #cex.axis determines size of axis text, las rotates text
}
if (j > 1) {
lines(myd)
}
abline(v = myeff[j] * 10) #draw a line for each mean
boxed.labels(
myeff[j] * 10,
.1,
mypill[j],
bg = "white",
border = NA,
cex = 1.1
)
}
abline(v = 3, col = 'red', lty = 3) #red line shows obtained result
if (plotsave == 1) {
dev.off()
} #stop writing to png