-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathgemma.R
220 lines (192 loc) · 8.81 KB
/
gemma.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
# SUMMARY
# -------
# This file defines some functions that I use for QTL mapping in
# GEMMA. Here is an overview of the functions defined in this file:
#
# write.gemma.pheno(file,phenotype,pheno)
# write.gemma.covariates(file,covariates,pheno)
# write.gemma.map(file,map)
# write.gemma.geno(file,geno,map)
# read.gemma.assoc(file)
# run.gemma(phenotype,covariates,pheno,geno,map,gemmadir,gemma.exe,
# chromosomes)
# run.gemma.norr(phenotype,covariates,pheno,geno,map,gemmadir,gemma.exe)
#
# FUNCTION DEFINITIONS
# ----------------------------------------------------------------------
# Write the phenotype data to a file in the format used by GEMMA. Each
# line of the file contains one phenotype observation.
write.gemma.pheno <- function (file, phenotype, pheno) {
y <- pheno[phenotype]
if (is.numeric(y))
y <- round(y,digits = 6)
write.table(y,file,quote = FALSE,row.names = FALSE,col.names = FALSE)
}
# ----------------------------------------------------------------------
# Write the covariate data to a file in the format used by GEMMA. Each
# line corresponds to a sample. We must include an additional
# covariate for the intercept.
write.gemma.covariates <- function (file, covariates, pheno) {
if (is.null(covariates)) {
write.table(data.frame(rep(1,nrow(pheno))),file,sep = " ",
quote = FALSE,row.names = FALSE,col.names = FALSE)
} else {
write.table(cbind(1,data.frame(lapply(pheno[covariates],function (x) {
if (is.numeric(x))
round(x,digits=6)
else
x
}))),file,sep = " ",quote = FALSE,row.names = FALSE,col.names = FALSE)
}
}
# ----------------------------------------------------------------------
# Write the SNP information to a space-delimited text file in the
# format used by GEMMA. This file contains one line per SNP, with
# three columns: (1) SNP label, (2) base-pair position, (3)
# chromosome.
write.gemma.map <- function (file, map)
write.table(map[c("snp","pos","chr")],file,sep = " ",quote = FALSE,
row.names = FALSE,col.names = FALSE)
# ----------------------------------------------------------------------
# Store the mean genotypes as a space-delimited text file in the
# format used by GEMMA, in which we have one row per SNP, and one
# column per sample. The first three column give the SNP label, and
# the two alleles.
write.gemma.geno <- function (file, geno, map) {
geno <- t(geno)
geno <- as.data.frame(geno,check.names = FALSE)
geno <- round(geno,digits = 3)
geno <- cbind(map[c("snp","ref","alt")],geno)
write.table(geno,file,sep = " ",quote = FALSE,row.names = FALSE,
col.names = FALSE)
}
# ----------------------------------------------------------------------
# Reads in the association results from GEMMA, and returns a data
# frame containing three columns: chromosome number ("chr"); base-pair
# position ("pos"); and the base-10 logarithm of the p-value ("log10p").
read.gemma.assoc <- function (file) {
gwscan <- read.table(file,sep = "\t",header = TRUE,check.names = FALSE,
quote = "",stringsAsFactors = FALSE)
rownames(gwscan) <- gwscan$rs
gwscan <- gwscan[c("chr","ps","p_lrt")]
gwscan <- transform(gwscan,p_lrt = -log10(p_lrt))
colnames(gwscan) <- c("chr","pos","log10p")
return(gwscan)
}
# ----------------------------------------------------------------------
# This function maps QTLs using GEMMA, writing all the files required
# by GEMMA to the directory specified by "gemmadir". The QTLs are
# mapped separately for each chromosome, in which the kinship matrix
# is computed using all markers except the markers on the given
# chromosome.
run.gemma <- function (phenotype, covariates, pheno, geno, map,
gemmadir, gemma.exe, chromosomes = NULL) {
# Set the local directory to the location of the GEMMA files.
srcdir <- getwd()
setwd(gemmadir)
# Give summary of analysis.
cat("Mapping QTLs for",phenotype,"in",nrow(pheno),"mice, ")
if (!is.null(covariates)) {
cat("controlling for ",paste(covariates,collapse=" + "),".\n",sep="")
} else {
cat("with no covariates included.\n")
}
# Write the phenotype and covariate data to separate files.
cat("Writing phenotype and covariate data to file.\n")
write.gemma.pheno(paste0(gemmadir,"/pheno.txt"),phenotype,pheno)
write.gemma.covariates(paste0(gemmadir,"/covariates.txt"),covariates,pheno)
# We will map QTLs on these chromosomes.
if (is.null(chromosomes))
chromosomes <- levels(map$chr)
# Repeat for each chromosome.
scans <- vector("list",length(chromosomes))
names(scans) <- chromosomes
for (chr in chromosomes) {
# Compute the kinship matrix using all markers that are *not* on
# the chromosome.
cat("Mapping QTLs on chromosome ",chr,".\n",sep="")
cat(" * Computing kinship matrix.\n")
markers <- which(map$chr != chr)
K <- tcrossprod(center.columns(geno[,markers])) / length(markers)
# Save the kinship matrix to a text file.
cat(" * Writing kinship matrix to file.\n")
write.table(round(K,digits = 6),paste0(gemmadir,"/kinship.txt"),sep = " ",
quote = FALSE,row.names = FALSE,col.names = FALSE)
# Write out the mean genotypes and map information for all markers
# on the chromosome.
markers <- which(map$chr == chr)
cat(" * Writing to file genotypes for ",length(markers),
" markers on chromosome ",chr,".\n",sep="")
write.gemma.geno(paste0(gemmadir,"/geno.txt"),geno[,markers],map[markers,])
cat(" * Writing genetic map for",length(markers),
"markers on chromosome",chr,"to file.\n")
write.gemma.map(paste0(gemmadir,"/map.txt"),map[markers,])
# Now we are finally ready to run GEMMA for all markers on the
# chromosome using the kinship matrix computed using all the
# markers *not* on the chromosome.
cat(" * Computing p-values for ",length(markers),
" markers on chromosome ",chr,".\n",sep="")
system(paste(gemma.exe,"-g geno.txt -a map.txt -p pheno.txt",
"-c covariates.txt -k kinship.txt -notsnp -lmm 2",
"-lmin 0.01 -lmax 100"),
ignore.stdout = TRUE)
# Load the results of the GEMMA association analysis.
scans[[chr]] <-
read.gemma.assoc(paste0(gemmadir,"/output/result.assoc.txt"))
}
# Restore the working directory.
setwd(srcdir)
# Merge the mapping results from all chromosomes into a single table.
gwscan <- do.call(rbind,scans)
rownames(gwscan) <- do.call(c,lapply(scans,rownames))
class(gwscan) <- c("scanone","data.frame")
# Return the genome-wide scan.
return(gwscan)
}
# ----------------------------------------------------------------------
# This function maps QTLs using GEMMA without the "realized
# relatedness" matrix to account for population structure.
run.gemma.norr <- function (phenotype, covariates, pheno, geno, map,
gemmadir, gemma.exe) {
# Set the local directory to the location of the GEMMA files.
srcdir <- getwd()
setwd(gemmadir)
# Give summary of analysis.
cat("Mapping QTLs for",phenotype,"in",nrow(pheno),"mice, ")
if (!is.null(covariates)) {
cat("controlling for ",paste(covariates,collapse=" + "),".\n",sep="")
} else {
cat("with no covariates included.\n")
}
# Write the phenotype and covariate data to separate files.
cat("Writing phenotype and covariate data to file.\n")
write.gemma.pheno(paste0(gemmadir,"/pheno.txt"),phenotype,pheno)
write.gemma.covariates(paste0(gemmadir,"/covariates.txt"),covariates,pheno)
# Write out the mean genotypes and map information for all markers.
cat("Writing SNP and genotype data to file.\n")
write.gemma.map(paste0(gemmadir,"/map.txt"),map)
write.gemma.geno(paste0(gemmadir,"/geno.txt"),geno,map)
# Write out the kinship matrix to file.
cat("Writing identity kinship matrix to file.\n");
write.table(diag(nrow(pheno)),paste0(gemmadir,"/kinship.txt"),
sep = " ",quote = FALSE,row.names = FALSE,
col.names = FALSE)
# cat("Writing full kinship matrix to file.\n");
# K <- tcrossprod(center.columns(geno)) / nrow(map)
# write.table(round(K,digits = 6),paste0(gemmadir,"/kinship.txt"),
# sep = " ",quote = FALSE,row.names = FALSE,
# col.names = FALSE)
# Now we are finally ready to run GEMMA for all markers.
cat("Computing p-values for",nrow(map),"candidate markers.\n")
system(paste(gemma.exe,"-g geno.txt -a map.txt -p pheno.txt",
"-c covariates.txt -k kinship.txt -notsnp -lmm 2",
"-lmin 0.01 -lmax 100"),
ignore.stdout = TRUE)
# Load the results of the GEMMA association analysis.
gwscan <- read.gemma.assoc(paste0(gemmadir,"/output/result.assoc.txt"))
class(gwscan) <- c("scanone","data.frame")
# Restore the working directory.
setwd(srcdir)
# Return the genome-wide scan.
return(gwscan)
}