-
Notifications
You must be signed in to change notification settings - Fork 0
/
LINCs_PC3_SS1_QA.Rmd
267 lines (190 loc) · 13.3 KB
/
LINCs_PC3_SS1_QA.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
---
title: "LINCs MEP PC3 SS1 QA Report"
date: "`r Sys.Date()`"
output: pdf_document
---
```{r setup, echo=FALSE, message=FALSE}
library("ggplot2")
library("data.table")
library("MEMA")
create8WellPseudoImage <- function(DT, pr, prDisplay){
p <- ggplot(DT, aes_string(x="ArrayColumn", y="ArrayRow",colour=pr))+
geom_point(size=1)+
scale_y_reverse()+ scale_x_continuous(breaks= c(min(DT$ArrayColumn),round(mean(c(min(DT$ArrayColumn),max(DT$ArrayColumn)))),max(DT$ArrayColumn)))+
scale_colour_gradient(low = "white", high = "red",limits=quantile(DT[[pr]],probs = c(0,.998),na.rm=TRUE))+
guides(colour = guide_legend(prDisplay, keywidth = .5, keyheight = .5))+
ggtitle(paste(prDisplay,"for",unique(DT$CellLine), "cells in plate",unique(DT$Barcode)))+
xlab("")+ylab("")+theme(axis.text.x = element_text(angle = 0, vjust = 0.5, hjust=1),plot.title = element_text(size = rel(.5)),legend.text=element_text(size = rel(.3)),legend.title=element_text(size = rel(.3)))+
facet_wrap(~Well, ncol=4)
}
create8WellHistograms <- function(DT, pr, prDisplay, binwidth = diff(quantile(DT[[pr]],probs = c(0,.98),na.rm=TRUE))/50, upperProb = .99, ncol = 4) {
p <- ggplot(DT, aes_string(x=pr))+
geom_histogram(binwidth = binwidth)+
scale_x_continuous(limits = quantile(DT[[pr]],probs = c(0,upperProb),na.rm=TRUE))+
ggtitle(paste(prDisplay,"in",unique(DT$CellLine), "cells in plate",unique(DT$Barcode)))+
xlab(prDisplay)+
theme(axis.text.x = element_text(angle = 0, vjust = 0.5, hjust=1, size=rel(.5)), axis.text.y = element_text(angle = 0, vjust = 0.5, hjust=1, size=rel(1)), plot.title = element_text(size = rel(.5)),legend.text=element_text(size = rel(.3)),legend.title=element_text(size = rel(.3)))+
facet_wrap(~Well, ncol=ncol)
}
numericMedian <- function(x) as.numeric(median(x))
calcQAScore <- function(DT, threshold, value){
QAScore <- sum(DT[,value,with=FALSE] > threshold)/nrow(DT)
return (QAScore)
}
evalMedians <- function(values, reps){
tmp <- median(values[reps], na.rm=TRUE)
}
#Set the staining set to be analyzed
ss <- "SS1"
cmp <- "Cyto."
# ss <- "SS2"
# cmp <- ""
popDT <- fread(paste0("./",ss,"/Population/Annotated Data/PC3_",ss,"_PopAnn.txt"))
barcodes <- sort(unique(popDT$Barcode))
#Set a threshold for filtering wells on their QA score
wellQAThresh <- 0
#TODO: Read this from sDT
lthresh <- 0.6
# Clean up the dataset that will be used in the analysis
popDT <- popDT[!popDT$ShortName =="blank"]
popDT$ShortName <- gsub("blank","",popDT$ShortName)
setnames(popDT,"Ligand","LigandAnnotID")
ms <- gregexpr("^[[:alnum:]]*[^_]", popDT$LigandAnnotID)
popDT$Ligand <- unlist(regmatches(popDT$LigandAnnotID, ms))
#Set a threshold for the Loess QA
lthresh <- .6
# Clean up the dataset that will be used in the analysis
popDT <- popDT[!popDT$ShortName =="blank"]
popDT$ShortName <- gsub("blank","",popDT$ShortName)
setnames(popDT,"Ligand","LigandAnnotID")
ms <- gregexpr("^[[:alnum:]]*[^_]", popDT$LigandAnnotID)
popDT$Ligand <- unlist(regmatches(popDT$LigandAnnotID, ms))
```
##Summary
This QA report covers the MEP-LINCs `r ss` staining set with `r unique(popDT$CellLine)` cells in 8 well MEMAs.
The plates were printed with 35 row by 20 column MEMAs using a 4x7 pin head that printed 5x5 blocks. Each sample spot contains one ECM protein paired with Collagen I. There are 46 different ECM proteins in the array arranged in a random fashion.
Images of each well were gathered on a Tecan LS Reloaded laser scanner and Olympus ScanR automated microscope. This staining set includes, DAPI, `r unique(popDT$Endpoint488)` (488nm), `r unique(popDT$Endpoint555)` (532 and 555nm) and `r unique(popDT$Endpoint647)` (635 and 647nm). Data from DAPI staining is only gathered by the ScanR.
Tecan data is gathered at the spot population level by fitting round regions of interest (ROIs) to each spot. The Tecan data in this report uses the net values defined as the raw ROI value minus the mean of the local background.
The spots that were not printed are labeled as blank.
\newpage
##QA Scoring of the dataset
QA Scoring is based on regions of low cell signal. A full explanation is in the supplemental material.
```{r QA_Scores, echo=FALSE, fig.width=3.7,fig.height=4}
for (barcode in unique(popDT$Barcode)){
DT <-popDT[popDT$Barcode==barcode,]
#Remove the fiducial entries
setkey(DT,ShortName)
DT <- DT[!"fiducial"]
p <- create8WellPseudoImage(DT, pr = "Net.532",prDisplay = unique(DT$Endpoint555))
suppressWarnings(print(p))
p <- create8WellPseudoImage(DT, pr = "Loess555",prDisplay = paste("Loess Model of", unique(DT$Endpoint555)))
suppressWarnings(print(p))
p <- create8WellHistograms(DT,pr = "Net.532",prDisplay = unique(DT$Endpoint555))
suppressWarnings(print(p))
DT <- DT[,QAScore := calcQAScore(.SD,threshold=lthresh,value="Loess555"),by="Well"]
wellScores <- unique(DT[,list(Well,QAScore=sprintf("%.2f",QAScore))])
p <- ggplot(DT, aes(x=Loess555))+
geom_histogram(binwidth=.04)+
facet_wrap(~Well, ncol=4)
r <- suppressWarnings(ggplot_build(p))
textY <- .8*max(r$panel$ranges[[1]]$y.range)
p <- ggplot(DT, aes(x=Loess555))+
geom_histogram(binwidth=.04)+
geom_text(data=wellScores, aes(label=paste0("QA\n",QAScore)), x = 1.15, y = textY, size = rel(2.5), colour="red")+
facet_wrap(~Well, ncol=4)+
geom_vline(xintercept=lthresh, colour="blue")+
ggtitle(paste("Loess Model of",unique(DT$Endpoint555)," Signal\n for",unique(DT$CellLine), "cells in plate",unique(DT$Barcode)))+xlab(unique(DT$Endpoint555))+
theme(axis.text.x = element_text(angle = 0, vjust = 0.5, hjust=1, size=rel(.5)), axis.text.y = element_text(angle = 0, vjust = 0.5, hjust=1, size=rel(1)), plot.title = element_text(size = rel(.5)),legend.text=element_text(size = rel(.3)),legend.title=element_text(size = rel(.3)))
suppressWarnings(print(p))
cat(sprintf("Mean QA Score for %9s = %.2f \n",barcode,mean(DT$QAScore)))
}
```
\newpage
#Supplemental Material
##MEMA Layout
The LINCs MEMA has 46 ECM proteins spotted in 35 rows and 20 columns. The proteins are randomly assigned to the top 30 rows of spots. Rows 31-35 are replicates of rows 1-5. The upper left and bottom right corners are image fiducials in the 488nm channel and there are four blank spots for checking orientation in all channels.
```{r Content Layout,echo=FALSE, message=FALSE, warnings=FALSE, fig.width=6}
#Select only one type of array to display and show the pattern
DT <- popDT[,list(Block,Position,ShortName,Name,ID,ArrayRow,ArrayColumn),keyby=Well]
DT <- DT["A01",]
DT <- unique(DT,by=NULL)
p <- ggplot(DT,aes(x = ArrayColumn, y = ArrayRow, fill=ShortName))+
geom_point(shape=21, size = 2.2)+
#scale_colour_manual(values=c("black", "white"))+
guides(fill=guide_legend(ncol = 4))+guides( colour = FALSE )+
theme(legend.text = element_text(size = rel(.5)),legend.title=element_text(size = rel(.5)),plot.title=element_text(size = rel(.8)))+
scale_y_reverse()+
xlab("")+ylab("")+
ggtitle(" \n\nLINCs ECM A Row Layout")
print(p)
```
##Replicate Count
The LINCs MEMA has an average of 15 replicates with a range from 13 to 19.
```{r Layout Replicate Count,echo=FALSE, message=FALSE, warnings=FALSE, fig.width=6.5, fig.height=3}
#Remove the fiducial entries
setkey(DT,ShortName)
DT <- DT[!"fiducial"]
p <- ggplot(DT, aes(x=ShortName))+
geom_bar(width=.8)+geom_hline(yintercept = mean(table(DT$ShortName)), colour="blue")+
ggtitle(" \n\nCount of Replicate ECM Proteins In LINCs MEMA")+
xlab("Printed ECM Protein")+ylab("Number of spots")+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size=rel(.8)),axis.title.x = element_text(size=rel(.8)),axis.title.y = element_text(size=rel(.8)),plot.title = element_text(size = rel(1)),legend.text=element_text(size = rel(.3)),legend.title=element_text(size = rel(.5)))
print(p)
```
\newpage
##`r unique(popDT$Endpoint488) ` and `r unique(popDT$Endpoint647) ` Pseudoimages
```{r Pseudoimages, echo=FALSE, fig.width=3.7,fig.height=4}
for (barcode in unique(popDT$Barcode)){
DT <-popDT[popDT$Barcode==barcode,]
#Remove the fiducial entries
setkey(DT,ShortName)
DT <- DT[!"fiducial"]
p <- create8WellPseudoImage(DT, pr = "Net.488", prDisplay = unique(DT$Endpoint488))
suppressWarnings(print(p))
p <- create8WellPseudoImage(DT, pr = "Loess488", prDisplay = paste("Loess Model of",unique(DT$Endpoint488)))
suppressWarnings(print(p))
p <- create8WellPseudoImage(DT, pr = "Net.635", prDisplay = unique(DT$Endpoint647))
suppressWarnings(print(p))
p <- create8WellPseudoImage(DT, pr = "Loess647", prDisplay = paste("Loess Model of",unique(DT$Endpoint647)))
suppressWarnings(print(p))
}
```
\newpage
##Quality Analysis
The variance of the signal in MEMA data comes from biological and technical factors. The technical factors create regions of low cell counts per spot and uneven staining across the array. The goal of the QA pipeline is to quantify the technical factors to identify wells or plates that need to be removed from downstream processing and/or be replaced by wells from a new experiment.
The hypothesis for the MEMA QA process is that the biological signal comes from individual spots while the technical variations come from regions of low signal. A bivariate loess model can be used to quantify the number of spots in low signal regions, leading to a MEMA QA score.
###Loess Model Explanation
The loess model of a MEMA is the mean value of a weighted version of each spot's region or neighborhood. In a 700 spot array, a loess span value of 0.1 sets the size of the neighborhood to be the nearest 70 points (within approximately 5 spots in all directions). The weights are a tricubic function of the euclidean distance between the spot being modeled and the neighborhood spots. These weights vary from 1 to 0 as distances increase from the nearest to the farthest neighbor. In other words, each spot in the model takes on the mean value of its 70 nearest neighbors with the closest neighbors having the largest impact. Therefore, the loess model is dominated by the technical regional factors as opposed to individual biological responses.
A MEMA's QA score is derived from the loess model of the control-well-normalized values by calculating the proportion of spots in low signal regions(LSR). A threshold for classifying spots as LSR is based on the median of each plate's control well. To have higher scores reflect increasing quality, the MEMA QA score is defined as the proportion of non-LSR spots to total spots. This value will be 1 for MEMAs with no low signal regions and approach 0 as the number of LSR spots increases.
Below are plots showing data from well A02 in plate `r barcodes[1] ` from LINCs staining set `r ss` (DAPI, `r unique(popDT$Endpoint488) `, `r unique(popDT$Endpoint555) ` and `r unique(popDT$Endpoint647) `). The LSR spots are those to the left of the blue vertical line at the threshold value of `r lthresh ` in the histogram.
```{r Plot LI8X001_532_heatmaps , echo=FALSE, fig.width=2.5,fig.height=4}
DT <-popDT[popDT$Barcode==barcodes[2] & popDT$Well == "A02"]
#Remove the fiducial entries
setkey(DT,ShortName)
DT <- DT[!"fiducial"]
p <- ggplot(DT, aes(x=ArrayColumn, y=ArrayRow,colour=Net.532))+
geom_point(size=1.8)+
scale_y_reverse()+ scale_x_continuous(breaks= c(min(DT$ArrayColumn),round(mean(c(min(DT$ArrayColumn),max(DT$ArrayColumn)))),max(DT$ArrayColumn)))+
scale_colour_gradient(low = "white", high = "red")+
guides(colour = guide_legend(unique(DT$Endpoint555), keywidth = .5, keyheight = .5))+
ggtitle(paste("Raw Data of Tecan",unique(DT$Endpoint555),"Signal\n for",unique(DT$CellLine), "cells in plate",unique(DT$Barcode)))+
xlab("")+ylab("")+theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),plot.title = element_text(size = rel(.5)),legend.text=element_text(size = rel(.3)),legend.title=element_text(size = rel(.3)))
suppressWarnings(print(p))
p <- ggplot(DT, aes(x=ArrayColumn, y=ArrayRow,colour=Loess555))+
geom_point(size=1.8)+
scale_y_reverse()+ scale_x_continuous(breaks= c(min(DT$ArrayColumn),round(mean(c(min(DT$ArrayColumn),max(DT$ArrayColumn)))),max(DT$ArrayColumn)))+
scale_colour_gradient(low = "white", high = "red")+
guides(colour = guide_legend(unique(DT$Endpoint555), keywidth = .5, keyheight = .5))+
ggtitle(paste("Loess Model of Tecan",unique(DT$Endpoint555),"Signal\n for",unique(DT$CellLine), "cells in plate",unique(DT$Barcode)))+
xlab("")+ylab("")+theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),plot.title = element_text(size = rel(.5)),legend.text=element_text(size = rel(.3)),legend.title=element_text(size = rel(.3)))
suppressWarnings(print(p))
DT <- DT[,QAScore := calcQAScore(.SD,threshold=lthresh,value="Loess555"),by="Well"]
wellScores <- unique(DT[,list(Well,QAScore=sprintf("%.2f",QAScore))])
p <- ggplot(DT, aes(x=Loess555))+
geom_histogram(binwidth=.04)+
geom_vline(xintercept=lthresh, colour="blue")+
geom_text(data=wellScores, aes(label=paste0("QA\n",QAScore)), x = .9, y = 30, size = rel(7), colour="red")+
ggtitle(paste("Loess Model of Tecan",unique(DT$Endpoint555)," Signal\n for",unique(DT$CellLine), "cells in plate",unique(DT$Barcode)))+xlab(unique(DT$Endpoint555))+
theme(axis.text.x = element_text(angle = 0, vjust = 0.5, hjust=1, size=rel(.5)), axis.text.y = element_text(angle = 0, vjust = 0.5, hjust=1, size=rel(1)), plot.title = element_text(size = rel(.5)),legend.text=element_text(size = rel(.3)),legend.title=element_text(size = rel(.3)))
suppressWarnings(print(p))
```