-
Notifications
You must be signed in to change notification settings - Fork 0
/
plot_preseq_after.r
executable file
·64 lines (54 loc) · 3.79 KB
/
plot_preseq_after.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/usr/bin/env Rscript
## USAGE:
## ./plot_preseq.r /my/path/file.lcextrap outfolder/ sampleID MaxReads
## SampleID: used for header in pdf and file name.
## MaxReads: Max number of reads (sequencing effort) to be plotted.
## EAGER output summary incl. sample ID (.csv file). Required.
## NOTE: For each CF I plot the amount of distinct reads expected at this CF, and the amount of extra sequencing necessary to get there (considering %endo and already performed sequencing)
## NOTE2: Present data-state shown as red dot. Derived from EAGER out table as well, not from preseq results --> leads to minor deviations sometimes.
args <- commandArgs(trailingOnly=TRUE)
infile <- args[1]
outfolder <- args[2]
sampleID <- args[3]
if( !is.na(args[4]) ){maxR <- as.integer(args[4])} else {maxR <- 20000000}
if( !is.na(args[5]) ){ endo <- read.csv(args[5],check.names=F);rownames(endo) <- endo[,'Sample Name'] }
if( !is.na(args[6]) ){ capres <- read.csv(args[6],check.names=F);rownames(capres) <- capres[,'Sample Name'] } # file contains EAGER results cap3
## Test if procvided endo file has actual sample name
if( sampleID %in% rownames(endo) ){
current.reads <- endo[ sampleID, 'Mapped Reads after RMDup' ]
endo.per <- endo[ sampleID, 'Endogenous DNA (%)' ]
current.cf <- round(endo[sampleID,'# mapped reads prior RMDup']/endo[sampleID,'Mapped Reads after RMDup'],2)
sequencing.done <- endo[sampleID,'# reads after C&M prior mapping'] ## use this number to calculate how much seq has to be done. Note: removal of reads due to Q cut off not considered
} else {
current.reads <- NA
endo.per <- NA
warning('SampleID not found in EAGER summary. Current read count and % endogenous cannot be shown in plot.')
}
## format data
d1=read.table(infile, header=T,sep="\t")
d2=d1[d1[,1]<=maxR,]
## extract info from capture results
cf.extraSeq <- round(capres[sampleID,'# mapped reads prior RMDup']/capres[sampleID,'Mapped Reads after RMDup'],2)
## plot
maxY <- max(d2[,'UPPER_0.95CI'])+(0.1*max(d2[,'UPPER_0.95CI']))
pdf(paste(outfolder,sampleID,"_preseq_afterExtraSeq.pdf",sep=""))
plot("",ylim=c(0,maxY),xlim=c(0,maxR),xlab="Mapped reads prior RMDup",ylab="Mapped Reads after RMDup (w/ 95CI)",main=sampleID,cex.main=0.95)
mtext(paste("% Endo:",endo.per))
polygon(c( d2[,'TOTAL_READS'] , rev(d2[,'TOTAL_READS']) ) , c( d2[,'LOWER_0.95CI'] , rev(d2[,'UPPER_0.95CI']) ) ,col="lightblue1",border="lightskyblue4",lwd=1.5)
grid(NULL,NULL,lty=2,col="grey")
lines(d2[,'TOTAL_READS'],d2[,'EXPECTED_DISTINCT'],col='blue',lwd=2)
## extract/plot CFs
cluster.fac <- d2[-1,'TOTAL_READS']/d2[-1,'EXPECTED_DISTINCT'];my.idx <-vector(length=4)
for (i in c(1:4)){
c = c(2.5,3,3.5,4)[i]
my.idx[i] <- which(abs(cluster.fac-c)==min(abs(cluster.fac-c)))
}
points(d2[my.idx,'TOTAL_READS'],d2[my.idx,'EXPECTED_DISTINCT'],pch=21,col='black',bg=colorRampPalette(c('darkolivegreen1','darkgreen'))(4))
## current seq depth
points(endo[sampleID,'# mapped reads prior RMDup'],endo[sampleID,'Mapped Reads after RMDup'],pch=21,col='black',bg='red')
points(capres[sampleID,'# mapped reads prior RMDup'],capres[sampleID,'Mapped Reads after RMDup'] ,pch=8,col='red',cex=2)
## legend
## NOTE: For each CF I plot the amount of distinct reads expected at this CF, and the amount of extra sequencing necessary to get there (considering %endo and already performed sequencing)
legend('bottomright',legend=paste('CF:',c('2.5','3.0','3.5','4.0'),'; #Dist.:',round(d2[my.idx,'EXPECTED_DISTINCT']/1000000,2),'M; #SeqLibEx:',round(((d2[my.idx,'TOTAL_READS']*(100/endo.per))-sequencing.done)/1000000,2),'M',sep=""),fill=colorRampPalette(c('darkolivegreen1','darkgreen'))(4))
legend('topleft',legend=paste('actual CF:',cf.extraSeq,'; #Dist:',round(capres[sampleID,'Mapped Reads after RMDup']/1000000,2),'M; #TotalSeq:',round(capres[sampleID,'# of Raw Reads prior Clip & Merge (C&M)']/1000000,2),'M',sep=""),fill="red")
dev.off()