Skip to content

Commit

Permalink
Test of --noPlot flag in Docker
Browse files Browse the repository at this point in the history
  • Loading branch information
AHDemiG1 committed Mar 22, 2023
1 parent 94c271d commit 4c840af
Show file tree
Hide file tree
Showing 5 changed files with 412 additions and 378 deletions.
5 changes: 4 additions & 1 deletion clinCNV.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env Rscript
set.seed(100)
options(warn=-1)
clincnvVersion = paste0("ClinCNV version: v1.18.2")
clincnvVersion = paste0("ClinCNV version: v1.18.3")

## CHECK R VERSION
if (!( (as.numeric(version$major) >= 3 & as.numeric(version$minor) > 2.0) | as.numeric(version$major) >= 4) ) {
Expand Down Expand Up @@ -647,6 +647,9 @@ if (length(samplesToFilterOut) > 0) {
}

print(paste("We start to cluster your data (you will find a plot if clustering is possible in your output directory)", opt$out, Sys.time()))
if (opt$noPlot) {
print(paste("Ay no, you used --noPlot flag. No plots for you then.", Sys.time()))
}
if (is.null(opt$clusterProvided)) {
if (opt$noPlot) {
clustering = rep("0", ncol(normal))
Expand Down
137 changes: 77 additions & 60 deletions germline/helpersGermline.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ EstimateModeSimple <- function(x, chrom="", genders=NULL) {
mu
}




determineSDsOfGermlineSample <- function(x) {
Expand Down Expand Up @@ -131,13 +131,13 @@ plotFoundCNVs <- function(found_CNVs, toyLogFoldChange, toyBedFile, outputFolder
", number of regions:", found_CNVs[s,3] - found_CNVs[s,2] + 1 ,
", length:", toyBedFile[found_CNVs[s,3],3] - toyBedFile[found_CNVs[s,2],2])
CNV_name_to_write <- paste(colnames(toyLogFoldChange)[sam_no], chrom, toyBedFile[found_CNVs[s,2],2], toyBedFile[found_CNVs[s,3],3], "CN",vector_of_states[found_CNVs[s,4]], sep="_")


vectorOfGeneNames = c()
genesThatHasToBeSeparated = unique(toyBedFile[found_CNVs[s,2]:found_CNVs[s,3],5])
for (i in 1:length(genesThatHasToBeSeparated)) {
if (is.character(genesThatHasToBeSeparated[i]))
vectorOfGeneNames = c(vectorOfGeneNames, unlist(strsplit(genesThatHasToBeSeparated[i], split=",")))
vectorOfGeneNames = c(vectorOfGeneNames, unlist(strsplit(genesThatHasToBeSeparated[i], split=",")))
}
vectorOfGeneNamesTrimmed = c()
if (length(vectorOfGeneNames) > 0) {
Expand All @@ -146,7 +146,7 @@ plotFoundCNVs <- function(found_CNVs, toyLogFoldChange, toyBedFile, outputFolder
}
}
if (length(vectorOfGeneNamesTrimmed) > 0) {
annotationGenes <- paste(unique(vectorOfGeneNamesTrimmed), collapse=",")
annotationGenes <- paste(unique(vectorOfGeneNamesTrimmed), collapse=",")
} else {
annotationGenes = "na"
}
Expand All @@ -160,7 +160,7 @@ plotFoundCNVs <- function(found_CNVs, toyLogFoldChange, toyBedFile, outputFolder

length_of_repr <- 1000


st <- found_CNVs[s,2]
fn <- found_CNVs[s,3]

Expand All @@ -169,10 +169,10 @@ plotFoundCNVs <- function(found_CNVs, toyLogFoldChange, toyBedFile, outputFolder
plot_st <- max(1,st - length_of_repr)
plot_fn <- min(length(toyLogFoldChange), fn + length_of_repr)
png(filename=paste0(outputFolder, "/", paste0(CNV_name_to_write, ".png")), type = "cairo", width = 1536, height = 640)
#bitmap(filename=paste0(outputFolder, "/", paste0(CNV_name_to_write, ".png")) ,width = 1024, height = 640, units = "px" )

#bitmap(filename=paste0(outputFolder, "/", paste0(CNV_name_to_write, ".png")) ,width = 1024, height = 640, units = "px" )




plot(toyLogFoldChange[plot_st:plot_fn], main=CNV_name, ylab="Copy Number", xlab=(paste("CNV within Chromosome Arm" )),
ylim=c(0, 3), cex=toySizesOfPointsFromLocalSds[plot_st:plot_fn], yaxt='n')

Expand All @@ -186,7 +186,7 @@ plotFoundCNVs <- function(found_CNVs, toyLogFoldChange, toyBedFile, outputFolder


abline(h=sqrt(cn_states/2),lty=2,col=colours,lwd=3)

points((st - plot_st):(st - plot_st + fn - st) + 1, toyLogFoldChange[st:fn],col="black", pch=21,bg=colours[found_CNVs[s,4]], cex=toySizesOfPointsFromLocalSds[found_CNVs[s,2]:found_CNVs[s,3]])


Expand All @@ -205,7 +205,7 @@ plotFoundCNVs <- function(found_CNVs, toyLogFoldChange, toyBedFile, outputFolder
family = "mono")
dev.off()
}

}
}
return(cnvsToOutput)
Expand All @@ -221,17 +221,19 @@ Determine.gender <- function(normalized.coverage.corrected.gc, probes) {
matrix_of_values[matrix_of_values > 1.5] = 1.5
clKmeans <- NULL
clKmeans <- tryCatch({kmeans(matrix_of_values, centers=matrix(c(0,1,sqrt(1/2), sqrt(1/2)), nrow=2))},
error = function(e) {
NULL
})
error = function(e) {
NULL
})
if (!is.null(clKmeans)) {
clusters <- clKmeans$cluster
clusters[clusters == 1] <- "F"
clusters[clusters == 2] <- "M"
png(filename=paste0(opt$out, paste0("/genders.png")), type = "cairo", width=800, height=800)
plot(matrix_of_values, col = clKmeans$cluster, xlab="Y Chromsome", ylab="X Chromosome", pch=19, cex=2)
points(clKmeans$centers, col = 1:2, pch = 8, cex = 10)
dev.off()
if (!opt$noPlot) {
png(filename=paste0(opt$out, paste0("/genders.png")), type = "cairo", width=800, height=800)
plot(matrix_of_values, col = clKmeans$cluster, xlab="Y Chromsome", ylab="X Chromosome", pch=19, cex=2)
points(clKmeans$centers, col = 1:2, pch = 8, cex = 10)
dev.off()
}
} else {
clusters <- rep(2, nrow(matrix_of_values))
clusters[which(matrix_of_values[,2] < 0.25)] = 1
Expand Down Expand Up @@ -281,7 +283,7 @@ FindRobustMeanAndStandardDeviation <- function(x, genders, chrom, modeEstimated
}
}
}

if (length(x) > 30 ){
mu = median(x)
} else {
Expand All @@ -292,7 +294,7 @@ FindRobustMeanAndStandardDeviation <- function(x, genders, chrom, modeEstimated
}
return(matrix(c(mu, Qn(x), "Qn"), nrow=1))
}

if (chrom == "chrX") {
if (length(which(genders == "F")) <= 5) {
x = x[which(genders == "M")]
Expand All @@ -310,7 +312,7 @@ FindRobustMeanAndStandardDeviation <- function(x, genders, chrom, modeEstimated
}
}


forMeanX = x
return(matrix(c(median(forMeanX), Qn(forMeanX), "Qn"), nrow=1))

Expand All @@ -329,10 +331,10 @@ FindRobustMeanAndStandardDeviation <- function(x, genders, chrom, modeEstimated
} else {
mu = modeEstimated
}
if (length(x) < 100) {
density_of_x <- density(x, bw=bandwidth, kernel="gaussian")
}

if (length(x) < 100) {
density_of_x <- density(x, bw=bandwidth, kernel="gaussian")
}
closest_to_mu <- which.min(abs(density_of_x$x - mu))
which_are_bigger <- which(density_of_x$y > density_of_x$y[closest_to_mu])
density_of_x <- as.data.frame(cbind(density_of_x$x, density_of_x$y))
Expand Down Expand Up @@ -385,15 +387,15 @@ FindRobustMeanAndStandardDeviation <- function(x, genders, chrom, modeEstimated

if (upper_bound_differs & lower_bound_differs) {
dtnorm0 <- function(X, mean, sd, log = TRUE) {msm::dtnorm(X, mean, sd, lower=lower_bound, upper=upper_bound,
log=T)}
log=T)}
} else if (!upper_bound_differs & !lower_bound_differs) {
return(matrix(c(median(x), Qn(x), "Qn"), nrow=1))
} else if (upper_bound_differs) {
dtnorm0 <- function(X, mean, sd, log = FALSE) {msm::dtnorm(X, mean, sd, lower = -10**10, upper=upper_bound,
log=T)}
log=T)}
} else {
dtnorm0 <- function(X, mean, sd, log = FALSE) {msm::dtnorm(X, mean, sd, lower=lower_bound, upper=10**10,
log=T)}
log=T)}
}
QnX <- Qn(x)
data = x[which(x >= lower_bound & x <= upper_bound)]
Expand Down Expand Up @@ -429,10 +431,10 @@ returnClustering <- function(minNumOfElemsInCluster) {
minNumOfElemsInCluster = as.numeric(minNumOfElemsInCluster)
set.seed(100)
clustering = rep(0, ncol(normal))

numOfElementsInCluster = (minNumOfElemsInCluster)
outliersFromClustering = rep(FALSE, ncol(normal))

coverageForClustering = sqrt(normal[which(!bedFile[,1] %in% c("chrX","chrY")),])
sdsOfRegions <- apply(coverageForClustering, 1, mad)
potentiallyPolymorphicRegions <- which(sdsOfRegions > quantile(sdsOfRegions, 0.95) | sdsOfRegions < quantile(sdsOfRegions, 0.05))
Expand All @@ -445,7 +447,7 @@ returnClustering <- function(minNumOfElemsInCluster) {
coverageForClustering = coverageForClustering[sample(1:nrow(coverageForClustering), 100000),]
}


if (!is.null(opt$triosFile)) {
samplesActuallyPlayingRole = c()
for (trioRow in 1:nrow(trios)) {
Expand Down Expand Up @@ -480,18 +482,21 @@ returnClustering <- function(minNumOfElemsInCluster) {
print(paste("You ask to clusterise intro clusters of size", minNumOfElemsInCluster, "but size of the cohort is", ncol(normal), "which is not enough. We continue without clustering."))

setwd(opt$out)
png(filename="clusteringSolution.png", type = "cairo", width=1024, height=1024)
plot(x, y, xlab="Coordinate 1", ylab="Coordinate 2",
main="Isometric MDS", type="n")
text(x, y, labels = colnames(normal), cex=.7, col=clustering + 1)
dev.off()

if (!opt$noPlot) {
png(filename="clusteringSolution.png", type = "cairo", width=1024, height=1024)
plot(x, y, xlab="Coordinate 1", ylab="Coordinate 2",
main="Isometric MDS", type="n")
text(x, y, labels = colnames(normal), cex=.7, col=clustering + 1)
dev.off()
}
return(list(clustering, outliersFromClustering))
}

coordsAfterMDS = t((rbind(x, y)))
distMatrix = dist(t((rbind(x, y))))
hc <- hclust(distMatrix, method="ward.D")


numOfClusters = 1
percentage = c()
Expand All @@ -502,7 +507,9 @@ returnClustering <- function(minNumOfElemsInCluster) {

#memb <- km$cluster
memb=cutree(hc, k=numOfClusters)
plot(coordsAfterMDS, col=memb+1, main=numOfClusters)
if (!opt$noPlot) {
plot(coordsAfterMDS, col=memb+1, main=numOfClusters)
}
numOfObservationsInClusters <- table(memb)
centers = matrix(0, nrow=0, ncol=2)
sds = matrix(0, nrow=0, ncol=2)
Expand Down Expand Up @@ -535,7 +542,7 @@ returnClustering <- function(minNumOfElemsInCluster) {
}
}



memb=cutree(hc, k=numOfClusters - 1)
numOfObservationsInClusters <- table(memb)
Expand Down Expand Up @@ -565,13 +572,16 @@ returnClustering <- function(minNumOfElemsInCluster) {
# clustering[-samplesActuallyPlayingRole] = -1
#}


setwd(opt$out)
png(filename="clusteringSolution.png", type = "cairo", width=1024, height=1024)
plot(x, y, xlab="Coordinate 1", ylab="Coordinate 2",
main="Isometric MDS", type="n")
text(x, y, labels = row.names(distMatrix), cex=.7, col=clustering + 1)
dev.off()
if (!opt$noPlot) {

png(filename="clusteringSolution.png", type = "cairo", width=1024, height=1024)
plot(x, y, xlab="Coordinate 1", ylab="Coordinate 2",
main="Isometric MDS", type="n")
text(x, y, labels = row.names(distMatrix), cex=.7, col=clustering + 1)
dev.off()
}
setwd(opt$folderWithScript)

return(list(clustering, outliersFromClustering))
Expand Down Expand Up @@ -618,12 +628,14 @@ returnTreeForCorrelation <- function(coverage.normalised.local, sdsOfGermlineSam

trainingDataset <- as.data.frame(cbind(covariancesClose, distnacesClose, sumOfLengths, minLength, maxLength))
if (length(which(distnacesClose > 1000 )) > 0)
trainingDataset = trainingDataset[-which(distnacesClose > 1000),]
trainingDataset = trainingDataset[-which(distnacesClose > 1000),]
if (nrow(trainingDataset) > 100 & nrow(unique(trainingDataset)) > 10) {
fit <- ctree(covariancesClose ~ (distnacesClose) + (sumOfLengths) + minLength + maxLength, data=trainingDataset, control=ctree_control(mincriterion = 0.99))
png(filename="treeOnCorrelationOfCoverage.png", type = "cairo", width=4000, height=1800)
plot(fit)
dev.off()
fit <- ctree(covariancesClose ~ (distnacesClose) + (sumOfLengths) + minLength + maxLength, data=trainingDataset, control=ctree_control(mincriterion = 0.99))
if (!opt$noPlot) {
png(filename="treeOnCorrelationOfCoverage.png", type = "cairo", width=4000, height=1800)
plot(fit)
dev.off()
}
} else {
return(NULL)
}
Expand Down Expand Up @@ -886,11 +898,14 @@ returnClustering2 <- function(minNumOfElemsInCluster) {
print(paste("You ask to clusterise intro clusters of size", minNumOfElemsInCluster, "but size of the cohort is", ncol(normal), "which is not enough. We continue without clustering."))

setwd(opt$out)
png(filename="clusteringSolution.png", type = "cairo", width=1024, height=1024)
plot(x, y, xlab="Coordinate 1", ylab="Coordinate 2",
main="UMAP", type="n")
text(x, y, labels = colnames(normal), cex=.7, col=clustering + 1)
dev.off()
if (!opt$noPlot) {

png(filename="clusteringSolution.png", type = "cairo", width=1024, height=1024)
plot(x, y, xlab="Coordinate 1", ylab="Coordinate 2",
main="UMAP", type="n")
text(x, y, labels = colnames(normal), cex=.7, col=clustering + 1)
dev.off()
}
return(list(clustering, outliersFromClustering))
}

Expand Down Expand Up @@ -923,7 +938,7 @@ returnClustering2 <- function(minNumOfElemsInCluster) {
minDist = distanceToSignCluster
closestCluster = signCluster
}
}
}
clustering[elem] = closestCluster
}
}
Expand All @@ -935,11 +950,13 @@ returnClustering2 <- function(minNumOfElemsInCluster) {
palleteToPlot = rainbow(max(clustering))
colsToPlot = sapply(1:length(clustering), function(i){palleteToPlot[clustering[i]]})
setwd(opt$out)
png(filename="clusteringSolution.png", type = "cairo", width=1024, height=1024)
plot(x, y, xlab="Coordinate 1", ylab="Coordinate 2",
main="UMAP", type="n")
text(x, y, labels = row.names(distMatrix), cex=.7, col=colsToPlot)
dev.off()
if (!opt$noPlot) {
png(filename="clusteringSolution.png", type = "cairo", width=1024, height=1024)
plot(x, y, xlab="Coordinate 1", ylab="Coordinate 2",
main="UMAP", type="n")
text(x, y, labels = row.names(distMatrix), cex=.7, col=colsToPlot)
dev.off()
}
setwd(opt$folderWithScript)

return(list(clustering, outliersFromClustering))
Expand Down
29 changes: 16 additions & 13 deletions somatic/bafSegmentation.R
Original file line number Diff line number Diff line change
Expand Up @@ -253,18 +253,21 @@ determineAllowedChroms <- function(healthySample, tumorSample, healthySampleName
subDir = paste0(tumorSampleName, "_", healthySampleName)
dir.create(file.path(folderBAF, "result", subDir))
setwd(file.path(folderBAF, "result", subDir))
png(paste0(tumorSampleName, "_", healthySampleName, ".png"), type = "cairo", width=1400, height=600)
op <- par(mar=c(11,4,4,2))
x <- barplot(evaluated, col=colVec, main=paste(tumorSampleName, healthySampleName, "expected proportion of BAF with p < 0.05:", round(pvalueShift, digits=3)), ylim=c(0,1), xaxt="n")
text(x-2.5, par("usr")[3] - 0.15, labels = plotLabels, srt = 45, pos = 1, xpd = TRUE)
par(mar=c(5.1, 4.1, 4.1, 2.1), mgp=c(3, 1, 0), las=0)
abline(h=0.05, lwd=2)
abline(h=0.1, lwd=2, col="red")
dev.off()

png("overdispersion.png", type = "cairo", width=1024, height=1024)
plot(overdispersionFactorsNornm ~ overdispersionFactorsTum, main="Dispersion over Binomial")
dev.off()
if (!opt$noPlot) {
png(paste0(tumorSampleName, "_", healthySampleName, ".png"), type = "cairo", width=1400, height=600)
op <- par(mar=c(11,4,4,2))
x <- barplot(evaluated, col=colVec, main=paste(tumorSampleName, healthySampleName, "expected proportion of BAF with p < 0.05:", round(pvalueShift, digits=3)), ylim=c(0,1), xaxt="n")
text(x-2.5, par("usr")[3] - 0.15, labels = plotLabels, srt = 45, pos = 1, xpd = TRUE)
par(mar=c(5.1, 4.1, 4.1, 2.1), mgp=c(3, 1, 0), las=0)
abline(h=0.05, lwd=2)
abline(h=0.1, lwd=2, col="red")
dev.off()

png("overdispersion.png", type = "cairo", width=1024, height=1024)
plot(overdispersionFactorsNornm ~ overdispersionFactorsTum, main="Dispersion over Binomial")
dev.off()
}

trackFilename = paste0(tumorSampleName, "_", healthySampleName, ".igv")
print(getwd())
Expand Down Expand Up @@ -373,7 +376,7 @@ predictWholeGenomeEvent <- function(healthySampleBAF, tumorSampleBAF, matrixOfCo
positiveValues <- values[moreThan0]
positiveValues[which(positiveValues < -minAbs)] = abs(positiveValues[which(positiveValues < -minAbs)] )
mod <- densityMclust(positiveValues, modelNames=c("E"))
plot(mod, what="density", data=positiveValues, breaks=50,xlim=c(-minAbs,0.5))
#plot(mod, what="density", data=positiveValues, breaks=50,xlim=c(-minAbs,0.5))
mergedData <- (mergeClustersCloseToEachOther(mod$parameters$mean, mod$parameters$pro))
if (ncol(mergedData) > 1) {
mergedData <- mergedData[,order(mergedData[1,])]
Expand Down Expand Up @@ -405,7 +408,7 @@ predictWholeGenomeEvent <- function(healthySampleBAF, tumorSampleBAF, matrixOfCo
positiveValues <- values[lessThan0]
positiveValues[which(positiveValues < -minAbs)] = abs(positiveValues[which(positiveValues < -minAbs)] )
mod <- densityMclust(positiveValues, modelNames=c("E"))
plot(mod, what="density", data=positiveValues, breaks=50,xlim=c(-minAbs,0.5))
#plot(mod, what="density", data=positiveValues, breaks=50,xlim=c(-minAbs,0.5))
mergedData <- (mergeClustersCloseToEachOther(mod$parameters$mean, mod$parameters$pro))
if (ncol(mergedData) > 1) {
mergedData <- mergedData[,order(mergedData[1,])]
Expand Down
Loading

0 comments on commit 4c840af

Please sign in to comment.