Skip to content
This repository has been archived by the owner on Jan 27, 2020. It is now read-only.

Code polishing for Compress Annotated VCFs #602

Merged
merged 3 commits into from
Jun 20, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 39 additions & 30 deletions annotate.nf
Original file line number Diff line number Diff line change
Expand Up @@ -75,17 +75,17 @@ vcfNotToAnnotate = Channel.create()
if (annotateVCF == []) {
Channel.empty().mix(
Channel.fromPath("${directoryMap.haplotypecaller}/*.vcf.gz")
.flatten().map{vcf -> ['none', 'haplotypecaller', vcf, null]},
.flatten().map{vcf -> ['haplotypecaller', vcf]},
Channel.fromPath("${directoryMap.manta}/*SV.vcf.gz")
.flatten().map{vcf -> ['none', 'manta', vcf, null]},
.flatten().map{vcf -> ['manta', vcf]},
Channel.fromPath("${directoryMap.mutect1}/*.vcf.gz")
.flatten().map{vcf -> ['none', 'mutect1', vcf, null]},
.flatten().map{vcf -> ['mutect1', vcf]},
Channel.fromPath("${directoryMap.mutect2}/*.vcf.gz")
.flatten().map{vcf -> ['none', 'mutect2', vcf, null]},
.flatten().map{vcf -> ['mutect2', vcf]},
Channel.fromPath("${directoryMap.strelka}/*{somatic,variants}*.vcf.gz")
.flatten().map{vcf -> ['none', 'strelka', vcf, null]},
.flatten().map{vcf -> ['strelka', vcf]},
Channel.fromPath("${directoryMap.strelkabp}/*{somatic,variants}*.vcf.gz")
.flatten().map{vcf -> ['none', 'strelkabp', vcf, null]}
.flatten().map{vcf -> ['strelkabp', vcf]}
).choice(vcfToAnnotate, vcfNotToAnnotate) {
annotateTools == [] || (annotateTools != [] && it[0] in annotateTools) ? 0 : 1
}
Expand All @@ -94,25 +94,30 @@ if (annotateVCF == []) {
annotateVCF.each{ list += ",${it}" }
list = list.substring(1)
if (StringUtils.countMatches("${list}", ",") == 0) vcfToAnnotate = Channel.fromPath("${list}")
.map{vcf -> ['none', 'userspecified', vcf, null]}
.map{vcf -> ['userspecified', vcf]}
else vcfToAnnotate = Channel.fromPath("{$list}")
.map{vcf -> ['none', 'userspecified', vcf, null]}
.map{vcf -> ['userspecified', vcf]}
} else exit 1, "specify only tools or files to annotate, not both"

vcfNotToAnnotate.close()

(vcfForBCFtools, vcfForVCFtools, vcfForSnpeff, vcfForVep) = vcfToAnnotate.into(4)

vcfForVep = vcfForVep.map {
variantCaller, vcf ->
["vep", variantCaller, vcf, null]
}

process RunBcftoolsStats {
tag {vcf}

publishDir directoryMap.bcftoolsStats, mode: 'link'

input:
set annotator, variantCaller, file(vcf), file(idx) from vcfForBCFtools
set variantCaller, file(vcf) from vcfForBCFtools

output:
file ("${vcf.baseName}.bcf.tools.stats.out") into bcfReport
file ("*.bcf.tools.stats.out") into bcfReport

when: !params.noReports

Expand All @@ -130,10 +135,10 @@ process RunVcftools {
publishDir directoryMap.vcftools, mode: 'link'

input:
set annotator, variantCaller, file(vcf), file(idx) from vcfForVCFtools
set variantCaller, file(vcf) from vcfForVCFtools

output:
file ("${vcf.baseName}.*") into vcfReport
file ("${vcf.simpleName}.*") into vcfReport

when: !params.noReports

Expand All @@ -146,21 +151,21 @@ if (params.verbose) vcfReport = vcfReport.view {
}

process RunSnpeff {
tag {vcf}
tag {"${variantCaller} - ${vcf}"}

publishDir params.outDir, mode: 'link', saveAs: {
if (it == "${vcf.baseName}.snpEff.csv") "${directoryMap.snpeffReports}/${it}"
else if (it == "${vcf.baseName}.snpEff.ann.vcf") null
if (it == "${vcf.simpleName}_snpEff.csv") "${directoryMap.snpeffReports}/${it}"
else if (it == "${vcf.simpleName}_snpEff.ann.vcf") null
else "${directoryMap.snpeff}/${it}"
}

input:
set annotator, variantCaller, file(vcf), file(idx) from vcfForSnpeff
set variantCaller, file(vcf) from vcfForSnpeff
val snpeffDb from Channel.value(params.genomes[params.genome].snpeffDb)

output:
set file("${vcf.baseName}.snpEff.genes.txt"), file("${vcf.baseName}.snpEff.csv"), file("${vcf.baseName}.snpEff.summary.html") into snpeffOutput
set val("snpeff"), variantCaller, file("${vcf.baseName}.snpEff.ann.vcf") into snpeffVCF
set file("${vcf.simpleName}_snpEff.genes.txt"), file("${vcf.simpleName}_snpEff.csv"), file("${vcf.simpleName}_snpEff.summary.html") into snpeffOutput
set val("snpeff"), variantCaller, file("${vcf.simpleName}_snpEff.ann.vcf") into snpeffVCF

when: 'snpeff' in tools || 'merge' in tools

Expand All @@ -169,14 +174,14 @@ process RunSnpeff {
java -Xmx${task.memory.toGiga()}g \
-jar \$SNPEFF_HOME/snpEff.jar \
${snpeffDb} \
-csvStats ${vcf.baseName}.snpEff.csv \
-csvStats ${vcf.simpleName}_snpEff.csv \
-nodownload \
-canon \
-v \
${vcf} \
> ${vcf.baseName}.snpEff.ann.vcf
> ${vcf.simpleName}_snpEff.ann.vcf

mv snpEff_summary.html ${vcf.baseName}.snpEff.summary.html
mv snpEff_summary.html ${vcf.simpleName}_snpEff.summary.html
"""
}

Expand All @@ -194,41 +199,44 @@ if('merge' in tools) {
vcfCompressed = Channel.create()

vcfForVep = Channel.empty().mix(
vcfCompressed.until({it[0]!="snpeff"})
vcfCompressed.until({ it[0]=="merge" })
)
}

process RunVEP {
tag {vcf}
tag {"${variantCaller} - ${vcf}"}

publishDir params.outDir, mode: 'link', saveAs: {
if (it == "${vcf.baseName}.vep.summary.html") "${directoryMap.vep}/${it}"
if (it == "${vcf.simpleName}_VEP.summary.html") "${directoryMap.vep}/${it}"
else null
}

input:
set annotator, variantCaller, file(vcf), file(idx) from vcfForVep

output:
set val("vep"), variantCaller, file("${vcf.baseName}.vep.ann.vcf") into vepVCF
file("${vcf.baseName}.vep.summary.html") into vepReport
set finalannotator, variantCaller, file("${vcf.simpleName}_VEP.ann.vcf") into vepVCF
file("${vcf.simpleName}_VEP.summary.html") into vepReport

when: 'vep' in tools || 'merge' in tools

script:
finalannotator = annotator == "snpeff" ? 'merge' : 'vep'
genome = params.genome == 'smallGRCh37' ? 'GRCh37' : params.genome
"""
vep \
-i ${vcf} \
-o ${vcf.baseName}.vep.ann.vcf \
--stats_file ${vcf.baseName}.vep.summary.html \
-o ${vcf.simpleName}_VEP.ann.vcf \
--assembly ${genome} \
--cache \
--database \
--everything \
--filter_common \
--fork ${task.cpus} \
--format vcf \
--offline \
--per_gene \
--fork ${task.cpus} \
--stats_file ${vcf.simpleName}_VEP.summary.html \
--total_length \
--vcf
"""
Expand All @@ -244,7 +252,7 @@ vcfToCompress = snpeffVCF.mix(vepVCF)
process CompressVCF {
tag {"${annotator} - ${vcf}"}

publishDir "${directoryMap."$annotator"}", mode: 'link'
publishDir "${directoryMap."$finalannotator"}", mode: 'link'

input:
set annotator, variantCaller, file(vcf) from vcfToCompress
Expand All @@ -253,6 +261,7 @@ process CompressVCF {
set annotator, variantCaller, file("*.vcf.gz"), file("*.vcf.gz.tbi") into (vcfCompressed, vcfCompressedoutput)

script:
finalannotator = annotator == "merge" ? "vep" : annotator
"""
bgzip < ${vcf} > ${vcf}.gz
tabix ${vcf}.gz
Expand Down
4 changes: 2 additions & 2 deletions germlineVC.nf
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,7 @@ process RunBcftoolsStats {
set variantCaller, file(vcf) from vcfForBCFtools

output:
file ("${vcf.baseName}.bcf.tools.stats.out") into bcfReport
file ("${vcf.simpleName}.bcf.tools.stats.out") into bcfReport

when: !params.noReports

Expand All @@ -583,7 +583,7 @@ process RunVcftools {
set variantCaller, file(vcf) from vcfForVCFtools

output:
file ("${vcf.baseName}.*") into vcfReport
file ("${vcf.simpleName}.*") into vcfReport

when: !params.noReports

Expand Down
10 changes: 5 additions & 5 deletions lib/QC.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class QC {
// Run bcftools on vcf file
static def bcftools(vcf) {
"""
bcftools stats ${vcf} > ${vcf.baseName}.bcf.tools.stats.out
bcftools stats ${vcf} > ${vcf.simpleName}.bcf.tools.stats.out
"""
}

Expand All @@ -30,22 +30,22 @@ class QC {
vcftools \
--gzvcf ${vcf} \
--relatedness2 \
--out ${vcf.baseName}
--out ${vcf.simpleName}

vcftools \
--gzvcf ${vcf} \
--TsTv-by-count \
--out ${vcf.baseName}
--out ${vcf.simpleName}

vcftools \
--gzvcf ${vcf} \
--TsTv-by-qual \
--out ${vcf.baseName}
--out ${vcf.simpleName}

vcftools \
--gzvcf ${vcf} \
--FILTER-summary \
--out ${vcf.baseName}
--out ${vcf.simpleName}
"""
}

Expand Down
4 changes: 2 additions & 2 deletions somaticVC.nf
Original file line number Diff line number Diff line change
Expand Up @@ -811,7 +811,7 @@ process RunBcftoolsStats {
set variantCaller, file(vcf) from vcfForBCFtools

output:
file ("${vcf.baseName}.bcf.tools.stats.out") into bcfReport
file ("${vcf.simpleName}.bcf.tools.stats.out") into bcfReport

when: !params.noReports

Expand All @@ -834,7 +834,7 @@ process RunVcftools {
set variantCaller, file(vcf) from vcfForVCFtools

output:
file ("${vcf.baseName}.*") into vcfReport
file ("${vcf.simpleName}.*") into vcfReport

when: !params.noReports

Expand Down