From 546c2ac8e063f041fa9b657f110850ebe36570ff Mon Sep 17 00:00:00 2001 From: Rike Date: Fri, 11 Mar 2022 11:24:33 +0100 Subject: [PATCH 01/52] Install controlfreec --- modules.json | 3 + modules/nf-core/modules/controlfreec/main.nf | 158 +++++++++++++++ modules/nf-core/modules/controlfreec/meta.yml | 183 ++++++++++++++++++ 3 files changed, 344 insertions(+) create mode 100644 modules/nf-core/modules/controlfreec/main.nf create mode 100644 modules/nf-core/modules/controlfreec/meta.yml diff --git a/modules.json b/modules.json index beabc2378c..ad474b8d54 100644 --- a/modules.json +++ b/modules.json @@ -24,6 +24,9 @@ "cnvkit/batch": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, + "controlfreec": { + "git_sha": "c189835b1bb444e5ee87416fdbea66e2c2ba365e" + }, "custom/dumpsoftwareversions": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, diff --git a/modules/nf-core/modules/controlfreec/main.nf b/modules/nf-core/modules/controlfreec/main.nf new file mode 100644 index 0000000000..21084f641b --- /dev/null +++ b/modules/nf-core/modules/controlfreec/main.nf @@ -0,0 +1,158 @@ +process CONTROLFREEC { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::control-freec=11.6" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/control-freec:11.6--h1b792b2_1': + 'quay.io/biocontainers/control-freec:11.6--h1b792b2_1' }" + + input: + tuple val(meta), path(mpileup_normal), path(mpileup_tumor), path(cpn_normal), path(cpn_tumor), path(minipileup_normal), path(minipileup_tumor) + path fasta + path fai + path snp_position + path known_snps + path known_snps_tbi + path chr_directory + path mappability + path target_bed + path gccontent_profile + + output: + tuple val(meta), path("*_ratio.BedGraph") , emit: bedgraph, optional: true + tuple val(meta), path("*_control.cpn") , emit: control_cpn + tuple val(meta), path("*_sample.cpn") , emit: sample_cpn + tuple val(meta), path("GC_profile.*.cpn") , emit: gcprofile_cpn, optional:true + tuple val(meta), path("*_BAF.txt") , emit: BAF + tuple val(meta), path("*_CNVs") , emit: CNV + tuple val(meta), path("*_info.txt") , emit: info + tuple val(meta), path("*_ratio.txt") , emit: ratio + tuple val(meta), path("config.txt") , emit: config + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + //"General" configurations + def bedgraphoutput = task.ext.args?["general"]?["bedgraphoutput"] ? "BedGraphOutput = ${task.ext.args["general"]["bedgraphoutput"]}" : "" + def chr_files = chr_directory ? "chrFiles = \${PWD}/${chr_directory}" : "" + def chr_length = fai ? "chrLenFile = \${PWD}/${fai}" : "" + def breakpointthreshold = task.ext.args?["general"]?["breakpointthreshold"] ? "breakPointThreshold = ${task.ext.args["general"]["breakpointthreshold"]}" : "" + def breakpointtype = task.ext.args?["general"]?["breakpointtype"] ? "breakPointType = ${task.ext.args["general"]["breakpointtype"]}" : "" + def coefficientofvariation = task.ext.args?["general"]?["coefficientofvariation"] ? "coefficientOfVariation = ${task.ext.args["general"]["coefficientofvariation"]}" : "" // guard and value must read the same key + def contamination = task.ext.args?["general"]?["contamination"] ?
"contamination = ${task.ext.args["general"]["contamination"]}" : "" + def contaminationadjustment = task.ext.args?["general"]?["contaminationadjustment"] ? "contaminationAdjustment = ${task.ext.args["general"]["contaminationadjustment"]}" : "" + def degree = task.ext.args?["general"]?["degree"] ? "degree = ${task.ext.args["general"]["degree"]}" : "" + def forcegccontentnormalization = task.ext.args?["general"]?["forcegccontentnormalization"] ? "forceGCcontentNormalization = ${task.ext.args["general"]["forcegccontentnormalization"]}" : "" + def gccontentprofile = gccontent_profile ? "GCcontentProfile = ${gccontent_profile}" : "" + def mappability = mappability ? "gemMappabilityFile = \${PWD}/${mappability}" : "" + def intercept = task.ext.args?["general"]?["intercept"] ? "intercept = ${task.ext.args["general"]["intercept"]}" : "" + def mincnalength = task.ext.args?["general"]?["mincnalength"] ? "minCNAlength = ${task.ext.args["general"]["mincnalength"]}" : "" + def minmappabilityperwindow = task.ext.args?["general"]?["minmappabilityperwindow"] ? "minMappabilityPerWindow = ${task.ext.args["general"]["minmappabilityperwindow"]}" : "" + def minexpectedgc = task.ext.args?["general"]?["minexpectedgc"] ? "minExpectedGC = ${task.ext.args["general"]["minexpectedgc"]}" : "" + def maxexpectedgc = task.ext.args?["general"]?["maxexpectedgc"] ? "maxExpectedGC = ${task.ext.args["general"]["maxexpectedgc"]}" : "" + def minimalsubclonepresence = task.ext.args?["general"]?["minimalsubclonepresence"] ? "minimalSubclonePresence = ${task.ext.args["general"]["minimalsubclonepresence"]}" : "" + def noisydata = task.ext.args?["general"]?["noisydata"] ? "noisyData = ${task.ext.args["general"]["noisydata"]}" : "" + def output = task.ext.prefix ? "outputDir = \${PWD}/${task.ext.prefix}" : "" + def ploidy = task.ext.args?["general"]?["ploidy"] ? "ploidy = ${task.ext.args["general"]["ploidy"]}" : "" + def printNA = task.ext.args?["general"]?["printNA"] ? 
"printNA = ${task.ext.args["general"]["printNA"]}" : "" + def readcountthreshold = task.ext.args?["general"]?["readcountthreshold"] ? "readCountThreshold = ${task.ext.args["general"]["readcountthreshold"]}" : "" + def sex = task.ext.args?["general"]?["sex"] ? "sex = ${task.ext.args["general"]["sex"]}" : "" + def step = task.ext.args?["general"]?["step"] ? "step = ${task.ext.args["general"]["step"]}" : "" + def telocentromeric = task.ext.args?["general"]?["telocentromeric"] ? "telocentromeric = ${task.ext.args["general"]["telocentromeric"]} " : "" + def uniquematch = task.ext.args?["general"]?["uniquematch"] ? "uniqueMatch = ${task.ext.args["general"]["uniquematch"]}" : "" + def window = task.ext.args?["general"]?["window"] ? "window = ${task.ext.args["general"]["window"]}" : "" + + //"Control" configurations + def matefile_normal = mpileup_normal ? "mateFile = \${PWD}/${mpileup_normal}" : "" + def matecopynumberfile_normal = cpn_normal ? "mateCopyNumberFile = \${PWD}/${cpn_normal}" : "" + def minipileup_normal = minipileup_normal ? "miniPileup = \${PWD}/${minipileup_normal}" : "" + def inputformat_normal = task.ext.args?["control"]?["inputformat"] ? "inputFormat = ${task.ext.args["control"]["inputformat"]}" : "" + def mateorientation_normal = task.ext.args?["control"]?["mateorientation"] ? "mateOrientation = ${task.ext.args["control"]["mateorientation"]}" : "" + + //"Sample" configuration + def matefile_tumor = mpileup_tumor ? "mateFile = \${PWD}/${mpileup_tumor}" : "" + def matecopynumberfile_tumor = cpn_tumor ? "mateCopyNumberFile = \${PWD}/${cpn_tumor}" : "" + def minipileup_tumor = minipileup_tumor ? "miniPileup = \${PWD}/${minipileup_tumor}" : "" + def inputformat_tumor = task.ext.args?["sample"]?["inputformat"] ? "inputFormat = ${task.ext.args["sample"]["inputformat"]}" : "" + def mateorientation_tumor = task.ext.args?["sample"]?["mateorientation"] ? 
"mateOrientation = ${task.ext.args["sample"]["mateorientation"]}" : "" + + //"BAF" configuration + def makepileup = snp_position ? "makePileup = \${PWD}/${snp_position}" : "" + def fastafile = fasta ? "fastaFile = \${PWD}/${fasta}" : "" + def minimalcoverageperposition = task.ext.args?["BAF"]?["minimalcoverageperposition"] ? "minimalCoveragePerPosition = ${task.ext.args["BAF"]["minimalcoverageperposition"]}" : "" + def minimalqualityperposition = task.ext.args?["BAF"]?["minimalqualityperposition"] ? "minimalQualityPerPosition = ${task.ext.args["BAF"]["minimalqualityperposition"]}" : "" + def shiftinquality = task.ext.args?["BAF"]?["shiftinquality"] ? "shiftInQuality = ${task.ext.args["BAF"]["shiftinquality"]}" : "" + def snpfile = known_snps ? "SNPfile = \$PWD/${known_snps}" : "" + + //"Target" configuration + def target_bed = target_bed ? "captureRegions = ${target_bed}" : "" + """ + touch config.txt + + echo "[general]" >> config.txt + echo ${bedgraphoutput} >> config.txt + echo ${breakpointthreshold} >> config.txt + echo ${breakpointtype} >> config.txt + echo ${chr_files} >> config.txt + echo ${chr_length} >> config.txt + echo ${coefficientofvariation} >> config.txt + echo ${contamination} >> config.txt + echo ${contaminationadjustment} >> config.txt + echo ${degree} >> config.txt + echo ${forcegccontentnormalization} >> config.txt + echo ${gccontentprofile} >> config.txt + echo ${mappability} >> config.txt + echo ${intercept} >> config.txt + echo ${mincnalength} >> config.txt + echo ${minmappabilityperwindow} >> config.txt + echo ${minexpectedgc} >> config.txt + echo ${maxexpectedgc} >> config.txt + echo ${minimalsubclonepresence} >> config.txt + echo "maxThreads = ${task.cpus}" >> config.txt + echo ${noisydata} >> config.txt + echo ${output} >> config.txt + echo ${ploidy} >> config.txt + echo ${printNA} >> config.txt + echo ${readcountthreshold} >> config.txt + echo ${sex} >> config.txt + echo ${step} >> config.txt + echo ${telocentromeric} >> config.txt + 
echo ${uniquematch} >> config.txt + echo ${window} >> config.txt + + echo "[control]" >> config.txt + echo ${matefile_normal} >> config.txt + echo ${matecopynumberfile_normal} >> config.txt + echo ${minipileup_normal} >> config.txt + echo ${inputformat_normal} >> config.txt + echo ${mateorientation_normal} >> config.txt + + echo "[sample]" >> config.txt + echo ${matefile_tumor} >> config.txt + echo ${matecopynumberfile_tumor} >> config.txt + echo ${minipileup_tumor} >> config.txt + echo ${inputformat_tumor} >> config.txt + echo ${mateorientation_tumor} >> config.txt + + echo "[BAF]" >> config.txt + echo ${makepileup} >> config.txt + echo ${fastafile} >> config.txt + echo ${minimalcoverageperposition} >> config.txt + echo ${minimalqualityperposition} >> config.txt + echo ${shiftinquality} >> config.txt + echo ${snpfile} >> config.txt + + echo "[target]" >> config.txt + echo ${target_bed} >> config.txt + + freec -conf config.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + controlfreec: \$(echo \$(freec -version 2>&1) | sed 's/^.*Control-FREEC //; s/:.*\$//' | sed -e "s/Control-FREEC v//g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/controlfreec/meta.yml b/modules/nf-core/modules/controlfreec/meta.yml new file mode 100644 index 0000000000..4d1e8674bd --- /dev/null +++ b/modules/nf-core/modules/controlfreec/meta.yml @@ -0,0 +1,183 @@ +name: controlfreec +description: Copy number and genotype annotation from whole genome and whole exome sequencing data +keywords: + - cna + - cnv + - somatic + - single + - tumor-only +tools: + - controlfreec: + description: Copy number and genotype annotation from whole genome and whole exome sequencing data. 
+ homepage: http://boevalab.inf.ethz.ch/FREEC + documentation: http://boevalab.inf.ethz.ch/FREEC/tutorial.html + tool_dev_url: https://github.com/BoevaLab/FREEC/ + doi: "10.1093/bioinformatics/btq635" + licence: ['GPL >=2'] + +input: + - args: + type: map + description: | + Groovy Map containing tool parameters. MUST follow the structure/keywords below and be provided via modules.config. + Parameters can be removed from the map if they are not set. All values must be surrounded by quotes; meta map parameters can be set with, e.g., sex = meta.sex. + For default values, please check the documentation above. + + ``` + { + [ + "general" :[ + "bedgraphoutput": , + "breakpointthreshold": , + "breakpointtype": , + "coefficientofvariation": , + "contamination": , + "contaminationadjustment": , + "degree": , + "forcegccontentnormalization": , + "gccontentprofile": , + "intercept": , + "mincnalength": , + "minmappabilityperwindow": , + "minexpectedgc": , + "maxexpectedgc": , + "minimalsubclonepresence": , + "noisydata": , + "ploidy": , + "printNA": , + "readcountthreshold": , + "sex": , + "step": , + "telocentromeric": , + "uniquematch": , + "window": + ], + "control":[ + "inputformat": , + "mateorientation": , + ], + "sample":[ + "inputformat": , + "mateorientation": , + ], + "BAF":[ + "minimalcoverageperposition": , + "minimalqualityperposition": , + "shiftinquality": + ] + ] + } + ``` + + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - mateFile_normal: + type: file + description: File with mapped reads + pattern: "*.{sam,bam,pileup(.gz),bowtie(.gz),eland(.gz),arachne(.gz),psl(.gz),bed(.gz)}" + - mateFile_tumor: + type: file + description: File with mapped reads + pattern: "*.{sam,bam,pileup(.gz),bowtie(.gz),eland(.gz),arachne(.gz),psl(.gz),bed(.gz)}" + - cpn_normal: + type: file + description: Raw copy number profiles (optional) + pattern: "*.cpn" + - cpn_tumor: + type: file + description: Raw copy number profiles (optional) + pattern: "*.cpn" + - minipileup_normal: + type: file + description: miniPileup file from previous run (optional) + pattern: "*.pileup" + - minipileup_tumor: + type: file + description: miniPileup file from previous run (optional) + pattern: "*.pileup" + - fasta: + type: file + description: Reference file (optional; required if snp_position is provided, which sets 'makePileup') + pattern: "*.{fasta,fna,fa}" + - fai: + type: file + description: Fasta index + pattern: "*.fai" + - snp_position: + type: file + description: BED or VCF file with SNP positions, written to the config as 'makePileup' to create a mini pileup file (optional) + pattern: "*.{bed,vcf}" + - known_snps: + type: file + description: File with known SNPs + pattern: "*.{vcf,vcf.gz}" + - known_snps_tbi: + type: file + description: Index of known_snps + pattern: "*.tbi" + - chr_directory: + type: file + description: Path to directory with chromosome fasta files (optional, required if gccontentprofile is not provided) + pattern: "*/" + - mappability: + type: file + description: Contains information of mappable positions (optional) + pattern: "*.gem" + - target_bed: + type: file + description: Sorted bed file containing capture regions (optional) + pattern: "*.bed" + + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bedgraph: + type: file + description: Bedgraph format for the UCSC genome browser + pattern: "*_ratio.BedGraph" + - control_cpn: + type: file + description: files with raw copy number profiles + pattern: "*_control.cpn" + - sample_cpn: + type: file + description: files with raw copy number profiles + pattern: "*_sample.cpn" + - gcprofile_cpn: + type: file + description: file with GC-content profile. + pattern: "GC_profile.*.cpn" + - BAF: + type: file + description: file with B-allele frequencies for each possibly heterozygous SNP position + pattern: "*_BAF.txt" + - CNV: + type: file + description: file with coordinates of predicted copy number alterations. + pattern: "*_CNVs" + - info: + type: file + description: parsable file with information about FREEC run + pattern: "*_info.txt" + - ratio: + type: file + description: file with ratios and predicted copy number alterations for each window + pattern: "*_ratio.txt" + - config: + type: file + description: Config file used to run Control-FREEC + pattern: "config.txt" + +authors: + - "@FriederikeHanssen" From 00267529e8fae239ab7e83c26755c2a1f5d4ff02 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 15 Mar 2022 12:14:40 +0100 Subject: [PATCH 02/52] install ascat --- modules.json | 3 + modules/nf-core/modules/ascat/main.nf | 155 +++++++++++++++++++++++++ modules/nf-core/modules/ascat/meta.yml | 92 +++++++++++++++ 3 files changed, 250 insertions(+) create mode 100644 modules/nf-core/modules/ascat/main.nf create mode 100644 modules/nf-core/modules/ascat/meta.yml diff --git a/modules.json b/modules.json index ad474b8d54..8be45191aa 100644 --- a/modules.json +++ b/modules.json @@ -3,6 +3,9 @@ "homePage": "https://github.com/nf-core/sarek", "repos": { "nf-core/modules": { + "ascat": { + "git_sha": "d6244b42f596fa26d2ecba4ce862755821ed9da8" + }, "bcftools/stats": { "git_sha":
"e745e167c1020928ef20ea1397b6b4d230681b4d" }, diff --git a/modules/nf-core/modules/ascat/main.nf b/modules/nf-core/modules/ascat/main.nf new file mode 100644 index 0000000000..1d2bd96fe1 --- /dev/null +++ b/modules/nf-core/modules/ascat/main.nf @@ -0,0 +1,155 @@ +process ASCAT { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::ascat=3.0.0 bioconda::cancerit-allelecount=4.3.0": null) // conda specs are channel::package=version + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-c278c7398beb73294d78639a864352abef2931ce:dfe5aaa885de434adb2b490b68972c5840c6d761-0': + 'quay.io/biocontainers/mulled-v2-c278c7398beb73294d78639a864352abef2931ce:dfe5aaa885de434adb2b490b68972c5840c6d761-0' }" + + input: + tuple val(meta), path(input_normal), path(index_normal), path(input_tumor), path(index_tumor) + path(allele_files) + path(loci_files) + + output: + tuple val(meta), path("*png"), emit: png + tuple val(meta), path("*cnvs.txt"), emit: cnvs + tuple val(meta), path("*purityploidy.txt"), emit: purityploidy + tuple val(meta), path("*segments.txt"), emit: segments + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: [:] // empty map (not a String) so the args.key lookups below are safe when no args are configured + def prefix = task.ext.prefix ?: "${meta.id}" + def gender = args.gender ? "$args.gender" : "NULL" + def genomeVersion = args.genomeVersion ? "$args.genomeVersion" : "NULL" + def purity = args.purity ? "$args.purity" : "NULL" + def ploidy = args.ploidy ? "$args.ploidy" : "NULL" + def gc_files = args.gc_files ? "$args.gc_files" : "NULL" + + def minCounts_arg = args.minCounts ? ",minCounts = $args.minCounts" : "" + def chrom_names_arg = args.chrom_names ? ",chrom_names = $args.chrom_names" : "" + def min_base_qual_arg = args.min_base_qual ? ",min_base_qual = $args.min_base_qual" : "" + def min_map_qual_arg = args.min_map_qual ? 
",min_map_qual = $args.min_map_qual" : "" + def ref_fasta_arg = args.ref_fasta ? ",ref.fasta = '$args.ref_fasta'" : "" + def skip_allele_counting_tumour_arg = args.skip_allele_counting_tumour ? ",skip_allele_counting_tumour = $args.skip_allele_counting_tumour" : "" + def skip_allele_counting_normal_arg = args.skip_allele_counting_normal ? ",skip_allele_counting_normal = $args.skip_allele_counting_normal" : "" + + + + """ + #!/usr/bin/env Rscript + library(RColorBrewer) + library(ASCAT) + options(bitmapType='cairo') + + + #prepare from BAM files + ascat.prepareHTS( + tumourseqfile = "$input_tumor", + normalseqfile = "$input_normal", + tumourname = "Tumour", + normalname = "Normal", + allelecounter_exe = "alleleCounter", + alleles.prefix = "$allele_files", + loci.prefix = "$loci_files", + gender = "$gender", + genomeVersion = "$genomeVersion", + nthreads = $task.cpus + $minCounts_arg + $chrom_names_arg + $min_base_qual_arg + $min_map_qual_arg + $ref_fasta_arg + $skip_allele_counting_tumour_arg + $skip_allele_counting_normal_arg + ) + + + #Load the data + ascat.bc = ascat.loadData( + Tumor_LogR_file = "Tumour_tumourLogR.txt", + Tumor_BAF_file = "Tumour_tumourBAF.txt", # tumour BAF, not the germline one + Germline_LogR_file = "Tumour_normalLogR.txt", + Germline_BAF_file = "Tumour_normalBAF.txt", + genomeVersion = "$genomeVersion", + gender = "$gender" + ) + + #optional GC wave correction + if(!is.null($gc_files)){ + ascat.bc = ascat.GCcorrect(ascat.bc, $gc_files) + } + + #Plot the raw data + ascat.plotRawData(ascat.bc) + + #Segment the data + ascat.bc = ascat.aspcf(ascat.bc) + + #Plot the segmented data + ascat.plotSegmentedData(ascat.bc) + + #Run ASCAT to fit every tumor to a model, inferring ploidy, normal cell contamination, and discrete copy numbers + #If psi and rho are manually set: + if (!is.null($purity) && !is.null($ploidy)){ + ascat.output <- ascat.runAscat(ascat.bc, gamma=1, rho_manual=$purity, psi_manual=$ploidy) + } else if(!is.null($purity) && is.null($ploidy)){ + ascat.output <-
ascat.runAscat(ascat.bc, gamma=1, rho_manual=$purity) + } else if(!is.null($ploidy) && is.null($purity)){ + ascat.output <- ascat.runAscat(ascat.bc, gamma=1, psi_manual=$ploidy) + } else { + ascat.output <- ascat.runAscat(ascat.bc, gamma=1) + } + + #Write out segmented regions (including regions with one copy of each allele) + write.table(ascat.output[["segments"]], file=paste0("$prefix", ".segments.txt"), sep="\t", quote=F, row.names=F) + + #Write out CNVs in bed format + cnvs=ascat.output[["segments"]][2:6] + write.table(cnvs, file=paste0("$prefix",".cnvs.txt"), sep="\t", quote=F, row.names=F, col.names=T) + + #Write out purity and ploidy info + summary <- tryCatch({ + matrix(c(ascat.output[["aberrantcellfraction"]], ascat.output[["ploidy"]]), ncol=2, byrow=TRUE)}, error = function(err) { + # error handler picks up where error was generated + print(paste("Could not find optimal solution: ",err)) + return(matrix(c(0,0),nrow=1,ncol=2,byrow = TRUE)) + } + ) + colnames(summary) <- c("AberrantCellFraction","Ploidy") + write.table(summary, file=paste0("$prefix",".purityploidy.txt"), sep="\t", quote=F, row.names=F, col.names=T) + + #version export. 
Have to hardcode process name and software name because + #won't run inside an R-block + version_file_path="versions.yml" + f <- file(version_file_path,"w") + writeLines("ASCAT:", f) + writeLines(" ascat: 3.0.0",f) + close(f) + """ + + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.cnvs.txt + touch ${prefix}.purityploidy.txt + touch ${prefix}.segments.txt + touch Tumour.ASCATprofile.png + touch Tumour.ASPCF.png + touch Tumour.germline.png + touch Tumour.rawprofile.png + touch Tumour.sunrise.png + touch Tumour.tumour.png + + echo 'ASCAT:' > versions.yml + echo ' ascat: 3.0.0' >> versions.yml + """ + + +} diff --git a/modules/nf-core/modules/ascat/meta.yml b/modules/nf-core/modules/ascat/meta.yml new file mode 100644 index 0000000000..949afd6af0 --- /dev/null +++ b/modules/nf-core/modules/ascat/meta.yml @@ -0,0 +1,92 @@ +name: ascat +description: copy number profiles of tumour cells. +keywords: + - sort +tools: + - ascat: + description: ASCAT is a method to derive copy number profiles of tumour cells, accounting for normal cell admixture and tumour aneuploidy. ASCAT infers tumour purity (the fraction of tumour cells) and ploidy (the amount of DNA per tumour cell), expressed as multiples of haploid genomes from SNP array or massively parallel sequencing data, and calculates whole-genome allele-specific copy number profiles (the number of copies of both parental alleles for all SNP loci across the genome). + homepage: None + documentation: None + tool_dev_url: https://github.com/Crick-CancerGenomics/ascat + doi: "10.1093/bioinformatics/btaa538" + licence: ['GPL v3'] + +input: + - args: + type: map + description: | + Groovy Map containing tool parameters. MUST follow the structure/keywords below and be provided via modules.config. Parameters must be set between quotes. parameters can be removed from the map, if they are not set. For default values, please check the documentation above. 
+ + ``` + { + [ + "gender": "XX", + "genomeVersion": "hg19", + "purity": , + "ploidy": , + "gc_files": , + "minCounts": , + "chrom_names": , + "min_base_qual": , + "min_map_qual": , + "ref_fasta": , + "skip_allele_counting_tumour": , + "skip_allele_counting_normal": + ] + } + ``` + + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_normal: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - index_normal: + type: file + description: index for input_normal + pattern: "*.{bai}" + - input_tumor: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - index_tumor: + type: file + description: index for input_tumor + pattern: "*.{bai}" + - allele_files: + type: file + description: allele files for ASCAT. Can be downloaded here https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS + - loci_files: + type: file + description: loci files for ASCAT. Can be downloaded here https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - png: + type: file + description: ASCAT plots + pattern: "*.{png}" + - purityploidy: + type: file + description: purity and ploidy data + pattern: "*.purityploidy.txt" + - segments: + type: file + description: segments data + pattern: "*.segments.txt" +authors: + - "@aasNGC" + - "@lassefolkersen" + - "@FriederikeHanssen" + - "@maxulysse" From d425d59d023ba3a6f1f0fc082351f504195e2db2 Mon Sep 17 00:00:00 2001 From: Rike Date: Sat, 19 Mar 2022 15:57:05 +0100 Subject: [PATCH 03/52] Pull deepvariant into own sw --- .../local/germline_variant_calling.nf | 64 ++-------------- .../local/variantcalling/deepvariant.nf | 73 +++++++++++++++++++ .../local/variantcalling/freebayes.nf | 0 subworkflows/local/variantcalling/strelka.nf | 0 4 files changed, 79 insertions(+), 58 deletions(-) create mode 100644 subworkflows/local/variantcalling/deepvariant.nf create mode 100644 subworkflows/local/variantcalling/freebayes.nf create mode 100644 subworkflows/local/variantcalling/strelka.nf diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index 4cfee46c8d..8724db4478 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -2,8 +2,6 @@ // GERMLINE VARIANT CALLING // -include { BGZIP as BGZIP_VC_DEEPVARIANT_GVCF } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_DEEPVARIANT_VCF } from '../../modules/local/bgzip' include { BGZIP as BGZIP_VC_FREEBAYES } from '../../modules/local/bgzip' include { BGZIP as BGZIP_VC_HAPLOTYPECALLER } from '../../modules/local/bgzip' include { BGZIP as BGZIP_VC_MANTA_DIPLOID } from '../../modules/local/bgzip' @@ -11,8 +9,6 @@ include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../modules/local/ include { BGZIP as BGZIP_VC_MANTA_SV } from '../../modules/local/bgzip' include { BGZIP as 
BGZIP_VC_STRELKA } from '../../modules/local/bgzip' include { BGZIP as BGZIP_VC_STRELKA_GENOME } from '../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_DEEPVARIANT_GVCF } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_DEEPVARIANT_VCF } from '../../modules/local/concat_vcf/main' include { CONCAT_VCF as CONCAT_FREEBAYES } from '../../modules/local/concat_vcf/main' include { CONCAT_VCF as CONCAT_HAPLOTYPECALLER } from '../../modules/local/concat_vcf/main' include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../modules/local/concat_vcf/main' @@ -20,7 +16,6 @@ include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../modules/local/ include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../modules/local/concat_vcf/main' include { CONCAT_VCF as CONCAT_STRELKA } from '../../modules/local/concat_vcf/main' include { CONCAT_VCF as CONCAT_STRELKA_GENOME } from '../../modules/local/concat_vcf/main' -include { DEEPVARIANT } from '../../modules/nf-core/modules/deepvariant/main' include { FREEBAYES } from '../../modules/nf-core/modules/freebayes/main' include { GATK4_GENOTYPEGVCFS as GENOTYPEGVCFS } from '../../modules/nf-core/modules/gatk4/genotypegvcfs/main' include { GATK4_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../modules/nf-core/modules/gatk4/haplotypecaller/main' @@ -28,12 +23,11 @@ include { GATK_JOINT_GERMLINE_VARIANT_CALLING } from '../../subworkflows/n include { MANTA_GERMLINE } from '../../modules/local/manta/germline/main' include { STRELKA_GERMLINE } from '../../modules/nf-core/modules/strelka/germline/main' include { TABIX_BGZIPTABIX as TABIX_BGZIP_TIDDIT_SV } from '../../modules/nf-core/modules/tabix/bgziptabix/main' -include { TABIX_TABIX as TABIX_VC_DEEPVARIANT_GVCF } from '../../modules/nf-core/modules/tabix/tabix/main' -include { TABIX_TABIX as TABIX_VC_DEEPVARIANT_VCF } from '../../modules/nf-core/modules/tabix/tabix/main' include { TABIX_TABIX as TABIX_VC_FREEBAYES } from 
'../../modules/nf-core/modules/tabix/tabix/main' include { TABIX_TABIX as TABIX_VC_HAPLOTYPECALLER } from '../../modules/nf-core/modules/tabix/tabix/main' include { TIDDIT_SV } from '../../modules/nf-core/modules/tiddit/sv/main' +include { RUN_DEEPVARIANT } from './variantcalling/deepvariant.nf' workflow GERMLINE_VARIANT_CALLING { take: cram_recalibrated // channel: [mandatory] cram @@ -74,49 +68,7 @@ workflow GERMLINE_VARIANT_CALLING { } // DEEPVARIANT - - //TODO: benchmark if it is better to provide multiple bed files & run on multiple machines + mergeing afterwards || one containing all intervals and run on one larger machine - // Deepvariant: https://github.com/google/deepvariant/issues/510 - - DEEPVARIANT( - cram_recalibrated_intervals, - fasta, - fasta_fai) - - // Only when no intervals - TABIX_VC_DEEPVARIANT_VCF(DEEPVARIANT.out.vcf) - TABIX_VC_DEEPVARIANT_GVCF(DEEPVARIANT.out.gvcf) - - // Only when using intervals - BGZIP_VC_DEEPVARIANT_VCF(DEEPVARIANT.out.vcf) - BGZIP_VC_DEEPVARIANT_GVCF(DEEPVARIANT.out.gvcf) - - CONCAT_DEEPVARIANT_VCF( - BGZIP_VC_DEEPVARIANT_VCF.out.vcf - .map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals), - fasta_fai, - intervals_bed_combine_gz) - - CONCAT_DEEPVARIANT_GVCF( - BGZIP_VC_DEEPVARIANT_GVCF.out.vcf - .map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals), - fasta_fai, - intervals_bed_combine_gz) - - deepvariant_vcf = channel.empty().mix( - CONCAT_DEEPVARIANT_GVCF.out.vcf, - CONCAT_DEEPVARIANT_VCF.out.vcf, - DEEPVARIANT.out.gvcf.join(TABIX_VC_DEEPVARIANT_GVCF.out.tbi), - DEEPVARIANT.out.vcf.join(TABIX_VC_DEEPVARIANT_VCF.out.tbi)) - + RUN_DEEPVARIANT(cram_recalibrated_intervals, fasta, fasta_fai, intervals_bed_combine_gz, num_intervals) // FREEBAYES // Remap channel for Freebayes @@ -394,35 +346,31 @@ workflow GERMLINE_VARIANT_CALLING { // ch_versions = 
ch_versions.mix(TIDDIT_SV.out.versions) // } - ch_versions = ch_versions.mix(BGZIP_VC_DEEPVARIANT_GVCF.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_DEEPVARIANT_VCF.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_FREEBAYES.out.versions) ch_versions = ch_versions.mix(BGZIP_VC_HAPLOTYPECALLER.out.versions) ch_versions = ch_versions.mix(BGZIP_VC_MANTA_DIPLOID.out.versions) ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SMALL_INDELS.out.versions) ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) ch_versions = ch_versions.mix(BGZIP_VC_STRELKA.out.versions) - ch_versions = ch_versions.mix(CONCAT_DEEPVARIANT_GVCF.out.versions) - ch_versions = ch_versions.mix(CONCAT_DEEPVARIANT_VCF.out.versions) ch_versions = ch_versions.mix(CONCAT_FREEBAYES.out.versions) ch_versions = ch_versions.mix(CONCAT_HAPLOTYPECALLER.out.versions) ch_versions = ch_versions.mix(CONCAT_MANTA_DIPLOID.out.versions) ch_versions = ch_versions.mix(CONCAT_MANTA_SMALL_INDELS.out.versions) ch_versions = ch_versions.mix(CONCAT_MANTA_SV.out.versions) ch_versions = ch_versions.mix(CONCAT_STRELKA.out.versions) - ch_versions = ch_versions.mix(DEEPVARIANT.out.versions) ch_versions = ch_versions.mix(FREEBAYES.out.versions) ch_versions = ch_versions.mix(GENOTYPEGVCFS.out.versions) ch_versions = ch_versions.mix(HAPLOTYPECALLER.out.versions) ch_versions = ch_versions.mix(MANTA_GERMLINE.out.versions) ch_versions = ch_versions.mix(STRELKA_GERMLINE.out.versions) - ch_versions = ch_versions.mix(TABIX_VC_DEEPVARIANT_GVCF.out.versions) - ch_versions = ch_versions.mix(TABIX_VC_DEEPVARIANT_VCF.out.versions) ch_versions = ch_versions.mix(TABIX_VC_FREEBAYES.out.versions) ch_versions = ch_versions.mix(TABIX_VC_HAPLOTYPECALLER.out.versions) + ch_versions = ch_versions.mix(RUN_DEEPVARIANT.out.versions) + emit: - deepvariant_vcf + deepvariant_vcf = RUN_DEEPVARIANT.out.deepvariant_vcf freebayes_vcf haplotypecaller_gvcf genotype_gvcf diff --git a/subworkflows/local/variantcalling/deepvariant.nf 
b/subworkflows/local/variantcalling/deepvariant.nf new file mode 100644 index 0000000000..f55d24f53e --- /dev/null +++ b/subworkflows/local/variantcalling/deepvariant.nf @@ -0,0 +1,73 @@ +include { BGZIP as BGZIP_VC_DEEPVARIANT_GVCF } from '../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_DEEPVARIANT_VCF } from '../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_DEEPVARIANT_GVCF } from '../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_DEEPVARIANT_VCF } from '../../../modules/local/concat_vcf/main' +include { DEEPVARIANT } from '../../../modules/nf-core/modules/deepvariant/main' +include { TABIX_TABIX as TABIX_VC_DEEPVARIANT_GVCF } from '../../../modules/nf-core/modules/tabix/tabix/main' +include { TABIX_TABIX as TABIX_VC_DEEPVARIANT_VCF } from '../../../modules/nf-core/modules/tabix/tabix/main' + +//TODO: benchmark if it is better to provide multiple bed files & run on multiple machines + mergeing afterwards || one containing all intervals and run on one larger machine +// Deepvariant: https://github.com/google/deepvariant/issues/510 +workflow RUN_DEEPVARIANT { + take: + cram_recalibrated_intervals // channel: [mandatory] + fasta + fasta_fai + intervals_bed_combine_gz + num_intervals + + main: + + ch_versions = Channel.empty() + + DEEPVARIANT( + cram_recalibrated_intervals, + fasta, + fasta_fai) + + // Only when no intervals + TABIX_VC_DEEPVARIANT_VCF(DEEPVARIANT.out.vcf) + TABIX_VC_DEEPVARIANT_GVCF(DEEPVARIANT.out.gvcf) + + // Only when using intervals + BGZIP_VC_DEEPVARIANT_VCF(DEEPVARIANT.out.vcf) + BGZIP_VC_DEEPVARIANT_GVCF(DEEPVARIANT.out.gvcf) + + CONCAT_DEEPVARIANT_VCF( + BGZIP_VC_DEEPVARIANT_VCF.out.vcf + .map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.sample + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_combine_gz) + + CONCAT_DEEPVARIANT_GVCF( + BGZIP_VC_DEEPVARIANT_GVCF.out.vcf + .map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = 
new_meta.sample + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_combine_gz) + + deepvariant_vcf = channel.empty().mix( + CONCAT_DEEPVARIANT_GVCF.out.vcf, + CONCAT_DEEPVARIANT_VCF.out.vcf, + DEEPVARIANT.out.gvcf.join(TABIX_VC_DEEPVARIANT_GVCF.out.tbi), + DEEPVARIANT.out.vcf.join(TABIX_VC_DEEPVARIANT_VCF.out.tbi)) + + ch_versions = ch_versions.mix(BGZIP_VC_DEEPVARIANT_GVCF.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_DEEPVARIANT_VCF.out.versions) + ch_versions = ch_versions.mix(CONCAT_DEEPVARIANT_GVCF.out.versions) + ch_versions = ch_versions.mix(CONCAT_DEEPVARIANT_VCF.out.versions) + ch_versions = ch_versions.mix(DEEPVARIANT.out.versions) + ch_versions = ch_versions.mix(TABIX_VC_DEEPVARIANT_GVCF.out.versions) + ch_versions = ch_versions.mix(TABIX_VC_DEEPVARIANT_VCF.out.versions) + + emit: + deepvariant_vcf + versions = ch_versions +} diff --git a/subworkflows/local/variantcalling/freebayes.nf b/subworkflows/local/variantcalling/freebayes.nf new file mode 100644 index 0000000000..e69de29bb2 diff --git a/subworkflows/local/variantcalling/strelka.nf b/subworkflows/local/variantcalling/strelka.nf new file mode 100644 index 0000000000..e69de29bb2 From efc13ca5e6090b3099c4022024670ce104efa5e7 Mon Sep 17 00:00:00 2001 From: Rike Date: Mon, 21 Mar 2022 13:40:23 +0100 Subject: [PATCH 04/52] Pull all germline VC steps into own subworkflows --- .../local/germline_variant_calling.nf | 345 ++---------------- .../local/variantcalling/freebayes.nf | 50 +++ .../local/variantcalling/haplotypecaller.nf | 118 ++++++ subworkflows/local/variantcalling/manta.nf | 102 ++++++ subworkflows/local/variantcalling/strelka.nf | 76 ++++ subworkflows/local/variantcalling/tiddit.nf | 35 ++ 6 files changed, 414 insertions(+), 312 deletions(-) create mode 100644 subworkflows/local/variantcalling/haplotypecaller.nf create mode 100644 subworkflows/local/variantcalling/manta.nf create mode 100644 subworkflows/local/variantcalling/tiddit.nf diff --git 
a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index 8724db4478..b76370b80d 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -2,32 +2,13 @@ // GERMLINE VARIANT CALLING // -include { BGZIP as BGZIP_VC_FREEBAYES } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_HAPLOTYPECALLER } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_DIPLOID } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SV } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_STRELKA } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_STRELKA_GENOME } from '../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_FREEBAYES } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_HAPLOTYPECALLER } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_STRELKA } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_STRELKA_GENOME } from '../../modules/local/concat_vcf/main' -include { FREEBAYES } from '../../modules/nf-core/modules/freebayes/main' -include { GATK4_GENOTYPEGVCFS as GENOTYPEGVCFS } from '../../modules/nf-core/modules/gatk4/genotypegvcfs/main' -include { GATK4_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../modules/nf-core/modules/gatk4/haplotypecaller/main' -include { GATK_JOINT_GERMLINE_VARIANT_CALLING } from '../../subworkflows/nf-core/gatk4/joint_germline_variant_calling/main' -include { MANTA_GERMLINE } from '../../modules/local/manta/germline/main' -include { STRELKA_GERMLINE } from 
'../../modules/nf-core/modules/strelka/germline/main' -include { TABIX_BGZIPTABIX as TABIX_BGZIP_TIDDIT_SV } from '../../modules/nf-core/modules/tabix/bgziptabix/main' -include { TABIX_TABIX as TABIX_VC_FREEBAYES } from '../../modules/nf-core/modules/tabix/tabix/main' -include { TABIX_TABIX as TABIX_VC_HAPLOTYPECALLER } from '../../modules/nf-core/modules/tabix/tabix/main' -include { TIDDIT_SV } from '../../modules/nf-core/modules/tiddit/sv/main' - include { RUN_DEEPVARIANT } from './variantcalling/deepvariant.nf' +include { RUN_FREEBAYES } from './variantcalling/freebayes.nf' +include { RUN_HAPLOTYPECALLER } from './variantcalling/haplotypecaller.nf' +include { RUN_MANTA } from './variantcalling/manta.nf' +include { RUN_STRELKA } from './variantcalling/strelka.nf' +include { RUN_TIDDIT } from './variantcalling/tiddit.nf' + workflow GERMLINE_VARIANT_CALLING { take: cram_recalibrated // channel: [mandatory] cram @@ -56,7 +37,7 @@ workflow GERMLINE_VARIANT_CALLING { [[ id: id, sample: meta.sample, gender: meta.gender, status: meta.status, patient: meta.patient ], cram, crai, new_intervals] } - // Remap channel with gziped intervals + indexes + // Remap channel with gzipped intervals + indexes cram_recalibrated_intervals_gz_tbi = cram_recalibrated.combine(intervals_bed_gz_tbi) .map{ meta, cram, crai, bed, tbi -> sample = meta.sample @@ -69,313 +50,53 @@ workflow GERMLINE_VARIANT_CALLING { // DEEPVARIANT RUN_DEEPVARIANT(cram_recalibrated_intervals, fasta, fasta_fai, intervals_bed_combine_gz, num_intervals) - // FREEBAYES + // FREEBAYES // Remap channel for Freebayes - cram_recalibrated_intervals_freebayes = cram_recalibrated.combine(intervals) + cram_recalibrated_intervals_freebayes = cram_recalibrated_intervals .map{ meta, cram, crai, intervals -> - sample = meta.sample - new_intervals = intervals.baseName != "no_intervals" ? intervals : [] - id = new_intervals ? 
sample + "_" + new_intervals.baseName : sample - new_meta = [ id: id, sample: meta.sample, gender: meta.gender, status: meta.status, patient: meta.patient ] - [new_meta, cram, crai, [], [], new_intervals] + [meta, cram, crai, [], [], intervals] } - FREEBAYES( - cram_recalibrated_intervals_freebayes, - fasta, - fasta_fai, - [], [], []) - - // Only when no intervals - TABIX_VC_FREEBAYES(FREEBAYES.out.vcf) - - // Only when using intervals - BGZIP_VC_FREEBAYES(FREEBAYES.out.vcf) - - CONCAT_FREEBAYES( - BGZIP_VC_FREEBAYES.out.vcf - .map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals), - fasta_fai, - intervals_bed_combine_gz) - - freebayes_vcf = Channel.empty().mix( - CONCAT_FREEBAYES.out.vcf, - FREEBAYES.out.vcf.join(TABIX_VC_FREEBAYES.out.tbi)) + RUN_FREEBAYES(cram_recalibrated_intervals_freebayes, fasta, fasta_fai) // HAPLOTYPECALLER - - HAPLOTYPECALLER( - cram_recalibrated_intervals, - fasta, - fasta_fai, - dict, - dbsnp, - dbsnp_tbi) - - // Only when no intervals - TABIX_VC_HAPLOTYPECALLER(HAPLOTYPECALLER.out.vcf) - - // Only when using intervals - BGZIP_VC_HAPLOTYPECALLER(HAPLOTYPECALLER.out.vcf) - - CONCAT_HAPLOTYPECALLER( - BGZIP_VC_HAPLOTYPECALLER.out.vcf - .map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals), - fasta_fai, - intervals_bed_combine_gz) - - HAPLOTYPECALLER.out.vcf.groupTuple(size: num_intervals) - .branch{ - intervals: it[1].size() > 1 - no_intervals: it[1].size() == 1 - }.set{haplotypecaller_gvcf_intervals} - - HAPLOTYPECALLER.out.tbi.groupTuple(size: num_intervals) - .branch{ - intervals: it[1].size() > 1 - no_intervals: it[1].size() == 1 - }.set{haplotypecaller_gvcf_tbi_intervals} - - haplotypecaller_gvcf = Channel.empty().mix( - CONCAT_HAPLOTYPECALLER.out.vcf, - haplotypecaller_gvcf_intervals.no_intervals) - - haplotypecaller_gvcf_tbi = Channel.empty().mix( - CONCAT_HAPLOTYPECALLER.out.tbi, 
- haplotypecaller_gvcf_tbi_intervals.no_intervals) - - genotype_gvcf_to_call = haplotypecaller_gvcf.join(haplotypecaller_gvcf_tbi) - .combine(intervals_bed_combine_gz_tbi) - .map{ - meta, gvcf, gvf_tbi, intervals, intervals_tbi -> - new_intervals = intervals.simpleName != "no_intervals" ? intervals : [] - new_intervals_tbi = intervals_tbi.simpleName != "no_intervals" ? intervals_tbi : [] - [meta, gvcf, gvf_tbi, new_intervals, new_intervals_tbi] - } - - // GENOTYPEGVCFS - - GENOTYPEGVCFS( - genotype_gvcf_to_call, - fasta, - fasta_fai, - dict, - dbsnp, - dbsnp_tbi) - - genotype_gvcf = GENOTYPEGVCFS.out.vcf - - // if (joint_germline) { - // run_haplotypecaller = false - // run_vqsr = true //parameter? - // some feedback from gavin - // GATK_JOINT_GERMLINE_VARIANT_CALLING( - // haplotypecaller_vcf_gz_tbi, - // run_haplotypecaller, - // run_vqsr, - // fasta, - // fasta_fai, - // dict, - // dbsnp, - // dbsnp_tbi, - // "joined", - // allelespecific? - // resources? - // annotation? - // "BOTH", - // true, - // truthsensitivity -> parameter or module? - // ) - // ch_versions = ch_versions.mix(GATK_JOINT_GERMLINE_VARIANT_CALLING.out.versions) - // } + RUN_HAPLOTYPECALLER(cram_recalibrated_intervals, + fasta, + fasta_fai, + dict, + dbsnp, + dbsnp_tbi, + num_intervals) // MANTA - // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. 
- // Seems to be the consensus on upstream modules implementation too - - MANTA_GERMLINE( - cram_recalibrated_intervals_gz_tbi, + RUN_MANTA(cram_recalibrated_intervals_gz_tbi, fasta, - fasta_fai) - - // Figure out if using intervals or no_intervals - MANTA_GERMLINE.out.candidate_small_indels_vcf.groupTuple(size: num_intervals) - .branch{ - intervals: it[1].size() > 1 - no_intervals: it[1].size() == 1 - }.set{manta_small_indels_vcf} - - MANTA_GERMLINE.out.candidate_sv_vcf.groupTuple(size: num_intervals) - .branch{ - intervals: it[1].size() > 1 - no_intervals: it[1].size() == 1 - }.set{manta_sv_vcf} - - MANTA_GERMLINE.out.diploid_sv_vcf.groupTuple(size: num_intervals) - .branch{ - intervals: it[1].size() > 1 - no_intervals: it[1].size() == 1 - }.set{manta_diploid_sv_vcf} - - // Only when using intervals - BGZIP_VC_MANTA_DIPLOID(MANTA_GERMLINE.out.diploid_sv_vcf) - - CONCAT_MANTA_DIPLOID( - BGZIP_VC_MANTA_DIPLOID.out.vcf - .map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals), - fasta_fai, - intervals_bed_combine_gz) - - BGZIP_VC_MANTA_SMALL_INDELS(MANTA_GERMLINE.out.candidate_small_indels_vcf) - - CONCAT_MANTA_SMALL_INDELS( - BGZIP_VC_MANTA_SMALL_INDELS.out.vcf - .map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals), - fasta_fai, - intervals_bed_combine_gz) - - BGZIP_VC_MANTA_SV(MANTA_GERMLINE.out.candidate_sv_vcf) - - CONCAT_MANTA_SV( - BGZIP_VC_MANTA_SV.out.vcf - .map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals), fasta_fai, - intervals_bed_combine_gz) - - manta_vcf = Channel.empty().mix( - CONCAT_MANTA_DIPLOID.out.vcf, - CONCAT_MANTA_SMALL_INDELS.out.vcf, - CONCAT_MANTA_SV.out.vcf, - manta_diploid_sv_vcf.no_intervals, - manta_small_indels_vcf.no_intervals, - manta_sv_vcf.no_intervals) + num_intervals) // STRELKA - // TODO: Research if 
splitting by intervals is ok, we pretend for now it is fine. - // Seems to be the consensus on upstream modules implementation too - - STRELKA_GERMLINE( - cram_recalibrated_intervals_gz_tbi, + RUN_STRELKA(cram_recalibrated_intervals_gz_tbi, fasta, - fasta_fai) - - // Figure out if using intervals or no_intervals - STRELKA_GERMLINE.out.vcf.groupTuple(size: num_intervals) - .branch{ - intervals: it[1].size() > 1 - no_intervals: it[1].size() == 1 - }.set{strelka_vcf} - - STRELKA_GERMLINE.out.genome_vcf.groupTuple(size: num_intervals) - .branch{ - intervals: it[1].size() > 1 - no_intervals: it[1].size() == 1 - }.set{strelka_genome_vcf} - - // Only when using intervals - BGZIP_VC_STRELKA(STRELKA_GERMLINE.out.vcf) - - CONCAT_STRELKA( - BGZIP_VC_STRELKA.out.vcf - .map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals), fasta_fai, - intervals_bed_combine_gz) - - BGZIP_VC_STRELKA_GENOME(STRELKA_GERMLINE.out.genome_vcf) - - CONCAT_STRELKA_GENOME( - BGZIP_VC_STRELKA_GENOME.out.vcf - .map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals), - fasta_fai, - intervals_bed_combine_gz) - - strelka_vcf = Channel.empty().mix( - CONCAT_STRELKA.out.vcf, - CONCAT_STRELKA_GENOME.out.vcf, - strelka_genome_vcf.no_intervals, - strelka_vcf.no_intervals) - - // if (tools.contains('tiddit')) { - // TODO: Update tiddit on bioconda, the current version does not support cram usage, needs newest version: - // https://github.com/SciLifeLab/TIDDIT/issues/82#issuecomment-1022103264 - // Issue opened, either this week or end of february - - // TIDDIT_SV( - // cram_recalibrated, - // fasta, - // fasta_fai - // ) - - // TABIX_BGZIP_TIDDIT_SV(TIDDIT_SV.out.vcf) - // tiddit_vcf_gz_tbi = TABIX_BGZIP_TIDDIT_SV.out.gz_tbi - // tiddit_ploidy = TIDDIT_SV.out.ploidy - // tiddit_signals = TIDDIT_SV.out.signals - // tiddit_wig = TIDDIT_SV.out.wig - // tiddit_gc_wig = 
TIDDIT_SV.out.gc_wig - - // ch_versions = ch_versions.mix(TABIX_BGZIP_TIDDIT_SV.out.versions) - // ch_versions = ch_versions.mix(TIDDIT_SV.out.versions) - // } - + num_intervals) - ch_versions = ch_versions.mix(BGZIP_VC_FREEBAYES.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_HAPLOTYPECALLER.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_DIPLOID.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_STRELKA.out.versions) - ch_versions = ch_versions.mix(CONCAT_FREEBAYES.out.versions) - ch_versions = ch_versions.mix(CONCAT_HAPLOTYPECALLER.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_DIPLOID.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(CONCAT_STRELKA.out.versions) - ch_versions = ch_versions.mix(FREEBAYES.out.versions) - ch_versions = ch_versions.mix(GENOTYPEGVCFS.out.versions) - ch_versions = ch_versions.mix(HAPLOTYPECALLER.out.versions) - ch_versions = ch_versions.mix(MANTA_GERMLINE.out.versions) - ch_versions = ch_versions.mix(STRELKA_GERMLINE.out.versions) - ch_versions = ch_versions.mix(TABIX_VC_FREEBAYES.out.versions) - ch_versions = ch_versions.mix(TABIX_VC_HAPLOTYPECALLER.out.versions) + //TIDDIT + //TODO ch_versions = ch_versions.mix(RUN_DEEPVARIANT.out.versions) + ch_versions = ch_versions.mix(RUN_FREEBAYES.out.versions) + ch_versions = ch_versions.mix(RUN_HAPLOTYPECALLER.out.versions) + ch_versions = ch_versions.mix(RUN_MANTA.out.versions) + ch_versions = ch_versions.mix(RUN_STRELKA.out.versions) emit: deepvariant_vcf = RUN_DEEPVARIANT.out.deepvariant_vcf - freebayes_vcf - haplotypecaller_gvcf - genotype_gvcf - manta_vcf - strelka_vcf + freebayes_vcf = RUN_FREEBAYES.out.freebayes_vcf + haplotypecaller_gvcf = RUN_HAPLOTYPECALLER.out.haplotypecaller_gvcf + 
genotype_gvcf = RUN_HAPLOTYPECALLER.out.genotype_gvcf + manta_vcf = RUN_MANTA.out.manta_vcf + strelka_vcf = RUN_STRELKA.out.strelka_vcf versions = ch_versions } diff --git a/subworkflows/local/variantcalling/freebayes.nf b/subworkflows/local/variantcalling/freebayes.nf index e69de29bb2..22ea00b50e 100644 --- a/subworkflows/local/variantcalling/freebayes.nf +++ b/subworkflows/local/variantcalling/freebayes.nf @@ -0,0 +1,50 @@ +include { BGZIP as BGZIP_VC_FREEBAYES } from '../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_FREEBAYES } from '../../modules/local/concat_vcf/main' +include { FREEBAYES } from '../../modules/nf-core/modules/freebayes/main' +include { TABIX_TABIX as TABIX_VC_FREEBAYES } from '../../modules/nf-core/modules/tabix/tabix/main' + + +workflow RUN_FREEBAYES { + take: + cram_recalibrated_intervals_freebayes + fasta + fasta_fai + + main: + + ch_versions = Channel.empty() + + FREEBAYES( + cram_recalibrated_intervals_freebayes, + fasta, + fasta_fai, + [], [], []) + + // Only when no intervals + TABIX_VC_FREEBAYES(FREEBAYES.out.vcf) + + // Only when using intervals + BGZIP_VC_FREEBAYES(FREEBAYES.out.vcf) + + CONCAT_FREEBAYES( + BGZIP_VC_FREEBAYES.out.vcf + .map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.sample + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_combine_gz) + + freebayes_vcf = Channel.empty().mix( + CONCAT_FREEBAYES.out.vcf, + FREEBAYES.out.vcf.join(TABIX_VC_FREEBAYES.out.tbi)) + + ch_versions = ch_versions.mix(BGZIP_VC_FREEBAYES.out.versions) + ch_versions = ch_versions.mix(CONCAT_FREEBAYES.out.versions) + ch_versions = ch_versions.mix(FREEBAYES.out.versions) + ch_versions = ch_versions.mix(TABIX_VC_FREEBAYES.out.versions) + + emit: + versions = ch_versions +} diff --git a/subworkflows/local/variantcalling/haplotypecaller.nf b/subworkflows/local/variantcalling/haplotypecaller.nf new file mode 100644 index 0000000000..e3665f1b4c --- /dev/null +++ 
b/subworkflows/local/variantcalling/haplotypecaller.nf @@ -0,0 +1,118 @@ +include { BGZIP as BGZIP_VC_HAPLOTYPECALLER } from '../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_HAPLOTYPECALLER } from '../../modules/local/concat_vcf/main' +include { GATK4_GENOTYPEGVCFS as GENOTYPEGVCFS } from '../../modules/nf-core/modules/gatk4/genotypegvcfs/main' +include { GATK4_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../modules/nf-core/modules/gatk4/haplotypecaller/main' +include { GATK_JOINT_GERMLINE_VARIANT_CALLING } from '../../subworkflows/nf-core/gatk4/joint_germline_variant_calling/main' +include { TABIX_TABIX as TABIX_VC_HAPLOTYPECALLER } from '../../modules/nf-core/modules/tabix/tabix/main' + +workflow RUN_HAPLOTYPECALLER { + take: + cram_recalibrated_intervals + fasta + fasta_fai + dict + dbsnp + dbsnp_tbi + num_intervals + + main: + + ch_versions = Channel.empty() + + HAPLOTYPECALLER( + cram_recalibrated_intervals, + fasta, + fasta_fai, + dict, + dbsnp, + dbsnp_tbi) + + // Only when no intervals + TABIX_VC_HAPLOTYPECALLER(HAPLOTYPECALLER.out.vcf) + + // Only when using intervals + BGZIP_VC_HAPLOTYPECALLER(HAPLOTYPECALLER.out.vcf) + + CONCAT_HAPLOTYPECALLER( + BGZIP_VC_HAPLOTYPECALLER.out.vcf + .map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.sample + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_combine_gz) + + HAPLOTYPECALLER.out.vcf.groupTuple(size: num_intervals) + .branch{ + intervals: it[1].size() > 1 + no_intervals: it[1].size() == 1 + }.set{haplotypecaller_gvcf_intervals} + + HAPLOTYPECALLER.out.tbi.groupTuple(size: num_intervals) + .branch{ + intervals: it[1].size() > 1 + no_intervals: it[1].size() == 1 + }.set{haplotypecaller_gvcf_tbi_intervals} + + haplotypecaller_gvcf = Channel.empty().mix( + CONCAT_HAPLOTYPECALLER.out.vcf, + haplotypecaller_gvcf_intervals.no_intervals) + + haplotypecaller_gvcf_tbi = Channel.empty().mix( + CONCAT_HAPLOTYPECALLER.out.tbi, + 
haplotypecaller_gvcf_tbi_intervals.no_intervals) + + genotype_gvcf_to_call = haplotypecaller_gvcf.join(haplotypecaller_gvcf_tbi) + .combine(intervals_bed_combine_gz_tbi) + .map{ + meta, gvcf, gvf_tbi, intervals, intervals_tbi -> + new_intervals = intervals.simpleName != "no_intervals" ? intervals : [] + new_intervals_tbi = intervals_tbi.simpleName != "no_intervals" ? intervals_tbi : [] + [meta, gvcf, gvf_tbi, new_intervals, new_intervals_tbi] + } + + // GENOTYPEGVCFS + + GENOTYPEGVCFS( + genotype_gvcf_to_call, + fasta, + fasta_fai, + dict, + dbsnp, + dbsnp_tbi) + + genotype_gvcf = GENOTYPEGVCFS.out.vcf + + // if (joint_germline) { + // run_haplotypecaller = false + // run_vqsr = true //parameter? + // some feedback from gavin + // GATK_JOINT_GERMLINE_VARIANT_CALLING( + // haplotypecaller_vcf_gz_tbi, + // run_haplotypecaller, + // run_vqsr, + // fasta, + // fasta_fai, + // dict, + // dbsnp, + // dbsnp_tbi, + // "joined", + // allelespecific? + // resources? + // annotation? + // "BOTH", + // true, + // truthsensitivity -> parameter or module? 
+ // ) + // ch_versions = ch_versions.mix(GATK_JOINT_GERMLINE_VARIANT_CALLING.out.versions) + // } + ch_versions = ch_versions.mix(BGZIP_VC_HAPLOTYPECALLER.out.versions) + ch_versions = ch_versions.mix(CONCAT_HAPLOTYPECALLER.out.versions) + ch_versions = ch_versions.mix(GENOTYPEGVCFS.out.versions) + ch_versions = ch_versions.mix(HAPLOTYPECALLER.out.versions) + ch_versions = ch_versions.mix(TABIX_VC_HAPLOTYPECALLER.out.versions) + + emit: + versions = ch_versions +} diff --git a/subworkflows/local/variantcalling/manta.nf b/subworkflows/local/variantcalling/manta.nf new file mode 100644 index 0000000000..acc536e2b9 --- /dev/null +++ b/subworkflows/local/variantcalling/manta.nf @@ -0,0 +1,102 @@ +include { BGZIP as BGZIP_VC_MANTA_DIPLOID } from '../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_SV } from '../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../modules/local/concat_vcf/main' +include { MANTA_GERMLINE } from '../../modules/local/manta/germline/main' + +workflow RUN_MANTA { + take: + cram_recalibrated_intervals_gz_tbi + fasta + fasta_fai + num_intervals + + main: + + ch_versions = Channel.empty() + // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. 
+ // Seems to be the consensus on upstream modules implementation too + + MANTA_GERMLINE( + cram_recalibrated_intervals_gz_tbi, + fasta, + fasta_fai) + + // Figure out if using intervals or no_intervals + MANTA_GERMLINE.out.candidate_small_indels_vcf.groupTuple(size: num_intervals) + .branch{ + intervals: it[1].size() > 1 + no_intervals: it[1].size() == 1 + }.set{manta_small_indels_vcf} + + MANTA_GERMLINE.out.candidate_sv_vcf.groupTuple(size: num_intervals) + .branch{ + intervals: it[1].size() > 1 + no_intervals: it[1].size() == 1 + }.set{manta_sv_vcf} + + MANTA_GERMLINE.out.diploid_sv_vcf.groupTuple(size: num_intervals) + .branch{ + intervals: it[1].size() > 1 + no_intervals: it[1].size() == 1 + }.set{manta_diploid_sv_vcf} + + // Only when using intervals + BGZIP_VC_MANTA_DIPLOID(MANTA_GERMLINE.out.diploid_sv_vcf) + + CONCAT_MANTA_DIPLOID( + BGZIP_VC_MANTA_DIPLOID.out.vcf + .map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.sample + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_combine_gz) + + BGZIP_VC_MANTA_SMALL_INDELS(MANTA_GERMLINE.out.candidate_small_indels_vcf) + + CONCAT_MANTA_SMALL_INDELS( + BGZIP_VC_MANTA_SMALL_INDELS.out.vcf + .map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.sample + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_combine_gz) + + BGZIP_VC_MANTA_SV(MANTA_GERMLINE.out.candidate_sv_vcf) + + CONCAT_MANTA_SV( + BGZIP_VC_MANTA_SV.out.vcf + .map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.sample + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_combine_gz) + + manta_vcf = Channel.empty().mix( + CONCAT_MANTA_DIPLOID.out.vcf, + CONCAT_MANTA_SMALL_INDELS.out.vcf, + CONCAT_MANTA_SV.out.vcf, + manta_diploid_sv_vcf.no_intervals, + manta_small_indels_vcf.no_intervals, + manta_sv_vcf.no_intervals) + + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_DIPLOID.out.versions) + ch_versions = 
ch_versions.mix(BGZIP_VC_MANTA_SMALL_INDELS.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) + ch_versions = ch_versions.mix(CONCAT_MANTA_DIPLOID.out.versions) + ch_versions = ch_versions.mix(CONCAT_MANTA_SMALL_INDELS.out.versions) + ch_versions = ch_versions.mix(CONCAT_MANTA_SV.out.versions) + ch_versions = ch_versions.mix(MANTA_GERMLINE.out.versions) + + emit: + versions = ch_versions + manta_vcf +} diff --git a/subworkflows/local/variantcalling/strelka.nf b/subworkflows/local/variantcalling/strelka.nf index e69de29bb2..1b22ef80ad 100644 --- a/subworkflows/local/variantcalling/strelka.nf +++ b/subworkflows/local/variantcalling/strelka.nf @@ -0,0 +1,76 @@ +include { BGZIP as BGZIP_VC_STRELKA } from '../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_STRELKA_GENOME } from '../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_STRELKA } from '../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_STRELKA_GENOME } from '../../modules/local/concat_vcf/main' +include { STRELKA_GERMLINE } from '../../modules/nf-core/modules/strelka/germline/main' + +workflow RUN_STRELKA { + take: + cram_recalibrated_intervals_gz_tbi + fasta + fasta_fai + num_intervals + + main: + + ch_versions = Channel.empty() + + // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. 
+ // Seems to be the consensus on upstream modules implementation too + + STRELKA_GERMLINE( + cram_recalibrated_intervals_gz_tbi, + fasta, + fasta_fai) + + // Figure out if using intervals or no_intervals + STRELKA_GERMLINE.out.vcf.groupTuple(size: num_intervals) + .branch{ + intervals: it[1].size() > 1 + no_intervals: it[1].size() == 1 + }.set{strelka_vcf} + + STRELKA_GERMLINE.out.genome_vcf.groupTuple(size: num_intervals) + .branch{ + intervals: it[1].size() > 1 + no_intervals: it[1].size() == 1 + }.set{strelka_genome_vcf} + + // Only when using intervals + BGZIP_VC_STRELKA(STRELKA_GERMLINE.out.vcf) + + CONCAT_STRELKA( + BGZIP_VC_STRELKA.out.vcf + .map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.sample + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_combine_gz) + + BGZIP_VC_STRELKA_GENOME(STRELKA_GERMLINE.out.genome_vcf) + + CONCAT_STRELKA_GENOME( + BGZIP_VC_STRELKA_GENOME.out.vcf + .map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.sample + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_combine_gz) + + strelka_vcf = Channel.empty().mix( + CONCAT_STRELKA.out.vcf, + CONCAT_STRELKA_GENOME.out.vcf, + strelka_genome_vcf.no_intervals, + + ch_versions = ch_versions.mix(BGZIP_VC_STRELKA.out.versions) + ch_versions = ch_versions.mix(CONCAT_STRELKA.out.versions) + ch_versions = ch_versions.mix(STRELKA_GERMLINE.out.versions) + + emit: + versions = ch_versions + strelka_vcf +} diff --git a/subworkflows/local/variantcalling/tiddit.nf b/subworkflows/local/variantcalling/tiddit.nf new file mode 100644 index 0000000000..6a729fc9bb --- /dev/null +++ b/subworkflows/local/variantcalling/tiddit.nf @@ -0,0 +1,35 @@ +include { TABIX_BGZIPTABIX as TABIX_BGZIP_TIDDIT_SV } from '../../modules/nf-core/modules/tabix/bgziptabix/main' +include { TIDDIT_SV } from '../../modules/nf-core/modules/tiddit/sv/main' + + +workflow RUN_TIDDIT { + take: + + + main: + + ch_versions = 
Channel.empty() + // if (tools.contains('tiddit')) { + // TODO: Update tiddit on bioconda, the current version does not support cram usage, needs newest version: + // https://github.com/SciLifeLab/TIDDIT/issues/82#issuecomment-1022103264 + // Issue opened, either this week or end of february + + // TIDDIT_SV( + // cram_recalibrated, + // fasta, + // fasta_fai + // ) + + // TABIX_BGZIP_TIDDIT_SV(TIDDIT_SV.out.vcf) + // tiddit_vcf_gz_tbi = TABIX_BGZIP_TIDDIT_SV.out.gz_tbi + // tiddit_ploidy = TIDDIT_SV.out.ploidy + // tiddit_signals = TIDDIT_SV.out.signals + // tiddit_wig = TIDDIT_SV.out.wig + // tiddit_gc_wig = TIDDIT_SV.out.gc_wig + + // ch_versions = ch_versions.mix(TABIX_BGZIP_TIDDIT_SV.out.versions) + // ch_versions = ch_versions.mix(TIDDIT_SV.out.versions) + // } + emit: + versions = ch_versions +} From d49a08036573e019745dcd51be88879bf3d9083f Mon Sep 17 00:00:00 2001 From: Rike Date: Mon, 21 Mar 2022 13:47:46 +0100 Subject: [PATCH 05/52] Fix broken imports --- subworkflows/local/germline_variant_calling.nf | 2 +- subworkflows/local/variantcalling/freebayes.nf | 8 ++++---- .../local/variantcalling/haplotypecaller.nf | 12 ++++++------ subworkflows/local/variantcalling/manta.nf | 14 +++++++------- subworkflows/local/variantcalling/strelka.nf | 11 ++++++----- subworkflows/local/variantcalling/tiddit.nf | 6 +++--- 6 files changed, 27 insertions(+), 26 deletions(-) diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index b76370b80d..8b2f98f2f2 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -7,7 +7,7 @@ include { RUN_FREEBAYES } from './variantcalling/fre include { RUN_HAPLOTYPECALLER } from './variantcalling/haplotypecaller.nf' include { RUN_MANTA } from './variantcalling/manta.nf' include { RUN_STRELKA } from './variantcalling/strelka.nf' -include { RUN_TIDDIT } from './variantcalling/tiddit.nf' +//include { RUN_TIDDIT } from 
'./variantcalling/tiddit.nf' workflow GERMLINE_VARIANT_CALLING { take: diff --git a/subworkflows/local/variantcalling/freebayes.nf b/subworkflows/local/variantcalling/freebayes.nf index 22ea00b50e..de90febeae 100644 --- a/subworkflows/local/variantcalling/freebayes.nf +++ b/subworkflows/local/variantcalling/freebayes.nf @@ -1,7 +1,7 @@ -include { BGZIP as BGZIP_VC_FREEBAYES } from '../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_FREEBAYES } from '../../modules/local/concat_vcf/main' -include { FREEBAYES } from '../../modules/nf-core/modules/freebayes/main' -include { TABIX_TABIX as TABIX_VC_FREEBAYES } from '../../modules/nf-core/modules/tabix/tabix/main' +include { BGZIP as BGZIP_VC_FREEBAYES } from '../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_FREEBAYES } from '../../../modules/local/concat_vcf/main' +include { FREEBAYES } from '../../../modules/nf-core/modules/freebayes/main' +include { TABIX_TABIX as TABIX_VC_FREEBAYES } from '../../../modules/nf-core/modules/tabix/tabix/main' workflow RUN_FREEBAYES { diff --git a/subworkflows/local/variantcalling/haplotypecaller.nf b/subworkflows/local/variantcalling/haplotypecaller.nf index e3665f1b4c..250783a7f9 100644 --- a/subworkflows/local/variantcalling/haplotypecaller.nf +++ b/subworkflows/local/variantcalling/haplotypecaller.nf @@ -1,9 +1,9 @@ -include { BGZIP as BGZIP_VC_HAPLOTYPECALLER } from '../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_HAPLOTYPECALLER } from '../../modules/local/concat_vcf/main' -include { GATK4_GENOTYPEGVCFS as GENOTYPEGVCFS } from '../../modules/nf-core/modules/gatk4/genotypegvcfs/main' -include { GATK4_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../modules/nf-core/modules/gatk4/haplotypecaller/main' -include { GATK_JOINT_GERMLINE_VARIANT_CALLING } from '../../subworkflows/nf-core/gatk4/joint_germline_variant_calling/main' -include { TABIX_TABIX as TABIX_VC_HAPLOTYPECALLER } from '../../modules/nf-core/modules/tabix/tabix/main' +include { BGZIP as 
BGZIP_VC_HAPLOTYPECALLER } from '../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_HAPLOTYPECALLER } from '../../../modules/local/concat_vcf/main' +include { GATK4_GENOTYPEGVCFS as GENOTYPEGVCFS } from '../../../modules/nf-core/modules/gatk4/genotypegvcfs/main' +include { GATK4_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../../modules/nf-core/modules/gatk4/haplotypecaller/main' +include { GATK_JOINT_GERMLINE_VARIANT_CALLING } from '../../../subworkflows/nf-core/gatk4/joint_germline_variant_calling/main' +include { TABIX_TABIX as TABIX_VC_HAPLOTYPECALLER } from '../../../modules/nf-core/modules/tabix/tabix/main' workflow RUN_HAPLOTYPECALLER { take: diff --git a/subworkflows/local/variantcalling/manta.nf b/subworkflows/local/variantcalling/manta.nf index acc536e2b9..8c5a4846cb 100644 --- a/subworkflows/local/variantcalling/manta.nf +++ b/subworkflows/local/variantcalling/manta.nf @@ -1,10 +1,10 @@ -include { BGZIP as BGZIP_VC_MANTA_DIPLOID } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SV } from '../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../modules/local/concat_vcf/main' -include { MANTA_GERMLINE } from '../../modules/local/manta/germline/main' +include { BGZIP as BGZIP_VC_MANTA_DIPLOID } from '../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_SV } from '../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SV } from 
'../../../modules/local/concat_vcf/main' +include { MANTA_GERMLINE } from '../../../modules/local/manta/germline/main' workflow RUN_MANTA { take: diff --git a/subworkflows/local/variantcalling/strelka.nf b/subworkflows/local/variantcalling/strelka.nf index 1b22ef80ad..8ec2197c1f 100644 --- a/subworkflows/local/variantcalling/strelka.nf +++ b/subworkflows/local/variantcalling/strelka.nf @@ -1,8 +1,8 @@ -include { BGZIP as BGZIP_VC_STRELKA } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_STRELKA_GENOME } from '../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_STRELKA } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_STRELKA_GENOME } from '../../modules/local/concat_vcf/main' -include { STRELKA_GERMLINE } from '../../modules/nf-core/modules/strelka/germline/main' +include { BGZIP as BGZIP_VC_STRELKA } from '../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_STRELKA_GENOME } from '../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_STRELKA } from '../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_STRELKA_GENOME } from '../../../modules/local/concat_vcf/main' +include { STRELKA_GERMLINE } from '../../../modules/nf-core/modules/strelka/germline/main' workflow RUN_STRELKA { take: @@ -65,6 +65,7 @@ workflow RUN_STRELKA { CONCAT_STRELKA.out.vcf, CONCAT_STRELKA_GENOME.out.vcf, strelka_genome_vcf.no_intervals, + strelka_vcf.no_intervals) ch_versions = ch_versions.mix(BGZIP_VC_STRELKA.out.versions) ch_versions = ch_versions.mix(CONCAT_STRELKA.out.versions) diff --git a/subworkflows/local/variantcalling/tiddit.nf b/subworkflows/local/variantcalling/tiddit.nf index 6a729fc9bb..87ec6050b1 100644 --- a/subworkflows/local/variantcalling/tiddit.nf +++ b/subworkflows/local/variantcalling/tiddit.nf @@ -1,7 +1,7 @@ -include { TABIX_BGZIPTABIX as TABIX_BGZIP_TIDDIT_SV } from '../../modules/nf-core/modules/tabix/bgziptabix/main' -include { TIDDIT_SV } from 
'../../modules/nf-core/modules/tiddit/sv/main' - +include { TABIX_BGZIPTABIX as TABIX_BGZIP_TIDDIT_SV } from '../../../modules/nf-core/modules/tabix/bgziptabix/main' +include { TIDDIT_SV } from '../../../modules/nf-core/modules/tiddit/sv/main' +//TODO: UNDER CONSTRUCTIONS workflow RUN_TIDDIT { take: From f1a7e181909ba00cfd11befe42afdfbf54cd53cd Mon Sep 17 00:00:00 2001 From: Rike Date: Mon, 21 Mar 2022 17:18:06 +0100 Subject: [PATCH 06/52] Add slightly ugly if's to subworkflows to avoid printing of unrun processes to terminal. Sorry @maxulysse :( we'll keep improving this --- .../local/germline_variant_calling.nf | 86 ++++++--- subworkflows/local/tumor_variant_calling.nf | 181 +++--------------- .../local/variantcalling/freebayes.nf | 1 + .../local/variantcalling/haplotypecaller.nf | 4 + .../{manta.nf => manta_germline.nf} | 1 + .../local/variantcalling/manta_somatic.nf | 81 ++++++++ subworkflows/local/variantcalling/strelka.nf | 1 + 7 files changed, 172 insertions(+), 183 deletions(-) rename subworkflows/local/variantcalling/{manta.nf => manta_germline.nf} (99%) create mode 100644 subworkflows/local/variantcalling/manta_somatic.nf diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index 8b2f98f2f2..7141a177aa 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -5,7 +5,7 @@ include { RUN_DEEPVARIANT } from './variantcalling/deepvariant.nf' include { RUN_FREEBAYES } from './variantcalling/freebayes.nf' include { RUN_HAPLOTYPECALLER } from './variantcalling/haplotypecaller.nf' -include { RUN_MANTA } from './variantcalling/manta.nf' +include { RUN_MANTA } from './variantcalling/manta_germline.nf' include { RUN_STRELKA } from './variantcalling/strelka.nf' //include { RUN_TIDDIT } from './variantcalling/tiddit.nf' @@ -27,6 +27,12 @@ workflow GERMLINE_VARIANT_CALLING { main: ch_versions = Channel.empty() + deepvariant = Channel.empty() + freebayes_vcf 
= Channel.empty() + haplotypecaller_gvcf = Channel.empty() + genotype_gvcf = Channel.empty() + manta_vcf = Channel.empty() + strelka_vcf = Channel.empty() // Remap channel with intervals cram_recalibrated_intervals = cram_recalibrated.combine(intervals) @@ -49,54 +55,78 @@ workflow GERMLINE_VARIANT_CALLING { } // DEEPVARIANT - RUN_DEEPVARIANT(cram_recalibrated_intervals, fasta, fasta_fai, intervals_bed_combine_gz, num_intervals) + if(params.tools.contains('deepvariant')){ + RUN_DEEPVARIANT(cram_recalibrated_intervals, fasta, fasta_fai, intervals_bed_combine_gz, num_intervals) + deepvariant_vcf = RUN_DEEPVARIANT.out.deepvariant_vcf + ch_versions = ch_versions.mix(RUN_DEEPVARIANT.out.versions) + + } // FREEBAYES + if (params.tools.contains('freebayes')){ // Remap channel for Freebayes - cram_recalibrated_intervals_freebayes = cram_recalibrated_intervals - .map{ meta, cram, crai, intervals -> - [meta, cram, crai, [], [], intervals] - } + cram_recalibrated_intervals_freebayes = cram_recalibrated_intervals + .map{ meta, cram, crai, intervals -> + [meta, cram, crai, [], [], intervals] + } + + RUN_FREEBAYES(cram_recalibrated_intervals_freebayes, fasta, fasta_fai) + freebayes_vcf = RUN_FREEBAYES.out.freebayes_vcf + ch_versions = ch_versions.mix(RUN_FREEBAYES.out.versions) + } - RUN_FREEBAYES(cram_recalibrated_intervals_freebayes, fasta, fasta_fai) // HAPLOTYPECALLER - RUN_HAPLOTYPECALLER(cram_recalibrated_intervals, + if (params.tools.contains('haplotypecaller')){ + RUN_HAPLOTYPECALLER(cram_recalibrated_intervals, fasta, fasta_fai, dict, dbsnp, dbsnp_tbi, - num_intervals) + num_intervals, + intervals_bed_combine_gz, + intervals_bed_combine_gz_tbi) + ch_versions = ch_versions.mix(RUN_HAPLOTYPECALLER.out.versions) + haplotypecaller_gvcf = RUN_HAPLOTYPECALLER.out.haplotypecaller_gvcf + genotype_gvcf = RUN_HAPLOTYPECALLER.out.genotype_gvcf + + } // MANTA - RUN_MANTA(cram_recalibrated_intervals_gz_tbi, - fasta, - fasta_fai, - num_intervals) + if 
(params.tools.contains('manta')){ + RUN_MANTA(cram_recalibrated_intervals_gz_tbi, + fasta, + fasta_fai, + num_intervals, + intervals_bed_combine_gz) + ch_versions = ch_versions.mix(RUN_MANTA.out.versions) + manta_vcf = RUN_MANTA.out.manta_vcf + + } // STRELKA - RUN_STRELKA(cram_recalibrated_intervals_gz_tbi, - fasta, - fasta_fai, - num_intervals) + if (params.tools.contains('strelka')){ + RUN_STRELKA(cram_recalibrated_intervals_gz_tbi, + fasta, + fasta_fai, + num_intervals, + intervals_bed_combine_gz) + ch_versions = ch_versions.mix(RUN_STRELKA.out.versions) + strelka_vcf = RUN_STRELKA.out.strelka_vcf + + } //TIDDIT //TODO - ch_versions = ch_versions.mix(RUN_DEEPVARIANT.out.versions) - ch_versions = ch_versions.mix(RUN_FREEBAYES.out.versions) - ch_versions = ch_versions.mix(RUN_HAPLOTYPECALLER.out.versions) - ch_versions = ch_versions.mix(RUN_MANTA.out.versions) - ch_versions = ch_versions.mix(RUN_STRELKA.out.versions) - emit: deepvariant_vcf = RUN_DEEPVARIANT.out.deepvariant_vcf - freebayes_vcf = RUN_FREEBAYES.out.freebayes_vcf - haplotypecaller_gvcf = RUN_HAPLOTYPECALLER.out.haplotypecaller_gvcf - genotype_gvcf = RUN_HAPLOTYPECALLER.out.genotype_gvcf - manta_vcf = RUN_MANTA.out.manta_vcf - strelka_vcf = RUN_STRELKA.out.strelka_vcf + freebayes_vcf + haplotypecaller_gvcf + genotype_gvcf + manta_vcf + strelka_vcf versions = ch_versions } diff --git a/subworkflows/local/tumor_variant_calling.nf b/subworkflows/local/tumor_variant_calling.nf index 36ee463392..3cab2b4a00 100644 --- a/subworkflows/local/tumor_variant_calling.nf +++ b/subworkflows/local/tumor_variant_calling.nf @@ -4,23 +4,10 @@ // -include { BGZIP as BGZIP_VC_FREEBAYES } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SV } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_TUMOR } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_STRELKA } from '../../modules/local/bgzip' 
-include { BGZIP as BGZIP_VC_STRELKA_GENOME } from '../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_FREEBAYES } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_TUMOR } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_STRELKA } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_STRELKA_GENOME } from '../../modules/local/concat_vcf/main' -include { FREEBAYES } from '../../modules/nf-core/modules/freebayes/main' include { GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING } from '../../subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main' -include { MANTA_TUMORONLY } from '../../modules/local/manta/tumoronly/main' -include { STRELKA_GERMLINE as STRELKA_TUMORONLY } from '../../modules/nf-core/modules/strelka/germline/main' -include { TABIX_TABIX as TABIX_VC_FREEBAYES } from '../../modules/nf-core/modules/tabix/tabix/main' +include { RUN_STRELKA } from './variantcalling/strelka.nf' +include { RUN_FREEBAYES } from './variantcalling/freebayes.nf' +include { RUN_MANTA_SOMATIC } from './variantcalling/manta_somatic.nf' workflow TUMOR_ONLY_VARIANT_CALLING { take: @@ -38,7 +25,7 @@ workflow TUMOR_ONLY_VARIANT_CALLING { num_intervals // val: number of intervals that are used to parallelize exection, either based on capture kit or GATK recommended for WGS no_intervals germline_resource - germline_resource_tbi // channel + germline_resource_tbi // channel panel_of_normals panel_of_normals_tbi @@ -73,47 +60,15 @@ workflow TUMOR_ONLY_VARIANT_CALLING { if (tools.contains('freebayes')){ - - cram_recalibrated.combine(intervals).map{ meta, cram, crai, intervals -> - new_meta = meta.clone() - new_meta.id = meta.sample + "_" + intervals.simpleName - new_meta.id = intervals.baseName != "no_intervals" ? 
meta.sample + "_" + intervals.baseName : meta.sample - intervals = intervals.baseName != "no_intervals" ? intervals : [] - [new_meta, cram, crai, [], [], intervals] - }.set{cram_recalibrated_intervals_freebayes} - - FREEBAYES( - cram_recalibrated_intervals_freebayes, - fasta, - fasta_fai, - [], - [], - [] - ) - ch_versions = ch_versions.mix(FREEBAYES.out.versions) - - if(no_intervals){ - TABIX_VC_FREEBAYES(FREEBAYES.out.vcf) - freebayes_vcf_gz = FREEBAYES.out.vcf - ch_versions = ch_versions.mix(TABIX_VC_FREEBAYES.out.versions) - }else{ - BGZIP_VC_FREEBAYES(FREEBAYES.out.vcf) - BGZIP_VC_FREEBAYES.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{freebayes_vcf_to_concat} - - CONCAT_FREEBAYES(freebayes_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - freebayes_vcf_gz = CONCAT_FREEBAYES.out.vcf - - ch_versions = ch_versions.mix(BGZIP_VC_FREEBAYES.out.versions) - ch_versions = ch_versions.mix(CONCAT_FREEBAYES.out.versions) - } - - freebayes_vcf = freebayes_vcf.mix(freebayes_vcf_gz) - + // Remap channel for Freebayes + cram_recalibrated_intervals_freebayes = cram_recalibrated_intervals + .map{ meta, cram, crai, intervals -> + [meta, cram, crai, [], [], intervals] + } + + RUN_FREEBAYES(cram_recalibrated_intervals_freebayes, fasta, fasta_fai) + freebayes_vcf = RUN_FREEBAYES.out.freebayes_vcf + ch_versions = ch_versions.mix(RUN_FREEBAYES.out.versions) } if (tools.contains('mutect2')) { @@ -142,110 +97,25 @@ workflow TUMOR_ONLY_VARIANT_CALLING { if (tools.contains('manta')){ //TODO: Research if splitting by intervals is ok, we pretend for now it is fine. 
Seems to be the consensus on upstream modules implementaiton too - MANTA_TUMORONLY( - cram_recalibrated_intervals_gz_tbi, + RUN_MANTA_SOMATIC(cram_recalibrated_intervals_gz_tbi, fasta, - fasta_fai - ) - - ch_versions = ch_versions.mix(MANTA_TUMORONLY.out.versions) - - if(no_intervals){ - manta_candidate_small_indels_vcf = MANTA_TUMORONLY.out.candidate_small_indels_vcf - manta_candidate_sv_vcf = MANTA_TUMORONLY.out.candidate_sv_vcf - manta_tumor_sv_vcf = MANTA_TUMORONLY.out.tumor_sv_vcf - }else{ - - BGZIP_VC_MANTA_SV(MANTA_TUMORONLY.out.candidate_small_indels_vcf) - BGZIP_VC_MANTA_SMALL_INDELS(MANTA_TUMORONLY.out.candidate_sv_vcf) - BGZIP_VC_MANTA_TUMOR(MANTA_TUMORONLY.out.tumor_sv_vcf) - - BGZIP_VC_MANTA_SV.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{manta_sv_vcf_to_concat} - - BGZIP_VC_MANTA_SMALL_INDELS.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{manta_small_indels_vcf_to_concat} - - BGZIP_VC_MANTA_TUMOR.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{manta_tumor_sv_vcf_to_concat} - - CONCAT_MANTA_SV(manta_sv_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) - CONCAT_MANTA_SMALL_INDELS(manta_small_indels_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - CONCAT_MANTA_TUMOR(manta_tumor_sv_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) - - manta_candidate_small_indels_vcf = CONCAT_MANTA_SV.out.vcf - manta_candidate_sv_vcf = CONCAT_MANTA_SMALL_INDELS.out.vcf - manta_tumor_sv_vcf = CONCAT_MANTA_TUMOR.out.vcf - - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_TUMOR.out.versions) - - ch_versions = 
ch_versions.mix(CONCAT_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_TUMOR.out.versions) - - } + fasta_fai, + num_intervals, + intervals_bed_combine_gz) manta_vcf = manta_vcf.mix(manta_candidate_small_indels_vcf, manta_candidate_sv_vcf, manta_tumor_sv_vcf) + ch_versions = ch_versions.mix(RUN_MANTA.out.versions) } if (tools.contains('strelka')) { - //TODO: research if multiple targets can be provided: waiting for reply - - STRELKA_TUMORONLY( - cram_recalibrated_intervals_gz_tbi, - fasta, - fasta_fai - ) - - ch_versions = ch_versions.mix(STRELKA_TUMORONLY.out.versions) + RUN_STRELKA(cram_recalibrated_intervals_gz_tbi, + fasta, + fasta_fai, + num_intervals, + intervals_bed_combine_gz) + ch_versions = ch_versions.mix(RUN_STRELKA.out.versions) + strelka_vcf = RUN_STRELKA.out.strelka_vcf - if(no_intervals){ - strelka_vcf_gz = STRELKA_TUMORONLY.out.vcf - strelka_genome_vcf_gz = STRELKA_TUMORONLY.out.genome_vcf - - }else{ - BGZIP_VC_STRELKA(STRELKA_TUMORONLY.out.vcf) - BGZIP_VC_STRELKA_GENOME(STRELKA_TUMORONLY.out.genome_vcf) - - BGZIP_VC_STRELKA.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{strelka_vcf_to_concat} - - BGZIP_VC_STRELKA_GENOME.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{strelka_genome_vcf_to_concat} - - CONCAT_STRELKA(strelka_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - CONCAT_STRELKA_GENOME(strelka_genome_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - - strelka_vcf_gz = CONCAT_STRELKA.out.vcf - strelka_genome_vcf_gz = CONCAT_STRELKA_GENOME.out.vcf - - ch_versions = ch_versions.mix(BGZIP_VC_STRELKA.out.versions) - ch_versions = ch_versions.mix(CONCAT_STRELKA.out.versions) - } - - strelka_vcf = strelka_vcf.mix(strelka_vcf_gz,strelka_genome_vcf_gz ) } @@ 
-259,4 +129,5 @@ workflow TUMOR_ONLY_VARIANT_CALLING { manta_vcf mutect2_vcf strelka_vcf + } diff --git a/subworkflows/local/variantcalling/freebayes.nf b/subworkflows/local/variantcalling/freebayes.nf index de90febeae..4a4c81700f 100644 --- a/subworkflows/local/variantcalling/freebayes.nf +++ b/subworkflows/local/variantcalling/freebayes.nf @@ -47,4 +47,5 @@ workflow RUN_FREEBAYES { emit: versions = ch_versions + freebayes_vcf } diff --git a/subworkflows/local/variantcalling/haplotypecaller.nf b/subworkflows/local/variantcalling/haplotypecaller.nf index 250783a7f9..a4208acec4 100644 --- a/subworkflows/local/variantcalling/haplotypecaller.nf +++ b/subworkflows/local/variantcalling/haplotypecaller.nf @@ -14,6 +14,8 @@ workflow RUN_HAPLOTYPECALLER { dbsnp dbsnp_tbi num_intervals + intervals_bed_combine_gz + intervals_bed_combine_gz_tbi main: @@ -115,4 +117,6 @@ workflow RUN_HAPLOTYPECALLER { emit: versions = ch_versions + genotype_gvcf + haplotypecaller_gvcf } diff --git a/subworkflows/local/variantcalling/manta.nf b/subworkflows/local/variantcalling/manta_germline.nf similarity index 99% rename from subworkflows/local/variantcalling/manta.nf rename to subworkflows/local/variantcalling/manta_germline.nf index 8c5a4846cb..8b3c5a23c7 100644 --- a/subworkflows/local/variantcalling/manta.nf +++ b/subworkflows/local/variantcalling/manta_germline.nf @@ -12,6 +12,7 @@ workflow RUN_MANTA { fasta fasta_fai num_intervals + intervals_bed_combine_gz main: diff --git a/subworkflows/local/variantcalling/manta_somatic.nf b/subworkflows/local/variantcalling/manta_somatic.nf new file mode 100644 index 0000000000..545d18eba1 --- /dev/null +++ b/subworkflows/local/variantcalling/manta_somatic.nf @@ -0,0 +1,81 @@ +include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_SV } from '../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_TUMOR } from '../../../modules/local/bgzip' +include { CONCAT_VCF as 
CONCAT_MANTA_SMALL_INDELS } from '../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_TUMOR } from '../../../modules/local/concat_vcf/main' +include { MANTA_TUMORONLY } from '../../../modules/local/manta/tumoronly/main' + +workflow RUN_MANTA_SOMATIC { + take: + cram_recalibrated_intervals_gz_tbi + fasta + fasta_fai + num_intervals + intervals_bed_combine_gz + + main: + + ch_versions = Channel.empty() + MANTA_TUMORONLY( + cram_recalibrated_intervals_gz_tbi, + fasta, + fasta_fai + ) + + ch_versions = ch_versions.mix(MANTA_TUMORONLY.out.versions) + + if(no_intervals){ + manta_candidate_small_indels_vcf = MANTA_TUMORONLY.out.candidate_small_indels_vcf + manta_candidate_sv_vcf = MANTA_TUMORONLY.out.candidate_sv_vcf + manta_tumor_sv_vcf = MANTA_TUMORONLY.out.tumor_sv_vcf + }else{ + + BGZIP_VC_MANTA_SV(MANTA_TUMORONLY.out.candidate_small_indels_vcf) + BGZIP_VC_MANTA_SMALL_INDELS(MANTA_TUMORONLY.out.candidate_sv_vcf) + BGZIP_VC_MANTA_TUMOR(MANTA_TUMORONLY.out.tumor_sv_vcf) + + BGZIP_VC_MANTA_SV.out.vcf.map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.sample + [new_meta, vcf] + }.groupTuple(size: num_intervals) + .set{manta_sv_vcf_to_concat} + + BGZIP_VC_MANTA_SMALL_INDELS.out.vcf.map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.sample + [new_meta, vcf] + }.groupTuple(size: num_intervals) + .set{manta_small_indels_vcf_to_concat} + + BGZIP_VC_MANTA_TUMOR.out.vcf.map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.sample + [new_meta, vcf] + }.groupTuple(size: num_intervals) + .set{manta_tumor_sv_vcf_to_concat} + + CONCAT_MANTA_SV(manta_sv_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) + CONCAT_MANTA_SMALL_INDELS(manta_small_indels_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) + CONCAT_MANTA_TUMOR(manta_tumor_sv_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) + + 
manta_candidate_small_indels_vcf = CONCAT_MANTA_SV.out.vcf + manta_candidate_sv_vcf = CONCAT_MANTA_SMALL_INDELS.out.vcf + manta_tumor_sv_vcf = CONCAT_MANTA_TUMOR.out.vcf + + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SMALL_INDELS.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_TUMOR.out.versions) + + ch_versions = ch_versions.mix(CONCAT_MANTA_SV.out.versions) + ch_versions = ch_versions.mix(CONCAT_MANTA_SMALL_INDELS.out.versions) + ch_versions = ch_versions.mix(CONCAT_MANTA_TUMOR.out.versions) + } + + manta_vcf = manta_vcf.mix(manta_candidate_small_indels_vcf, manta_candidate_sv_vcf, manta_tumor_sv_vcf) + + emit: + versions = ch_versions + manta_vcf +} diff --git a/subworkflows/local/variantcalling/strelka.nf b/subworkflows/local/variantcalling/strelka.nf index 8ec2197c1f..e8d5770553 100644 --- a/subworkflows/local/variantcalling/strelka.nf +++ b/subworkflows/local/variantcalling/strelka.nf @@ -10,6 +10,7 @@ workflow RUN_STRELKA { fasta fasta_fai num_intervals + intervals_bed_combine_gz main: From 9a0ed30ae3dbe7e1a8155cec1e054b80ca70f475 Mon Sep 17 00:00:00 2001 From: Rike Date: Mon, 21 Mar 2022 17:24:58 +0100 Subject: [PATCH 07/52] fix output variable names --- subworkflows/local/germline_variant_calling.nf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index 7141a177aa..e9b6f90640 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -26,11 +26,11 @@ workflow GERMLINE_VARIANT_CALLING { main: - ch_versions = Channel.empty() - deepvariant = Channel.empty() - freebayes_vcf = Channel.empty() - haplotypecaller_gvcf = Channel.empty() - genotype_gvcf = Channel.empty() + ch_versions = Channel.empty() + deepvariant_vcf = Channel.empty() + freebayes_vcf = Channel.empty() + haplotypecaller_gvcf = 
Channel.empty() + genotype_gvcf = Channel.empty() manta_vcf = Channel.empty() strelka_vcf = Channel.empty() @@ -121,7 +121,7 @@ workflow GERMLINE_VARIANT_CALLING { //TODO emit: - deepvariant_vcf = RUN_DEEPVARIANT.out.deepvariant_vcf + deepvariant_vcf freebayes_vcf haplotypecaller_gvcf genotype_gvcf From a1b6a978198bf1505c62caecca53ba7d3ae1a46f Mon Sep 17 00:00:00 2001 From: Rike Date: Mon, 21 Mar 2022 22:34:35 +0100 Subject: [PATCH 08/52] more refactoring --- .../local/germline_variant_calling.nf | 1 - subworkflows/local/pair_variant_calling.nf | 81 ++---------------- subworkflows/local/tumor_variant_calling.nf | 7 +- .../local/variantcalling/manta_somatic.nf | 82 +++++++++++-------- .../local/variantcalling/manta_tumoronly.nf | 81 ++++++++++++++++++ 5 files changed, 137 insertions(+), 115 deletions(-) create mode 100644 subworkflows/local/variantcalling/manta_tumoronly.nf diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index e9b6f90640..8f64f2eda3 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -75,7 +75,6 @@ workflow GERMLINE_VARIANT_CALLING { ch_versions = ch_versions.mix(RUN_FREEBAYES.out.versions) } - // HAPLOTYPECALLER if (params.tools.contains('haplotypecaller')){ RUN_HAPLOTYPECALLER(cram_recalibrated_intervals, diff --git a/subworkflows/local/pair_variant_calling.nf b/subworkflows/local/pair_variant_calling.nf index 5c6ef808ee..5f627c2f84 100644 --- a/subworkflows/local/pair_variant_calling.nf +++ b/subworkflows/local/pair_variant_calling.nf @@ -1,23 +1,16 @@ // // PAIRED VARIANT CALLING // -include { BGZIP as BGZIP_VC_MANTA_DIPLOID } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SOMATIC } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SV } from '../../modules/local/bgzip' include { BGZIP as 
BGZIP_VC_STRELKA_INDELS } from '../../modules/local/bgzip' include { BGZIP as BGZIP_VC_STRELKA_SNVS } from '../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SOMATIC } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../modules/local/concat_vcf/main' include { CONCAT_VCF as CONCAT_STRELKA_INDELS } from '../../modules/local/concat_vcf/main' include { CONCAT_VCF as CONCAT_STRELKA_SNVS } from '../../modules/local/concat_vcf/main' include { GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING } from '../../subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main' -include { MANTA_SOMATIC } from '../../modules/local/manta/somatic/main' include { MSISENSORPRO_MSI_SOMATIC } from '../../modules/nf-core/modules/msisensorpro/msi_somatic/main' include { STRELKA_SOMATIC } from '../../modules/nf-core/modules/strelka/somatic/main' +include { RUN_MANTA_SOMATIC } from './variantcalling/manta_somatic.nf' + workflow PAIR_VARIANT_CALLING { take: tools @@ -73,71 +66,11 @@ workflow PAIR_VARIANT_CALLING { } if (tools.contains('manta')) { - MANTA_SOMATIC( - cram_pair_intervals_gz_tbi, - fasta, - fasta_fai) - - ch_versions = ch_versions.mix(MANTA_SOMATIC.out.versions) - - if (no_intervals) { - manta_candidate_small_indels_vcf = MANTA_SOMATIC.out.candidate_small_indels_vcf - manta_candidate_sv_vcf = MANTA_SOMATIC.out.candidate_sv_vcf - manta_diploid_sv_vcf = MANTA_SOMATIC.out.diploid_sv_vcf - manta_somatic_sv_vcf = MANTA_SOMATIC.out.somatic_sv_vcf - } else { - BGZIP_VC_MANTA_SV(MANTA_SOMATIC.out.candidate_small_indels_vcf) - BGZIP_VC_MANTA_SMALL_INDELS(MANTA_SOMATIC.out.candidate_sv_vcf) - BGZIP_VC_MANTA_DIPLOID(MANTA_SOMATIC.out.diploid_sv_vcf) - BGZIP_VC_MANTA_SOMATIC(MANTA_SOMATIC.out.somatic_sv_vcf) - - manta_sv_vcf_to_concat = 
BGZIP_VC_MANTA_SV.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id - [new_meta, vcf] - }.groupTuple(size: num_intervals) - - manta_small_indels_vcf_to_concat = BGZIP_VC_MANTA_SMALL_INDELS.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id - [new_meta, vcf] - }.groupTuple(size: num_intervals) - - manta_diploid_vcf_to_concat = BGZIP_VC_MANTA_DIPLOID.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id - [new_meta, vcf] - }.groupTuple(size: num_intervals) - - manta_somatic_sv_vcf_to_concat = BGZIP_VC_MANTA_SOMATIC.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id - [new_meta, vcf] - }.groupTuple(size: num_intervals) - - CONCAT_MANTA_SV(manta_sv_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) - CONCAT_MANTA_SMALL_INDELS(manta_small_indels_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - CONCAT_MANTA_DIPLOID(manta_diploid_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) - CONCAT_MANTA_SOMATIC(manta_somatic_sv_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) - - manta_candidate_small_indels_vcf = CONCAT_MANTA_SV.out.vcf - manta_candidate_sv_vcf = CONCAT_MANTA_SMALL_INDELS.out.vcf - manta_diploid_sv_vcf = CONCAT_MANTA_DIPLOID.out.vcf - manta_somatic_sv_vcf = CONCAT_MANTA_SOMATIC.out.vcf - - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_DIPLOID.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SOMATIC.out.versions) - - ch_versions = ch_versions.mix(CONCAT_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_DIPLOID.out.versions) - ch_versions = 
ch_versions.mix(CONCAT_MANTA_SOMATIC.out.versions) - - } - - manta_vcf = manta_vcf.mix(manta_candidate_small_indels_vcf,manta_candidate_sv_vcf,manta_diploid_sv_vcf,manta_somatic_sv_vcf) + RUN_MANTA_SOMATIC(cram_pair_intervals_gz_tbi, + fasta, + fasta_fai, + num_intervals, + intervals_bed_combine_gz) } cram_pair_strelka = Channel.empty() diff --git a/subworkflows/local/tumor_variant_calling.nf b/subworkflows/local/tumor_variant_calling.nf index 3cab2b4a00..0eeb34f1ba 100644 --- a/subworkflows/local/tumor_variant_calling.nf +++ b/subworkflows/local/tumor_variant_calling.nf @@ -6,8 +6,8 @@ include { GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING } from '../../subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main' include { RUN_STRELKA } from './variantcalling/strelka.nf' -include { RUN_FREEBAYES } from './variantcalling/freebayes.nf' -include { RUN_MANTA_SOMATIC } from './variantcalling/manta_somatic.nf' +include { RUN_FREEBAYES } from './variantcalling/freebayes.nf' +include { RUN_MANTA_TUMORONLY } from './variantcalling/manta_tumoronly.nf' workflow TUMOR_ONLY_VARIANT_CALLING { take: @@ -29,7 +29,6 @@ workflow TUMOR_ONLY_VARIANT_CALLING { panel_of_normals panel_of_normals_tbi - main: if(!tools) tools = "" @@ -97,7 +96,7 @@ workflow TUMOR_ONLY_VARIANT_CALLING { if (tools.contains('manta')){ //TODO: Research if splitting by intervals is ok, we pretend for now it is fine. 
Seems to be the consensus on upstream modules implementaiton too - RUN_MANTA_SOMATIC(cram_recalibrated_intervals_gz_tbi, + RUN_MANTA_TUMORONLY(cram_recalibrated_intervals_gz_tbi, fasta, fasta_fai, num_intervals, diff --git a/subworkflows/local/variantcalling/manta_somatic.nf b/subworkflows/local/variantcalling/manta_somatic.nf index 545d18eba1..4b4b2e7cd7 100644 --- a/subworkflows/local/variantcalling/manta_somatic.nf +++ b/subworkflows/local/variantcalling/manta_somatic.nf @@ -1,14 +1,16 @@ -include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SV } from '../../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_TUMOR } from '../../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_TUMOR } from '../../../modules/local/concat_vcf/main' -include { MANTA_TUMORONLY } from '../../../modules/local/manta/tumoronly/main' +include { BGZIP as BGZIP_VC_MANTA_DIPLOID } from '../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_SOMATIC } from '../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_SV } from '../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SOMATIC } from '../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../modules/local/concat_vcf/main' +include { MANTA_SOMATIC } from '../../modules/local/manta/somatic/main' workflow RUN_MANTA_SOMATIC { take: - cram_recalibrated_intervals_gz_tbi + cram_pair_intervals_gz_tbi fasta fasta_fai num_intervals @@ -17,65 +19,73 @@ 
workflow RUN_MANTA_SOMATIC { main: ch_versions = Channel.empty() - MANTA_TUMORONLY( - cram_recalibrated_intervals_gz_tbi, + MANTA_SOMATIC( + cram_pair_intervals_gz_tbi, fasta, - fasta_fai - ) + fasta_fai) - ch_versions = ch_versions.mix(MANTA_TUMORONLY.out.versions) + ch_versions = ch_versions.mix(MANTA_SOMATIC.out.versions) - if(no_intervals){ - manta_candidate_small_indels_vcf = MANTA_TUMORONLY.out.candidate_small_indels_vcf - manta_candidate_sv_vcf = MANTA_TUMORONLY.out.candidate_sv_vcf - manta_tumor_sv_vcf = MANTA_TUMORONLY.out.tumor_sv_vcf - }else{ + if (no_intervals) { + manta_candidate_small_indels_vcf = MANTA_SOMATIC.out.candidate_small_indels_vcf + manta_candidate_sv_vcf = MANTA_SOMATIC.out.candidate_sv_vcf + manta_diploid_sv_vcf = MANTA_SOMATIC.out.diploid_sv_vcf + manta_somatic_sv_vcf = MANTA_SOMATIC.out.somatic_sv_vcf + } else { + BGZIP_VC_MANTA_SV(MANTA_SOMATIC.out.candidate_small_indels_vcf) + BGZIP_VC_MANTA_SMALL_INDELS(MANTA_SOMATIC.out.candidate_sv_vcf) + BGZIP_VC_MANTA_DIPLOID(MANTA_SOMATIC.out.diploid_sv_vcf) + BGZIP_VC_MANTA_SOMATIC(MANTA_SOMATIC.out.somatic_sv_vcf) - BGZIP_VC_MANTA_SV(MANTA_TUMORONLY.out.candidate_small_indels_vcf) - BGZIP_VC_MANTA_SMALL_INDELS(MANTA_TUMORONLY.out.candidate_sv_vcf) - BGZIP_VC_MANTA_TUMOR(MANTA_TUMORONLY.out.tumor_sv_vcf) + manta_sv_vcf_to_concat = BGZIP_VC_MANTA_SV.out.vcf.map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id + [new_meta, vcf] + }.groupTuple(size: num_intervals) - BGZIP_VC_MANTA_SV.out.vcf.map{ meta, vcf -> + manta_small_indels_vcf_to_concat = BGZIP_VC_MANTA_SMALL_INDELS.out.vcf.map{ meta, vcf -> new_meta = meta.clone() - new_meta.id = new_meta.sample + new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id [new_meta, vcf] }.groupTuple(size: num_intervals) - .set{manta_sv_vcf_to_concat} - BGZIP_VC_MANTA_SMALL_INDELS.out.vcf.map{ meta, vcf -> + manta_diploid_vcf_to_concat = BGZIP_VC_MANTA_DIPLOID.out.vcf.map{ meta, vcf -> new_meta = 
meta.clone() - new_meta.id = new_meta.sample + new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id [new_meta, vcf] }.groupTuple(size: num_intervals) - .set{manta_small_indels_vcf_to_concat} - BGZIP_VC_MANTA_TUMOR.out.vcf.map{ meta, vcf -> + manta_somatic_sv_vcf_to_concat = BGZIP_VC_MANTA_SOMATIC.out.vcf.map{ meta, vcf -> new_meta = meta.clone() - new_meta.id = new_meta.sample + new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id [new_meta, vcf] }.groupTuple(size: num_intervals) - .set{manta_tumor_sv_vcf_to_concat} CONCAT_MANTA_SV(manta_sv_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) CONCAT_MANTA_SMALL_INDELS(manta_small_indels_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - CONCAT_MANTA_TUMOR(manta_tumor_sv_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) + CONCAT_MANTA_DIPLOID(manta_diploid_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) + CONCAT_MANTA_SOMATIC(manta_somatic_sv_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) manta_candidate_small_indels_vcf = CONCAT_MANTA_SV.out.vcf manta_candidate_sv_vcf = CONCAT_MANTA_SMALL_INDELS.out.vcf - manta_tumor_sv_vcf = CONCAT_MANTA_TUMOR.out.vcf + manta_diploid_sv_vcf = CONCAT_MANTA_DIPLOID.out.vcf + manta_somatic_sv_vcf = CONCAT_MANTA_SOMATIC.out.vcf ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_TUMOR.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_DIPLOID.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SOMATIC.out.versions) ch_versions = ch_versions.mix(CONCAT_MANTA_SV.out.versions) ch_versions = ch_versions.mix(CONCAT_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_TUMOR.out.versions) + ch_versions = ch_versions.mix(CONCAT_MANTA_DIPLOID.out.versions) + ch_versions = ch_versions.mix(CONCAT_MANTA_SOMATIC.out.versions) + } - manta_vcf = manta_vcf.mix(manta_candidate_small_indels_vcf, 
manta_candidate_sv_vcf, manta_tumor_sv_vcf) + manta_vcf = manta_vcf.mix(manta_candidate_small_indels_vcf,manta_candidate_sv_vcf,manta_diploid_sv_vcf,manta_somatic_sv_vcf) emit: versions = ch_versions - manta_vcf + } diff --git a/subworkflows/local/variantcalling/manta_tumoronly.nf b/subworkflows/local/variantcalling/manta_tumoronly.nf new file mode 100644 index 0000000000..ea47c2ff6d --- /dev/null +++ b/subworkflows/local/variantcalling/manta_tumoronly.nf @@ -0,0 +1,81 @@ +include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_SV } from '../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_TUMOR } from '../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_TUMOR } from '../../../modules/local/concat_vcf/main' +include { MANTA_TUMORONLY } from '../../../modules/local/manta/tumoronly/main' + +workflow RUN_MANTA_TUMORONLY { + take: + cram_recalibrated_intervals_gz_tbi + fasta + fasta_fai + num_intervals + intervals_bed_combine_gz + + main: + + ch_versions = Channel.empty() + MANTA_TUMORONLY( + cram_recalibrated_intervals_gz_tbi, + fasta, + fasta_fai + ) + + ch_versions = ch_versions.mix(MANTA_TUMORONLY.out.versions) + + if(no_intervals){ + manta_candidate_small_indels_vcf = MANTA_TUMORONLY.out.candidate_small_indels_vcf + manta_candidate_sv_vcf = MANTA_TUMORONLY.out.candidate_sv_vcf + manta_tumor_sv_vcf = MANTA_TUMORONLY.out.tumor_sv_vcf + }else{ + + BGZIP_VC_MANTA_SV(MANTA_TUMORONLY.out.candidate_small_indels_vcf) + BGZIP_VC_MANTA_SMALL_INDELS(MANTA_TUMORONLY.out.candidate_sv_vcf) + BGZIP_VC_MANTA_TUMOR(MANTA_TUMORONLY.out.tumor_sv_vcf) + + BGZIP_VC_MANTA_SV.out.vcf.map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.sample + [new_meta, vcf] + }.groupTuple(size: num_intervals) 
+ .set{manta_sv_vcf_to_concat} + + BGZIP_VC_MANTA_SMALL_INDELS.out.vcf.map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.sample + [new_meta, vcf] + }.groupTuple(size: num_intervals) + .set{manta_small_indels_vcf_to_concat} + + BGZIP_VC_MANTA_TUMOR.out.vcf.map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.sample + [new_meta, vcf] + }.groupTuple(size: num_intervals) + .set{manta_tumor_sv_vcf_to_concat} + + CONCAT_MANTA_SV(manta_sv_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) + CONCAT_MANTA_SMALL_INDELS(manta_small_indels_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) + CONCAT_MANTA_TUMOR(manta_tumor_sv_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) + + manta_candidate_small_indels_vcf = CONCAT_MANTA_SV.out.vcf + manta_candidate_sv_vcf = CONCAT_MANTA_SMALL_INDELS.out.vcf + manta_tumor_sv_vcf = CONCAT_MANTA_TUMOR.out.vcf + + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SMALL_INDELS.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_TUMOR.out.versions) + + ch_versions = ch_versions.mix(CONCAT_MANTA_SV.out.versions) + ch_versions = ch_versions.mix(CONCAT_MANTA_SMALL_INDELS.out.versions) + ch_versions = ch_versions.mix(CONCAT_MANTA_TUMOR.out.versions) + } + + manta_vcf = manta_vcf.mix(manta_candidate_small_indels_vcf, manta_candidate_sv_vcf, manta_tumor_sv_vcf) + + emit: + versions = ch_versions + manta_vcf +} From 81dccf7c8bb1b48dbc61c05c8d7992f5ef4faa23 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 09:25:17 +0100 Subject: [PATCH 09/52] Fix import path --- .../local/variantcalling/manta_somatic.nf | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/subworkflows/local/variantcalling/manta_somatic.nf b/subworkflows/local/variantcalling/manta_somatic.nf index 4b4b2e7cd7..c48ced6b38 100644 --- a/subworkflows/local/variantcalling/manta_somatic.nf +++ b/subworkflows/local/variantcalling/manta_somatic.nf @@ 
-1,12 +1,12 @@ -include { BGZIP as BGZIP_VC_MANTA_DIPLOID } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SOMATIC } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SV } from '../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SOMATIC } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../modules/local/concat_vcf/main' -include { MANTA_SOMATIC } from '../../modules/local/manta/somatic/main' +include { BGZIP as BGZIP_VC_MANTA_DIPLOID } from '../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_SOMATIC } from '../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_SV } from '../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SOMATIC } from '../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../modules/local/concat_vcf/main' +include { MANTA_SOMATIC } from '../../../modules/local/manta/somatic/main' workflow RUN_MANTA_SOMATIC { take: From 0f6b580995584da38d6fb2aaa916627a006fc182 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 09:39:23 +0100 Subject: [PATCH 10/52] Comment, typos and beautify deepvariant sw --- .../local/variantcalling/deepvariant.nf | 36 +++++++++---------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/subworkflows/local/variantcalling/deepvariant.nf b/subworkflows/local/variantcalling/deepvariant.nf index 
f55d24f53e..0e4c846c88 100644 --- a/subworkflows/local/variantcalling/deepvariant.nf +++ b/subworkflows/local/variantcalling/deepvariant.nf @@ -1,29 +1,26 @@ -include { BGZIP as BGZIP_VC_DEEPVARIANT_GVCF } from '../../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_DEEPVARIANT_VCF } from '../../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_DEEPVARIANT_GVCF } from '../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_DEEPVARIANT_VCF } from '../../../modules/local/concat_vcf/main' -include { DEEPVARIANT } from '../../../modules/nf-core/modules/deepvariant/main' -include { TABIX_TABIX as TABIX_VC_DEEPVARIANT_GVCF } from '../../../modules/nf-core/modules/tabix/tabix/main' -include { TABIX_TABIX as TABIX_VC_DEEPVARIANT_VCF } from '../../../modules/nf-core/modules/tabix/tabix/main' +include { BGZIP as BGZIP_VC_DEEPVARIANT_GVCF } from '../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_DEEPVARIANT_VCF } from '../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_DEEPVARIANT_GVCF } from '../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_DEEPVARIANT_VCF } from '../../../modules/local/concat_vcf/main' +include { DEEPVARIANT } from '../../../modules/nf-core/modules/deepvariant/main' +include { TABIX_TABIX as TABIX_VC_DEEPVARIANT_GVCF } from '../../../modules/nf-core/modules/tabix/tabix/main' +include { TABIX_TABIX as TABIX_VC_DEEPVARIANT_VCF } from '../../../modules/nf-core/modules/tabix/tabix/main' //TODO: benchmark if it is better to provide multiple bed files & run on multiple machines + mergeing afterwards || one containing all intervals and run on one larger machine // Deepvariant: https://github.com/google/deepvariant/issues/510 workflow RUN_DEEPVARIANT { take: - cram_recalibrated_intervals // channel: [mandatory] - fasta - fasta_fai - intervals_bed_combine_gz - num_intervals + cram // channel: [mandatory] [meta, cram, crai, interval] + fasta // channel: [mandatory] + fasta_fai // channel: 
[mandatory] + intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. + num_intervals // val: [optional] Number of used intervals, mandatory when intervals are provided. main: ch_versions = Channel.empty() - DEEPVARIANT( - cram_recalibrated_intervals, - fasta, - fasta_fai) + DEEPVARIANT(cram, fasta, fasta_fai) // Only when no intervals TABIX_VC_DEEPVARIANT_VCF(DEEPVARIANT.out.vcf) @@ -41,7 +38,7 @@ workflow RUN_DEEPVARIANT { [new_meta, vcf] }.groupTuple(size: num_intervals), fasta_fai, - intervals_bed_combine_gz) + intervals_bed_gz) CONCAT_DEEPVARIANT_GVCF( BGZIP_VC_DEEPVARIANT_GVCF.out.vcf @@ -51,9 +48,10 @@ workflow RUN_DEEPVARIANT { [new_meta, vcf] }.groupTuple(size: num_intervals), fasta_fai, - intervals_bed_combine_gz) + intervals_bed_gz) - deepvariant_vcf = channel.empty().mix( + // Mix output channels for "no intervals" and "with intervals" results + deepvariant_vcf = Channel.empty().mix( CONCAT_DEEPVARIANT_GVCF.out.vcf, CONCAT_DEEPVARIANT_VCF.out.vcf, DEEPVARIANT.out.gvcf.join(TABIX_VC_DEEPVARIANT_GVCF.out.tbi), From 89358317ed58e7bb092ad145de8e84a05fcf0209 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 10:23:34 +0100 Subject: [PATCH 11/52] Comment, typos and beautify manta_germline sw --- .../local/germline_variant_calling.nf | 14 +- subworkflows/local/pair_variant_calling.nf | 142 +++++++++--------- subworkflows/local/tumor_variant_calling.nf | 38 ++--- .../local/variantcalling/freebayes.nf | 16 +- .../local/variantcalling/manta_germline.nf | 25 +-- 5 files changed, 119 insertions(+), 116 deletions(-) diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index 8f64f2eda3..991ac43946 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -20,13 +20,15 @@ workflow GERMLINE_VARIANT_CALLING { intervals // channel: [mandatory] 
intervals/target regions intervals_bed_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed intervals_bed_combine_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed in one file - intervals_bed_combine_gz // channel: [mandatory] intervals/target regions index zipped and indexed in one file + intervals_bed_combine_gz // channel: [mandatory] intervals/target regions index zipped in one file num_intervals // val: number of intervals that are used to parallelize exection, either based on capture kit or GATK recommended for WGS // joint_germline // val: true/false on whether to run joint_germline calling, only works in combination with haplotypecaller at the moment main: ch_versions = Channel.empty() + + //TODO: Temporary until the if's can be removed and everything can be handeled with "when" in the modules.config deepvariant_vcf = Channel.empty() freebayes_vcf = Channel.empty() haplotypecaller_gvcf = Channel.empty() @@ -57,19 +59,17 @@ workflow GERMLINE_VARIANT_CALLING { // DEEPVARIANT if(params.tools.contains('deepvariant')){ RUN_DEEPVARIANT(cram_recalibrated_intervals, fasta, fasta_fai, intervals_bed_combine_gz, num_intervals) - deepvariant_vcf = RUN_DEEPVARIANT.out.deepvariant_vcf ch_versions = ch_versions.mix(RUN_DEEPVARIANT.out.versions) - } + // FREEBAYES if (params.tools.contains('freebayes')){ - // Remap channel for Freebayes + // Remap channel for Freebayes cram_recalibrated_intervals_freebayes = cram_recalibrated_intervals .map{ meta, cram, crai, intervals -> [meta, cram, crai, [], [], intervals] } - RUN_FREEBAYES(cram_recalibrated_intervals_freebayes, fasta, fasta_fai) freebayes_vcf = RUN_FREEBAYES.out.freebayes_vcf ch_versions = ch_versions.mix(RUN_FREEBAYES.out.versions) @@ -97,8 +97,8 @@ workflow GERMLINE_VARIANT_CALLING { RUN_MANTA(cram_recalibrated_intervals_gz_tbi, fasta, fasta_fai, - num_intervals, - intervals_bed_combine_gz) + intervals_bed_combine_gz, + num_intervals) ch_versions = 
ch_versions.mix(RUN_MANTA.out.versions) manta_vcf = RUN_MANTA.out.manta_vcf diff --git a/subworkflows/local/pair_variant_calling.nf b/subworkflows/local/pair_variant_calling.nf index 5f627c2f84..59c4c5788a 100644 --- a/subworkflows/local/pair_variant_calling.nf +++ b/subworkflows/local/pair_variant_calling.nf @@ -65,81 +65,81 @@ workflow PAIR_VARIANT_CALLING { [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, new_bed, new_tbi] } - if (tools.contains('manta')) { - RUN_MANTA_SOMATIC(cram_pair_intervals_gz_tbi, - fasta, - fasta_fai, - num_intervals, - intervals_bed_combine_gz) - } + // if (tools.contains('manta')) { + // RUN_MANTA_SOMATIC(cram_pair_intervals_gz_tbi, + // fasta, + // fasta_fai, + // num_intervals, + // intervals_bed_combine_gz) + // } - cram_pair_strelka = Channel.empty() - if (tools.contains('strelka') && tools.contains('manta')) { - cram_pair_strelka = cram_pair.join(manta_somatic_sv_vcf).combine(intervals_bed_gz_tbi) - .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi, bed, tbi -> - normal_id = meta.normal_id - tumor_id = meta.tumor_id - - new_bed = bed.simpleName != "no_intervals" ? bed : [] - new_tbi = tbi.simpleName != "no_intervals" ? tbi : [] - id = bed.simpleName != "no_intervals" ? tumor_id + "_vs_" + normal_id + "_" + bed.simpleName : tumor_id + "_vs_" + normal_id - new_meta = [ id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient] - [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi, new_bed, new_tbi] - } - } else if (tools.contains('strelka') && !tools.contains('manta')) { - cram_pair_strelka = cram_pair.combine(intervals_bed_gz_tbi) - .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, bed, tbi -> - normal_id = meta.normal_id - tumor_id = meta.tumor_id - - new_bed = bed.simpleName != "no_intervals" ? bed : [] - new_tbi = tbi.simpleName != "no_intervals" ? tbi : [] - id = bed.simpleName != "no_intervals" ? 
tumor_id + "_vs_" + normal_id + "_" + bed.simpleName : tumor_id + "_vs_" + normal_id - new_meta = [ id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient] - - [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, [], [], new_bed, new_tbi] - } - } + // cram_pair_strelka = Channel.empty() + // if (tools.contains('strelka') && tools.contains('manta')) { + // cram_pair_strelka = cram_pair.join(manta_somatic_sv_vcf).combine(intervals_bed_gz_tbi) + // .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi, bed, tbi -> + // normal_id = meta.normal_id + // tumor_id = meta.tumor_id + + // new_bed = bed.simpleName != "no_intervals" ? bed : [] + // new_tbi = tbi.simpleName != "no_intervals" ? tbi : [] + // id = bed.simpleName != "no_intervals" ? tumor_id + "_vs_" + normal_id + "_" + bed.simpleName : tumor_id + "_vs_" + normal_id + // new_meta = [ id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient] + // [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi, new_bed, new_tbi] + // } + // } else if (tools.contains('strelka') && !tools.contains('manta')) { + // cram_pair_strelka = cram_pair.combine(intervals_bed_gz_tbi) + // .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, bed, tbi -> + // normal_id = meta.normal_id + // tumor_id = meta.tumor_id + + // new_bed = bed.simpleName != "no_intervals" ? bed : [] + // new_tbi = tbi.simpleName != "no_intervals" ? tbi : [] + // id = bed.simpleName != "no_intervals" ? 
tumor_id + "_vs_" + normal_id + "_" + bed.simpleName : tumor_id + "_vs_" + normal_id + // new_meta = [ id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient] + + // [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, [], [], new_bed, new_tbi] + // } + // } - if (tools.contains('strelka')) { + // if (tools.contains('strelka')) { - STRELKA_SOMATIC( - cram_pair_strelka, - fasta, - fasta_fai - ) - - if (no_intervals) { - strelka_snvs_vcf_gz = STRELKA_SOMATIC.out.vcf_snvs - strelka_indels_vcf_gz = STRELKA_SOMATIC.out.vcf_indels - } else { - BGZIP_VC_STRELKA_SNVS(STRELKA_SOMATIC.out.vcf_snvs) - BGZIP_VC_STRELKA_INDELS(STRELKA_SOMATIC.out.vcf_indels) - - strelka_snvs_vcf_to_concat = BGZIP_VC_STRELKA_SNVS.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id - [new_meta, vcf] - }.groupTuple(size: num_intervals) - - strelka_indels_vcf_to_concat = BGZIP_VC_STRELKA_INDELS.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id - [new_meta, vcf] - }.groupTuple(size: num_intervals) - - CONCAT_STRELKA_SNVS(strelka_snvs_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - CONCAT_STRELKA_INDELS(strelka_indels_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - - strelka_snvs_vcf_gz = CONCAT_STRELKA_SNVS.out.vcf - strelka_indels_vcf_gz = CONCAT_STRELKA_INDELS.out.vcf - - ch_versions = ch_versions.mix(BGZIP_VC_STRELKA_SNVS.out.versions) - ch_versions = ch_versions.mix(CONCAT_STRELKA_SNVS.out.versions) - } + // STRELKA_SOMATIC( + // cram_pair_strelka, + // fasta, + // fasta_fai + // ) - strelka_vcf = strelka_vcf.mix(strelka_snvs_vcf_gz,strelka_indels_vcf_gz) - } + // if (no_intervals) { + // strelka_snvs_vcf_gz = STRELKA_SOMATIC.out.vcf_snvs + // strelka_indels_vcf_gz = STRELKA_SOMATIC.out.vcf_indels + // } else { + // BGZIP_VC_STRELKA_SNVS(STRELKA_SOMATIC.out.vcf_snvs) + // 
BGZIP_VC_STRELKA_INDELS(STRELKA_SOMATIC.out.vcf_indels) + + // strelka_snvs_vcf_to_concat = BGZIP_VC_STRELKA_SNVS.out.vcf.map{ meta, vcf -> + // new_meta = meta.clone() + // new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id + // [new_meta, vcf] + // }.groupTuple(size: num_intervals) + + // strelka_indels_vcf_to_concat = BGZIP_VC_STRELKA_INDELS.out.vcf.map{ meta, vcf -> + // new_meta = meta.clone() + // new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id + // [new_meta, vcf] + // }.groupTuple(size: num_intervals) + + // CONCAT_STRELKA_SNVS(strelka_snvs_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) + // CONCAT_STRELKA_INDELS(strelka_indels_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) + + // strelka_snvs_vcf_gz = CONCAT_STRELKA_SNVS.out.vcf + // strelka_indels_vcf_gz = CONCAT_STRELKA_INDELS.out.vcf + + // ch_versions = ch_versions.mix(BGZIP_VC_STRELKA_SNVS.out.versions) + // ch_versions = ch_versions.mix(CONCAT_STRELKA_SNVS.out.versions) + // } + + // strelka_vcf = strelka_vcf.mix(strelka_snvs_vcf_gz,strelka_indels_vcf_gz) + // } if (tools.contains('msisensorpro')) { diff --git a/subworkflows/local/tumor_variant_calling.nf b/subworkflows/local/tumor_variant_calling.nf index 0eeb34f1ba..d86398d9c2 100644 --- a/subworkflows/local/tumor_variant_calling.nf +++ b/subworkflows/local/tumor_variant_calling.nf @@ -93,29 +93,29 @@ workflow TUMOR_ONLY_VARIANT_CALLING { //mutect2_vcf_tbi = mutect2_vcf_tbi.mix(GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING.out.mutect2_vcf_gz_tbi) } - if (tools.contains('manta')){ - //TODO: Research if splitting by intervals is ok, we pretend for now it is fine. Seems to be the consensus on upstream modules implementaiton too + // if (tools.contains('manta')){ + // //TODO: Research if splitting by intervals is ok, we pretend for now it is fine. 
Seems to be the consensus on upstream modules implementaiton too - RUN_MANTA_TUMORONLY(cram_recalibrated_intervals_gz_tbi, - fasta, - fasta_fai, - num_intervals, - intervals_bed_combine_gz) + // RUN_MANTA_TUMORONLY(cram_recalibrated_intervals_gz_tbi, + // fasta, + // fasta_fai, + // num_intervals, + // intervals_bed_combine_gz) - manta_vcf = manta_vcf.mix(manta_candidate_small_indels_vcf, manta_candidate_sv_vcf, manta_tumor_sv_vcf) - ch_versions = ch_versions.mix(RUN_MANTA.out.versions) - } + // manta_vcf = manta_vcf.mix(manta_candidate_small_indels_vcf, manta_candidate_sv_vcf, manta_tumor_sv_vcf) + // ch_versions = ch_versions.mix(RUN_MANTA.out.versions) + // } - if (tools.contains('strelka')) { - RUN_STRELKA(cram_recalibrated_intervals_gz_tbi, - fasta, - fasta_fai, - num_intervals, - intervals_bed_combine_gz) - ch_versions = ch_versions.mix(RUN_STRELKA.out.versions) - strelka_vcf = RUN_STRELKA.out.strelka_vcf + // if (tools.contains('strelka')) { + // RUN_STRELKA(cram_recalibrated_intervals_gz_tbi, + // fasta, + // fasta_fai, + // num_intervals, + // intervals_bed_combine_gz) + // ch_versions = ch_versions.mix(RUN_STRELKA.out.versions) + // strelka_vcf = RUN_STRELKA.out.strelka_vcf - } + // } // if (tools.contains('tiddit')){ diff --git a/subworkflows/local/variantcalling/freebayes.nf b/subworkflows/local/variantcalling/freebayes.nf index 4a4c81700f..644d582355 100644 --- a/subworkflows/local/variantcalling/freebayes.nf +++ b/subworkflows/local/variantcalling/freebayes.nf @@ -3,19 +3,20 @@ include { CONCAT_VCF as CONCAT_FREEBAYES } from '../../../modules/loc include { FREEBAYES } from '../../../modules/nf-core/modules/freebayes/main' include { TABIX_TABIX as TABIX_VC_FREEBAYES } from '../../../modules/nf-core/modules/tabix/tabix/main' - workflow RUN_FREEBAYES { take: - cram_recalibrated_intervals_freebayes - fasta - fasta_fai + cram // channel: [mandatory] [meta, cram, crai, [], [], interval] + fasta // channel: [mandatory] + fasta_fai // channel: [mandatory] + 
intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. + num_intervals // val: [optional] Number of used intervals, mandatory when intervals are provided. main: ch_versions = Channel.empty() FREEBAYES( - cram_recalibrated_intervals_freebayes, + cram, fasta, fasta_fai, [], [], []) @@ -34,8 +35,9 @@ workflow RUN_FREEBAYES { [new_meta, vcf] }.groupTuple(size: num_intervals), fasta_fai, - intervals_bed_combine_gz) + intervals_bed_gz) + // Mix output channels for "no intervals" and "with intervals" results freebayes_vcf = Channel.empty().mix( CONCAT_FREEBAYES.out.vcf, FREEBAYES.out.vcf.join(TABIX_VC_FREEBAYES.out.tbi)) @@ -46,6 +48,6 @@ workflow RUN_FREEBAYES { ch_versions = ch_versions.mix(TABIX_VC_FREEBAYES.out.versions) emit: - versions = ch_versions freebayes_vcf + versions = ch_versions } diff --git a/subworkflows/local/variantcalling/manta_germline.nf b/subworkflows/local/variantcalling/manta_germline.nf index 8b3c5a23c7..390c4c8b7c 100644 --- a/subworkflows/local/variantcalling/manta_germline.nf +++ b/subworkflows/local/variantcalling/manta_germline.nf @@ -6,22 +6,22 @@ include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../modules/loc include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../modules/local/concat_vcf/main' include { MANTA_GERMLINE } from '../../../modules/local/manta/germline/main' +// TODO: Research if splitting by intervals is ok, we pretend for now it is fine. +// Seems to be the consensus on upstream modules implementation too workflow RUN_MANTA { take: - cram_recalibrated_intervals_gz_tbi - fasta - fasta_fai - num_intervals - intervals_bed_combine_gz + cram // channel: [mandatory] [meta, cram, crai, interval.bed.gz, interval.bed.gz.tbi] + fasta // channel: [mandatory] + fasta_fai // channel: [mandatory] + intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). 
Mandatory if interval files are used. + num_intervals // val: [optional] Number of used intervals, mandatory when intervals are provided. main: ch_versions = Channel.empty() - // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. - // Seems to be the consensus on upstream modules implementation too MANTA_GERMLINE( - cram_recalibrated_intervals_gz_tbi, + cram, fasta, fasta_fai) @@ -55,7 +55,7 @@ workflow RUN_MANTA { [new_meta, vcf] }.groupTuple(size: num_intervals), fasta_fai, - intervals_bed_combine_gz) + intervals_bed_gz) BGZIP_VC_MANTA_SMALL_INDELS(MANTA_GERMLINE.out.candidate_small_indels_vcf) @@ -67,7 +67,7 @@ workflow RUN_MANTA { [new_meta, vcf] }.groupTuple(size: num_intervals), fasta_fai, - intervals_bed_combine_gz) + intervals_bed_gz) BGZIP_VC_MANTA_SV(MANTA_GERMLINE.out.candidate_sv_vcf) @@ -79,8 +79,9 @@ workflow RUN_MANTA { [new_meta, vcf] }.groupTuple(size: num_intervals), fasta_fai, - intervals_bed_combine_gz) + intervals_bed_gz) + // Mix output channels for "no intervals" and "with intervals" results manta_vcf = Channel.empty().mix( CONCAT_MANTA_DIPLOID.out.vcf, CONCAT_MANTA_SMALL_INDELS.out.vcf, @@ -98,6 +99,6 @@ workflow RUN_MANTA { ch_versions = ch_versions.mix(MANTA_GERMLINE.out.versions) emit: - versions = ch_versions manta_vcf + versions = ch_versions } From 87fd89b7a50d6f758ef621ff87f6a93898f0af9f Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 10:32:11 +0100 Subject: [PATCH 12/52] Comment, typos and beautify strelka sw --- .../local/germline_variant_calling.nf | 4 +-- .../local/variantcalling/manta_germline.nf | 5 +--- subworkflows/local/variantcalling/strelka.nf | 27 +++++++++---------- 3 files changed, 15 insertions(+), 21 deletions(-) diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index 991ac43946..4078db3389 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -109,8 +109,8 @@ 
workflow GERMLINE_VARIANT_CALLING { RUN_STRELKA(cram_recalibrated_intervals_gz_tbi, fasta, fasta_fai, - num_intervals, - intervals_bed_combine_gz) + intervals_bed_combine_gz, + num_intervals) ch_versions = ch_versions.mix(RUN_STRELKA.out.versions) strelka_vcf = RUN_STRELKA.out.strelka_vcf diff --git a/subworkflows/local/variantcalling/manta_germline.nf b/subworkflows/local/variantcalling/manta_germline.nf index 390c4c8b7c..489f815b59 100644 --- a/subworkflows/local/variantcalling/manta_germline.nf +++ b/subworkflows/local/variantcalling/manta_germline.nf @@ -20,10 +20,7 @@ workflow RUN_MANTA { ch_versions = Channel.empty() - MANTA_GERMLINE( - cram, - fasta, - fasta_fai) + MANTA_GERMLINE(cram, fasta, fasta_fai) // Figure out if using intervals or no_intervals MANTA_GERMLINE.out.candidate_small_indels_vcf.groupTuple(size: num_intervals) diff --git a/subworkflows/local/variantcalling/strelka.nf b/subworkflows/local/variantcalling/strelka.nf index e8d5770553..d2d729b735 100644 --- a/subworkflows/local/variantcalling/strelka.nf +++ b/subworkflows/local/variantcalling/strelka.nf @@ -4,25 +4,21 @@ include { CONCAT_VCF as CONCAT_STRELKA } from '../../../modules/loc include { CONCAT_VCF as CONCAT_STRELKA_GENOME } from '../../../modules/local/concat_vcf/main' include { STRELKA_GERMLINE } from '../../../modules/nf-core/modules/strelka/germline/main' +// TODO: Research if splitting by intervals is ok, we pretend for now it is fine. +// Seems to be the consensus on upstream modules implementation too workflow RUN_STRELKA { take: - cram_recalibrated_intervals_gz_tbi - fasta - fasta_fai - num_intervals - intervals_bed_combine_gz + cram // channel: [mandatory] [meta, cram, crai, interval.bed.gz, interval.bed.gz.tbi] + fasta // channel: [mandatory] + fasta_fai // channel: [mandatory] + intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. 
+ num_intervals // val: [optional] Number of used intervals, mandatory when intervals are provided. main: ch_versions = Channel.empty() - // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. - // Seems to be the consensus on upstream modules implementation too - - STRELKA_GERMLINE( - cram_recalibrated_intervals_gz_tbi, - fasta, - fasta_fai) + STRELKA_GERMLINE(cram, fasta, fasta_fai) // Figure out if using intervals or no_intervals STRELKA_GERMLINE.out.vcf.groupTuple(size: num_intervals) @@ -48,7 +44,7 @@ workflow RUN_STRELKA { [new_meta, vcf] }.groupTuple(size: num_intervals), fasta_fai, - intervals_bed_combine_gz) + intervals_bed_gz) BGZIP_VC_STRELKA_GENOME(STRELKA_GERMLINE.out.genome_vcf) @@ -60,8 +56,9 @@ workflow RUN_STRELKA { [new_meta, vcf] }.groupTuple(size: num_intervals), fasta_fai, - intervals_bed_combine_gz) + intervals_bed_gz) + // Mix output channels for "no intervals" and "with intervals" results strelka_vcf = Channel.empty().mix( CONCAT_STRELKA.out.vcf, CONCAT_STRELKA_GENOME.out.vcf, @@ -73,6 +70,6 @@ workflow RUN_STRELKA { ch_versions = ch_versions.mix(STRELKA_GERMLINE.out.versions) emit: - versions = ch_versions strelka_vcf + versions = ch_versions } From 5854a8aa7d84b3086cb847d6bb9320000e5a8ad3 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 10:41:55 +0100 Subject: [PATCH 13/52] Rename from RUN_TOOL to TOOL --- .../local/germline_variant_calling.nf | 38 +++++++++---------- .../local/variantcalling/deepvariant.nf | 2 +- .../local/variantcalling/freebayes.nf | 2 +- .../local/variantcalling/haplotypecaller.nf | 2 +- .../local/variantcalling/manta_germline.nf | 2 +- subworkflows/local/variantcalling/strelka.nf | 2 +- subworkflows/local/variantcalling/tiddit.nf | 2 +- 7 files changed, 25 insertions(+), 25 deletions(-) diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index 4078db3389..313fbbcbc8 100644 --- a/subworkflows/local/germline_variant_calling.nf 
+++ b/subworkflows/local/germline_variant_calling.nf @@ -2,12 +2,12 @@ // GERMLINE VARIANT CALLING // -include { RUN_DEEPVARIANT } from './variantcalling/deepvariant.nf' -include { RUN_FREEBAYES } from './variantcalling/freebayes.nf' -include { RUN_HAPLOTYPECALLER } from './variantcalling/haplotypecaller.nf' -include { RUN_MANTA } from './variantcalling/manta_germline.nf' -include { RUN_STRELKA } from './variantcalling/strelka.nf' -//include { RUN_TIDDIT } from './variantcalling/tiddit.nf' +include { DEEPVARIANT } from './variantcalling/deepvariant.nf' +include { FREEBAYES } from './variantcalling/freebayes.nf' +include { HAPLOTYPECALLER } from './variantcalling/haplotypecaller.nf' +include { MANTA } from './variantcalling/manta_germline.nf' +include { STRELKA } from './variantcalling/strelka.nf' +//include { TIDDIT } from './variantcalling/tiddit.nf' workflow GERMLINE_VARIANT_CALLING { take: @@ -58,7 +58,7 @@ workflow GERMLINE_VARIANT_CALLING { // DEEPVARIANT if(params.tools.contains('deepvariant')){ - RUN_DEEPVARIANT(cram_recalibrated_intervals, fasta, fasta_fai, intervals_bed_combine_gz, num_intervals) + DEEPVARIANT(cram_recalibrated_intervals, fasta, fasta_fai, intervals_bed_combine_gz, num_intervals) deepvariant_vcf = RUN_DEEPVARIANT.out.deepvariant_vcf ch_versions = ch_versions.mix(RUN_DEEPVARIANT.out.versions) } @@ -70,14 +70,14 @@ workflow GERMLINE_VARIANT_CALLING { .map{ meta, cram, crai, intervals -> [meta, cram, crai, [], [], intervals] } - RUN_FREEBAYES(cram_recalibrated_intervals_freebayes, fasta, fasta_fai) + FREEBAYES(cram_recalibrated_intervals_freebayes, fasta, fasta_fai) freebayes_vcf = RUN_FREEBAYES.out.freebayes_vcf ch_versions = ch_versions.mix(RUN_FREEBAYES.out.versions) } // HAPLOTYPECALLER if (params.tools.contains('haplotypecaller')){ - RUN_HAPLOTYPECALLER(cram_recalibrated_intervals, + HAPLOTYPECALLER(cram_recalibrated_intervals, fasta, fasta_fai, dict, @@ -94,11 +94,11 @@ workflow GERMLINE_VARIANT_CALLING { // MANTA if 
(params.tools.contains('manta')){ - RUN_MANTA(cram_recalibrated_intervals_gz_tbi, - fasta, - fasta_fai, - intervals_bed_combine_gz, - num_intervals) + MANTA(cram_recalibrated_intervals_gz_tbi, + fasta, + fasta_fai, + intervals_bed_combine_gz, + num_intervals) ch_versions = ch_versions.mix(RUN_MANTA.out.versions) manta_vcf = RUN_MANTA.out.manta_vcf @@ -106,11 +106,11 @@ workflow GERMLINE_VARIANT_CALLING { // STRELKA if (params.tools.contains('strelka')){ - RUN_STRELKA(cram_recalibrated_intervals_gz_tbi, - fasta, - fasta_fai, - intervals_bed_combine_gz, - num_intervals) + STRELKA(cram_recalibrated_intervals_gz_tbi, + fasta, + fasta_fai, + intervals_bed_combine_gz, + num_intervals) ch_versions = ch_versions.mix(RUN_STRELKA.out.versions) strelka_vcf = RUN_STRELKA.out.strelka_vcf diff --git a/subworkflows/local/variantcalling/deepvariant.nf b/subworkflows/local/variantcalling/deepvariant.nf index 0e4c846c88..698e97ef75 100644 --- a/subworkflows/local/variantcalling/deepvariant.nf +++ b/subworkflows/local/variantcalling/deepvariant.nf @@ -8,7 +8,7 @@ include { TABIX_TABIX as TABIX_VC_DEEPVARIANT_VCF } from '../../../modules/nf-c //TODO: benchmark if it is better to provide multiple bed files & run on multiple machines + mergeing afterwards || one containing all intervals and run on one larger machine // Deepvariant: https://github.com/google/deepvariant/issues/510 -workflow RUN_DEEPVARIANT { +workflow DEEPVARIANT { take: cram // channel: [mandatory] [meta, cram, crai, interval] fasta // channel: [mandatory] diff --git a/subworkflows/local/variantcalling/freebayes.nf b/subworkflows/local/variantcalling/freebayes.nf index 644d582355..fc13be3ba1 100644 --- a/subworkflows/local/variantcalling/freebayes.nf +++ b/subworkflows/local/variantcalling/freebayes.nf @@ -3,7 +3,7 @@ include { CONCAT_VCF as CONCAT_FREEBAYES } from '../../../modules/loc include { FREEBAYES } from '../../../modules/nf-core/modules/freebayes/main' include { TABIX_TABIX as TABIX_VC_FREEBAYES } from 
'../../../modules/nf-core/modules/tabix/tabix/main' -workflow RUN_FREEBAYES { +workflow FREEBAYES { take: cram // channel: [mandatory] [meta, cram, crai, [], [], interval] fasta // channel: [mandatory] diff --git a/subworkflows/local/variantcalling/haplotypecaller.nf b/subworkflows/local/variantcalling/haplotypecaller.nf index a4208acec4..3556abf8cc 100644 --- a/subworkflows/local/variantcalling/haplotypecaller.nf +++ b/subworkflows/local/variantcalling/haplotypecaller.nf @@ -5,7 +5,7 @@ include { GATK4_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../../modules/nf- include { GATK_JOINT_GERMLINE_VARIANT_CALLING } from '../../../subworkflows/nf-core/gatk4/joint_germline_variant_calling/main' include { TABIX_TABIX as TABIX_VC_HAPLOTYPECALLER } from '../../../modules/nf-core/modules/tabix/tabix/main' -workflow RUN_HAPLOTYPECALLER { +workflow HAPLOTYPECALLER { take: cram_recalibrated_intervals fasta diff --git a/subworkflows/local/variantcalling/manta_germline.nf b/subworkflows/local/variantcalling/manta_germline.nf index 489f815b59..611d3c3c8d 100644 --- a/subworkflows/local/variantcalling/manta_germline.nf +++ b/subworkflows/local/variantcalling/manta_germline.nf @@ -8,7 +8,7 @@ include { MANTA_GERMLINE } from '../../../modules/loc // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. // Seems to be the consensus on upstream modules implementation too -workflow RUN_MANTA { +workflow MANTA { take: cram // channel: [mandatory] [meta, cram, crai, interval.bed.gz, interval.bed.gz.tbi] fasta // channel: [mandatory] diff --git a/subworkflows/local/variantcalling/strelka.nf b/subworkflows/local/variantcalling/strelka.nf index d2d729b735..9a453d3c5d 100644 --- a/subworkflows/local/variantcalling/strelka.nf +++ b/subworkflows/local/variantcalling/strelka.nf @@ -6,7 +6,7 @@ include { STRELKA_GERMLINE } from '../../../modules/nf- // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. 
// Seems to be the consensus on upstream modules implementation too -workflow RUN_STRELKA { +workflow STRELKA { take: cram // channel: [mandatory] [meta, cram, crai, interval.bed.gz, interval.bed.gz.tbi] fasta // channel: [mandatory] diff --git a/subworkflows/local/variantcalling/tiddit.nf b/subworkflows/local/variantcalling/tiddit.nf index 87ec6050b1..354697101e 100644 --- a/subworkflows/local/variantcalling/tiddit.nf +++ b/subworkflows/local/variantcalling/tiddit.nf @@ -2,7 +2,7 @@ include { TABIX_BGZIPTABIX as TABIX_BGZIP_TIDDIT_SV } from '../../../modules/nf- include { TIDDIT_SV } from '../../../modules/nf-core/modules/tiddit/sv/main' //TODO: UNDER CONSTRUCTIONS -workflow RUN_TIDDIT { +workflow TIDDIT { take: From fed5f2a2356c4ad65a0231427c0c7e20a7c4dc7b Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 10:42:58 +0100 Subject: [PATCH 14/52] Rename MANTA to MANTA_GERMLINE --- subworkflows/local/germline_variant_calling.nf | 12 ++++++------ subworkflows/local/variantcalling/manta_germline.nf | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index 313fbbcbc8..010c7f5310 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -5,7 +5,7 @@ include { DEEPVARIANT } from './variantcalling/deepvariant.nf' include { FREEBAYES } from './variantcalling/freebayes.nf' include { HAPLOTYPECALLER } from './variantcalling/haplotypecaller.nf' -include { MANTA } from './variantcalling/manta_germline.nf' +include { MANTA_GERMLINE } from './variantcalling/manta_germline.nf' include { STRELKA } from './variantcalling/strelka.nf' //include { TIDDIT } from './variantcalling/tiddit.nf' @@ -94,11 +94,11 @@ workflow GERMLINE_VARIANT_CALLING { // MANTA if (params.tools.contains('manta')){ - MANTA(cram_recalibrated_intervals_gz_tbi, - fasta, - fasta_fai, - intervals_bed_combine_gz, - num_intervals) + 
MANTA_GERMLINE (cram_recalibrated_intervals_gz_tbi, + fasta, + fasta_fai, + intervals_bed_combine_gz, + num_intervals) ch_versions = ch_versions.mix(RUN_MANTA.out.versions) manta_vcf = RUN_MANTA.out.manta_vcf diff --git a/subworkflows/local/variantcalling/manta_germline.nf b/subworkflows/local/variantcalling/manta_germline.nf index 611d3c3c8d..58abec65eb 100644 --- a/subworkflows/local/variantcalling/manta_germline.nf +++ b/subworkflows/local/variantcalling/manta_germline.nf @@ -8,7 +8,7 @@ include { MANTA_GERMLINE } from '../../../modules/loc // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. // Seems to be the consensus on upstream modules implementation too -workflow MANTA { +workflow MANTA_GERMLINE { take: cram // channel: [mandatory] [meta, cram, crai, interval.bed.gz, interval.bed.gz.tbi] fasta // channel: [mandatory] From d42eb45d10c82e00e4a17a5827e624b309b9661e Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 11:19:45 +0100 Subject: [PATCH 15/52] Start including mutect with new when syntax --- conf/modules.config | 44 +++---- .../local/germline_variant_calling.nf | 2 +- subworkflows/local/tumor_variant_calling.nf | 50 ++++---- .../local/variantcalling/manta_tumoronly.nf | 111 ++++++++++-------- 4 files changed, 112 insertions(+), 95 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 4f2b06b0a6..dd08e387ba 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -616,29 +616,43 @@ process{ // TUMOR_VARIANT_CALLING - withName: 'MERGEMUTECTSTATS' { - ext.prefix = { "${meta.id}.vcf.gz" } + //MUTECT2 + withName: 'FILTERMUTECTCALLS'{ + ext.prefix = {"${meta.id}.filtered."} + publishDir = [ + enabled: true, + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" } + ] } + withName: 'GATHERPILEUPSUMMARIES' { ext.prefix = { "${meta.id}.table" } } -// PAIR_VARIANT_CALLING - - withName: 'MUTECT2'{ + withName: 'MERGEMUTECTSTATS' { + ext.prefix = { 
"${meta.id}.vcf.gz" } publishDir = [ - enabled: "${params.no_intervals}", + enabled: true, mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" } ] } - withName: 'GATK4_MUTECT2'{ + + withName: 'MUTECT2'{ + ext.when = { params.tools && params.tools.contains('mutect2') } + ext.args = { params.ignore_soft_clipped_bases ? "--dont-use-soft-clipped-bases true" : "" } publishDir = [ enabled: "${params.no_intervals}", mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" } ] } + + + +// PAIR_VARIANT_CALLING + withName: 'CONCAT_MUTECT2' { publishDir = [ enabled: "${!params.no_intervals}", @@ -646,21 +660,7 @@ process{ path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" } ] } - withName: 'GATK4_MERGEMUTECTSTATS' { - publishDir = [ - enabled: true, - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" } - ] - } - withName: 'GATK4_FILTERMUTECTCALLS'{ - ext.prefix = {"${meta.id}.filtered."} - publishDir = [ - enabled: true, - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" } - ] - } + } // withName: 'GATK4_CALCULATECONTAMINATION'{ // ext.args = '' diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index 010c7f5310..f19c5332c4 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -28,7 +28,7 @@ workflow GERMLINE_VARIANT_CALLING { ch_versions = Channel.empty() - //TODO: Temporary until the if's can be removed and everything can be handeled with "when" in the modules.config + //TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config deepvariant_vcf = Channel.empty() freebayes_vcf = Channel.empty() haplotypecaller_gvcf = Channel.empty() diff --git a/subworkflows/local/tumor_variant_calling.nf 
b/subworkflows/local/tumor_variant_calling.nf index d86398d9c2..395f4daf3a 100644 --- a/subworkflows/local/tumor_variant_calling.nf +++ b/subworkflows/local/tumor_variant_calling.nf @@ -5,9 +5,9 @@ include { GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING } from '../../subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main' -include { RUN_STRELKA } from './variantcalling/strelka.nf' -include { RUN_FREEBAYES } from './variantcalling/freebayes.nf' -include { RUN_MANTA_TUMORONLY } from './variantcalling/manta_tumoronly.nf' +include { STRELKA } from './variantcalling/strelka.nf' +include { FREEBAYES } from './variantcalling/freebayes.nf' +include { MANTA_TUMORONLY } from './variantcalling/manta_tumoronly.nf' workflow TUMOR_ONLY_VARIANT_CALLING { take: @@ -31,9 +31,9 @@ workflow TUMOR_ONLY_VARIANT_CALLING { main: - if(!tools) tools = "" - ch_versions = Channel.empty() + + //TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config freebayes_vcf = Channel.empty() manta_vcf = Channel.empty() mutect2_vcf = Channel.empty() @@ -65,7 +65,7 @@ workflow TUMOR_ONLY_VARIANT_CALLING { [meta, cram, crai, [], [], intervals] } - RUN_FREEBAYES(cram_recalibrated_intervals_freebayes, fasta, fasta_fai) + FREEBAYES(cram_recalibrated_intervals_freebayes, fasta, fasta_fai) freebayes_vcf = RUN_FREEBAYES.out.freebayes_vcf ch_versions = ch_versions.mix(RUN_FREEBAYES.out.versions) } @@ -93,29 +93,29 @@ workflow TUMOR_ONLY_VARIANT_CALLING { //mutect2_vcf_tbi = mutect2_vcf_tbi.mix(GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING.out.mutect2_vcf_gz_tbi) } - // if (tools.contains('manta')){ - // //TODO: Research if splitting by intervals is ok, we pretend for now it is fine. Seems to be the consensus on upstream modules implementaiton too + if (tools.contains('manta')){ + //TODO: Research if splitting by intervals is ok, we pretend for now it is fine. 
Seems to be the consensus on upstream modules implementaiton too - // RUN_MANTA_TUMORONLY(cram_recalibrated_intervals_gz_tbi, - // fasta, - // fasta_fai, - // num_intervals, - // intervals_bed_combine_gz) + MANTA_TUMORONLY(cram_recalibrated_intervals_gz_tbi, + fasta, + fasta_fai, + num_intervals, + intervals_bed_combine_gz) - // manta_vcf = manta_vcf.mix(manta_candidate_small_indels_vcf, manta_candidate_sv_vcf, manta_tumor_sv_vcf) - // ch_versions = ch_versions.mix(RUN_MANTA.out.versions) - // } + manta_vcf = manta_vcf.mix(manta_candidate_small_indels_vcf, manta_candidate_sv_vcf, manta_tumor_sv_vcf) + ch_versions = ch_versions.mix(RUN_MANTA.out.versions) + } - // if (tools.contains('strelka')) { - // RUN_STRELKA(cram_recalibrated_intervals_gz_tbi, - // fasta, - // fasta_fai, - // num_intervals, - // intervals_bed_combine_gz) - // ch_versions = ch_versions.mix(RUN_STRELKA.out.versions) - // strelka_vcf = RUN_STRELKA.out.strelka_vcf + if (tools.contains('strelka')) { + RUN_STRELKA(cram_recalibrated_intervals_gz_tbi, + fasta, + fasta_fai, + num_intervals, + intervals_bed_combine_gz) + ch_versions = ch_versions.mix(RUN_STRELKA.out.versions) + strelka_vcf = RUN_STRELKA.out.strelka_vcf - // } + } // if (tools.contains('tiddit')){ diff --git a/subworkflows/local/variantcalling/manta_tumoronly.nf b/subworkflows/local/variantcalling/manta_tumoronly.nf index ea47c2ff6d..6cbbe95266 100644 --- a/subworkflows/local/variantcalling/manta_tumoronly.nf +++ b/subworkflows/local/variantcalling/manta_tumoronly.nf @@ -6,76 +6,93 @@ include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../modules/local include { CONCAT_VCF as CONCAT_MANTA_TUMOR } from '../../../modules/local/concat_vcf/main' include { MANTA_TUMORONLY } from '../../../modules/local/manta/tumoronly/main' -workflow RUN_MANTA_TUMORONLY { +// TODO: Research if splitting by intervals is ok, we pretend for now it is fine. 
+// Seems to be the consensus on upstream modules implementation too +workflow MANTA_TUMORONLY { take: - cram_recalibrated_intervals_gz_tbi - fasta - fasta_fai - num_intervals - intervals_bed_combine_gz + cram // channel: [mandatory] [meta, cram, crai, interval.bed.gz, interval.bed.gz.tbi] + fasta // channel: [mandatory] + fasta_fai // channel: [mandatory] + intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. + num_intervals // val: [optional] Number of used intervals, mandatory when intervals are provided. main: ch_versions = Channel.empty() - MANTA_TUMORONLY( - cram_recalibrated_intervals_gz_tbi, - fasta, - fasta_fai - ) - ch_versions = ch_versions.mix(MANTA_TUMORONLY.out.versions) + MANTA_TUMORONLY(cram_recalibrated_intervals_gz_tbi, fasta, fasta_fai) - if(no_intervals){ - manta_candidate_small_indels_vcf = MANTA_TUMORONLY.out.candidate_small_indels_vcf - manta_candidate_sv_vcf = MANTA_TUMORONLY.out.candidate_sv_vcf - manta_tumor_sv_vcf = MANTA_TUMORONLY.out.tumor_sv_vcf - }else{ + // Figure out if using intervals or no_intervals + MANTA_TUMORONLY.out.candidate_small_indels_vcf.groupTuple(size: num_intervals) + .branch{ + intervals: it[1].size() > 1 + no_intervals: it[1].size() == 1 + }.set{manta_small_indels_vcf} - BGZIP_VC_MANTA_SV(MANTA_TUMORONLY.out.candidate_small_indels_vcf) - BGZIP_VC_MANTA_SMALL_INDELS(MANTA_TUMORONLY.out.candidate_sv_vcf) - BGZIP_VC_MANTA_TUMOR(MANTA_TUMORONLY.out.tumor_sv_vcf) + MANTA_TUMORONLY.out.candidate_sv_vcf.groupTuple(size: num_intervals) + .branch{ + intervals: it[1].size() > 1 + no_intervals: it[1].size() == 1 + }.set{manta_candidate_sv_vcf} - BGZIP_VC_MANTA_SV.out.vcf.map{ meta, vcf -> + MANTA_TUMORONLY.out.tumor_sv_vcf.groupTuple(size: num_intervals) + .branch{ + intervals: it[1].size() > 1 + no_intervals: it[1].size() == 1 + }.set{manta_tumor_sv_vcf} + + //Only when using intervals + 
BGZIP_VC_MANTA_SMALL_INDELS(manta_small_indels_vcf) + + CONCAT_MANTA_SMALL_INDELS( + BGZIP_VC_MANTA_SMALL_INDELS.out.vcf.map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.sample [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{manta_sv_vcf_to_concat} + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_gz) + + BGZIP_VC_MANTA_SV(manta_candidate_sv_vcf) - BGZIP_VC_MANTA_SMALL_INDELS.out.vcf.map{ meta, vcf -> + CONCAT_MANTA_SV( + BGZIP_VC_MANTA_SV.out.vcf.map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.sample [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{manta_small_indels_vcf_to_concat} + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_gz) - BGZIP_VC_MANTA_TUMOR.out.vcf.map{ meta, vcf -> + BGZIP_VC_MANTA_TUMOR(manta_tumor_sv_vcf) + + CONCAT_MANTA_TUMOR( + BGZIP_VC_MANTA_TUMOR.out.vcf.map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.sample [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{manta_tumor_sv_vcf_to_concat} - - CONCAT_MANTA_SV(manta_sv_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) - CONCAT_MANTA_SMALL_INDELS(manta_small_indels_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - CONCAT_MANTA_TUMOR(manta_tumor_sv_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_gz) - manta_candidate_small_indels_vcf = CONCAT_MANTA_SV.out.vcf - manta_candidate_sv_vcf = CONCAT_MANTA_SMALL_INDELS.out.vcf - manta_tumor_sv_vcf = CONCAT_MANTA_TUMOR.out.vcf + manta_vcf = Channel.empty().mix( + CONCAT_MANTA_SMALL_INDELS.out.vcf, + CONCAT_MANTA_SV.out.vcf, + CONCAT_MANTA_TUMOR.out.vcf, + manta_small_indels_vcf.no_intervals, + manta_candidate_sv_vcf.no_intervals, + manta_tumor_sv_vcf.no_intervals + ) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_TUMOR.out.versions) - 
- ch_versions = ch_versions.mix(CONCAT_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_TUMOR.out.versions) - } - - manta_vcf = manta_vcf.mix(manta_candidate_small_indels_vcf, manta_candidate_sv_vcf, manta_tumor_sv_vcf) + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SMALL_INDELS.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_TUMOR.out.versions) + ch_versions = ch_versions.mix(CONCAT_MANTA_SV.out.versions) + ch_versions = ch_versions.mix(CONCAT_MANTA_SMALL_INDELS.out.versions) + ch_versions = ch_versions.mix(CONCAT_MANTA_TUMOR.out.versions) + ch_versions = ch_versions.mix(MANTA_TUMORONLY.out.versions) emit: - versions = ch_versions manta_vcf + versions = ch_versions } From 176136c156af7881081910320215dc7a4c6c35a4 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 11:27:42 +0100 Subject: [PATCH 16/52] rename subworkflows when accessing output --- .../local/germline_variant_calling.nf | 25 +++++----- subworkflows/local/tumor_variant_calling.nf | 49 +++++++++---------- 2 files changed, 38 insertions(+), 36 deletions(-) diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index f19c5332c4..7acdacba1b 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -59,8 +59,9 @@ workflow GERMLINE_VARIANT_CALLING { // DEEPVARIANT if(params.tools.contains('deepvariant')){ DEEPVARIANT(cram_recalibrated_intervals, fasta, fasta_fai, intervals_bed_combine_gz, num_intervals) - deepvariant_vcf = RUN_DEEPVARIANT.out.deepvariant_vcf - ch_versions = ch_versions.mix(RUN_DEEPVARIANT.out.versions) + + deepvariant_vcf = DEEPVARIANT.out.deepvariant_vcf + ch_versions = ch_versions.mix(DEEPVARIANT.out.versions) } // FREEBAYES @@ -71,8 +72,9 @@ workflow GERMLINE_VARIANT_CALLING { [meta, cram, crai, [], [], 
intervals] } FREEBAYES(cram_recalibrated_intervals_freebayes, fasta, fasta_fai) - freebayes_vcf = RUN_FREEBAYES.out.freebayes_vcf - ch_versions = ch_versions.mix(RUN_FREEBAYES.out.versions) + + freebayes_vcf = FREEBAYES.out.freebayes_vcf + ch_versions = ch_versions.mix(FREEBAYES.out.versions) } // HAPLOTYPECALLER @@ -86,9 +88,10 @@ workflow GERMLINE_VARIANT_CALLING { num_intervals, intervals_bed_combine_gz, intervals_bed_combine_gz_tbi) - ch_versions = ch_versions.mix(RUN_HAPLOTYPECALLER.out.versions) - haplotypecaller_gvcf = RUN_HAPLOTYPECALLER.out.haplotypecaller_gvcf - genotype_gvcf = RUN_HAPLOTYPECALLER.out.genotype_gvcf + + haplotypecaller_gvcf = HAPLOTYPECALLER.out.haplotypecaller_gvcf + genotype_gvcf = HAPLOTYPECALLER.out.genotype_gvcf + ch_versions = ch_versions.mix(HAPLOTYPECALLER.out.versions) } @@ -99,9 +102,9 @@ workflow GERMLINE_VARIANT_CALLING { fasta_fai, intervals_bed_combine_gz, num_intervals) - ch_versions = ch_versions.mix(RUN_MANTA.out.versions) - manta_vcf = RUN_MANTA.out.manta_vcf + manta_vcf = MANTA_GERMLINE.out.manta_vcf + ch_versions = ch_versions.mix(MANTA_GERMLINE.out.versions) } // STRELKA @@ -111,9 +114,9 @@ workflow GERMLINE_VARIANT_CALLING { fasta_fai, intervals_bed_combine_gz, num_intervals) - ch_versions = ch_versions.mix(RUN_STRELKA.out.versions) - strelka_vcf = RUN_STRELKA.out.strelka_vcf + strelka_vcf = STRELKA.out.strelka_vcf + ch_versions = ch_versions.mix(STRELKA.out.versions) } //TIDDIT diff --git a/subworkflows/local/tumor_variant_calling.nf b/subworkflows/local/tumor_variant_calling.nf index 395f4daf3a..0c5440a156 100644 --- a/subworkflows/local/tumor_variant_calling.nf +++ b/subworkflows/local/tumor_variant_calling.nf @@ -66,31 +66,30 @@ workflow TUMOR_ONLY_VARIANT_CALLING { } FREEBAYES(cram_recalibrated_intervals_freebayes, fasta, fasta_fai) - freebayes_vcf = RUN_FREEBAYES.out.freebayes_vcf - ch_versions = ch_versions.mix(RUN_FREEBAYES.out.versions) + + freebayes_vcf = FREEBAYES.out.freebayes_vcf + ch_versions = 
ch_versions.mix(FREEBAYES.out.versions) } if (tools.contains('mutect2')) { which_norm = [] cram_recalibrated_intervals.map{ meta, cram, crai, intervals -> [meta, cram, crai, intervals, which_norm]}.set{cram_recalibrated_mutect2} - GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING( - cram_recalibrated_mutect2, - fasta, - fasta_fai, - dict, - germline_resource, - germline_resource_tbi, - panel_of_normals, - panel_of_normals_tbi, - num_intervals, - no_intervals, - intervals_bed_combine_gz - ) + GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING(cram_recalibrated_mutect2, + fasta, + fasta_fai, + dict, + germline_resource, + germline_resource_tbi, + panel_of_normals, + panel_of_normals_tbi, + num_intervals, + no_intervals, + intervals_bed_combine_gz) + //mutect2_vcf_tbi = mutect2_vcf_tbi.mix(GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING.out.mutect2_vcf_gz_tbi) ch_versions = ch_versions.mix(GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING.out.versions) - //mutect2_vcf_tbi = mutect2_vcf_tbi.mix(GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING.out.mutect2_vcf_gz_tbi) } if (tools.contains('manta')){ @@ -102,19 +101,19 @@ workflow TUMOR_ONLY_VARIANT_CALLING { num_intervals, intervals_bed_combine_gz) - manta_vcf = manta_vcf.mix(manta_candidate_small_indels_vcf, manta_candidate_sv_vcf, manta_tumor_sv_vcf) - ch_versions = ch_versions.mix(RUN_MANTA.out.versions) + manta_vcf = manta_vcf.mix(manta_candidate_small_indels_vcf, manta_candidate_sv_vcf, manta_tumor_sv_vcf) + ch_versions = ch_versions.mix(MANTA_TUMORONLY.out.versions) } if (tools.contains('strelka')) { - RUN_STRELKA(cram_recalibrated_intervals_gz_tbi, - fasta, - fasta_fai, - num_intervals, - intervals_bed_combine_gz) - ch_versions = ch_versions.mix(RUN_STRELKA.out.versions) - strelka_vcf = RUN_STRELKA.out.strelka_vcf + STRELKA(cram_recalibrated_intervals_gz_tbi, + fasta, + fasta_fai, + num_intervals, + intervals_bed_combine_gz) + strelka_vcf = STRELKA.out.strelka_vcf + ch_versions = ch_versions.mix(RUN_STRELKA.out.versions) } From 
6b120a6f4bd26bb504d9c662bb7b5270a62e6d65 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 11:41:31 +0100 Subject: [PATCH 17/52] Rename Strelka sw to STRELKA_SINGLE --- .../local/germline_variant_calling.nf | 8 +++---- subworkflows/local/tumor_variant_calling.nf | 21 +++++++++---------- .../{strelka.nf => strelka_single.nf} | 2 +- 3 files changed, 15 insertions(+), 16 deletions(-) rename subworkflows/local/variantcalling/{strelka.nf => strelka_single.nf} (99%) diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index 7acdacba1b..419d1ad76d 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -6,7 +6,7 @@ include { DEEPVARIANT } from './variantcalling/deepvariant.nf' include { FREEBAYES } from './variantcalling/freebayes.nf' include { HAPLOTYPECALLER } from './variantcalling/haplotypecaller.nf' include { MANTA_GERMLINE } from './variantcalling/manta_germline.nf' -include { STRELKA } from './variantcalling/strelka.nf' +include { STRELKA_SINGLE } from './variantcalling/strelka_single.nf' //include { TIDDIT } from './variantcalling/tiddit.nf' workflow GERMLINE_VARIANT_CALLING { @@ -109,14 +109,14 @@ workflow GERMLINE_VARIANT_CALLING { // STRELKA if (params.tools.contains('strelka')){ - STRELKA(cram_recalibrated_intervals_gz_tbi, + STRELKA_SINGLE(cram_recalibrated_intervals_gz_tbi, fasta, fasta_fai, intervals_bed_combine_gz, num_intervals) - strelka_vcf = STRELKA.out.strelka_vcf - ch_versions = ch_versions.mix(STRELKA.out.versions) + strelka_vcf = STRELKA_SINGLE.out.strelka_vcf + ch_versions = ch_versions.mix(STRELKA_SINGLE.out.versions) } //TIDDIT diff --git a/subworkflows/local/tumor_variant_calling.nf b/subworkflows/local/tumor_variant_calling.nf index 0c5440a156..5bf6b7c2f7 100644 --- a/subworkflows/local/tumor_variant_calling.nf +++ b/subworkflows/local/tumor_variant_calling.nf @@ -3,11 +3,10 @@ // Should be only run on patients without 
normal sample // - -include { GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING } from '../../subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main' -include { STRELKA } from './variantcalling/strelka.nf' include { FREEBAYES } from './variantcalling/freebayes.nf' +include { GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING } from '../../subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main' include { MANTA_TUMORONLY } from './variantcalling/manta_tumoronly.nf' +include { STRELKA_SINGLE } from './variantcalling/strelka_single.nf' workflow TUMOR_ONLY_VARIANT_CALLING { take: @@ -106,14 +105,14 @@ workflow TUMOR_ONLY_VARIANT_CALLING { } if (tools.contains('strelka')) { - STRELKA(cram_recalibrated_intervals_gz_tbi, - fasta, - fasta_fai, - num_intervals, - intervals_bed_combine_gz) - - strelka_vcf = STRELKA.out.strelka_vcf - ch_versions = ch_versions.mix(RUN_STRELKA.out.versions) + STRELKA_SINGLE(cram_recalibrated_intervals_gz_tbi, + fasta, + fasta_fai, + intervals_bed_combine_gz, + num_intervals) + + strelka_vcf = STRELKA_SINGLE.out.strelka_vcf + ch_versions = ch_versions.mix(STRELKA_SINGLE.out.versions) } diff --git a/subworkflows/local/variantcalling/strelka.nf b/subworkflows/local/variantcalling/strelka_single.nf similarity index 99% rename from subworkflows/local/variantcalling/strelka.nf rename to subworkflows/local/variantcalling/strelka_single.nf index 9a453d3c5d..101ce3788e 100644 --- a/subworkflows/local/variantcalling/strelka.nf +++ b/subworkflows/local/variantcalling/strelka_single.nf @@ -6,7 +6,7 @@ include { STRELKA_GERMLINE } from '../../../modules/nf- // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. 
// Seems to be the consensus on upstream modules implementation too -workflow STRELKA { +workflow STRELKA_SINGLE { take: cram // channel: [mandatory] [meta, cram, crai, interval.bed.gz, interval.bed.gz.tbi] fasta // channel: [mandatory] From 4f49c77f9e4608feaf3fe6f3d470888d3814b638 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 14:24:05 +0100 Subject: [PATCH 18/52] Name back to RUN_TOOL, subworkflows and modules can't have the same name, subworkflow will take priority over included module --- .../local/germline_variant_calling.nf | 42 +++---- subworkflows/local/pair_variant_calling.nf | 103 ++++++------------ subworkflows/local/tumor_variant_calling.nf | 24 ++-- .../local/variantcalling/deepvariant.nf | 2 +- .../local/variantcalling/freebayes.nf | 2 +- .../local/variantcalling/haplotypecaller.nf | 2 +- .../local/variantcalling/manta_germline.nf | 20 ++-- .../local/variantcalling/manta_tumoronly.nf | 8 +- .../local/variantcalling/strelka_single.nf | 12 +- .../local/variantcalling/strelka_somatic.nf | 73 +++++++++++++ subworkflows/local/variantcalling/tiddit.nf | 2 +- 11 files changed, 162 insertions(+), 128 deletions(-) create mode 100644 subworkflows/local/variantcalling/strelka_somatic.nf diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index 419d1ad76d..8d4b8a2cc2 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -2,11 +2,11 @@ // GERMLINE VARIANT CALLING // -include { DEEPVARIANT } from './variantcalling/deepvariant.nf' -include { FREEBAYES } from './variantcalling/freebayes.nf' -include { HAPLOTYPECALLER } from './variantcalling/haplotypecaller.nf' -include { MANTA_GERMLINE } from './variantcalling/manta_germline.nf' -include { STRELKA_SINGLE } from './variantcalling/strelka_single.nf' +include { RUN_DEEPVARIANT } from './variantcalling/deepvariant.nf' +include { RUN_FREEBAYES } from './variantcalling/freebayes.nf' +include { 
RUN_HAPLOTYPECALLER } from './variantcalling/haplotypecaller.nf' +include { RUN_MANTA_GERMLINE } from './variantcalling/manta_germline.nf' +include { RUN_STRELKA_SINGLE } from './variantcalling/strelka_single.nf' //include { TIDDIT } from './variantcalling/tiddit.nf' workflow GERMLINE_VARIANT_CALLING { @@ -58,10 +58,10 @@ workflow GERMLINE_VARIANT_CALLING { // DEEPVARIANT if(params.tools.contains('deepvariant')){ - DEEPVARIANT(cram_recalibrated_intervals, fasta, fasta_fai, intervals_bed_combine_gz, num_intervals) + RUN_DEEPVARIANT(cram_recalibrated_intervals, fasta, fasta_fai, intervals_bed_combine_gz, num_intervals) - deepvariant_vcf = DEEPVARIANT.out.deepvariant_vcf - ch_versions = ch_versions.mix(DEEPVARIANT.out.versions) + deepvariant_vcf = RUN_DEEPVARIANT.out.deepvariant_vcf + ch_versions = ch_versions.mix(RUN_DEEPVARIANT.out.versions) } // FREEBAYES @@ -71,15 +71,15 @@ workflow GERMLINE_VARIANT_CALLING { .map{ meta, cram, crai, intervals -> [meta, cram, crai, [], [], intervals] } - FREEBAYES(cram_recalibrated_intervals_freebayes, fasta, fasta_fai) + RUN_FREEBAYES(cram_recalibrated_intervals_freebayes, fasta, fasta_fai) - freebayes_vcf = FREEBAYES.out.freebayes_vcf - ch_versions = ch_versions.mix(FREEBAYES.out.versions) + freebayes_vcf = RUN_FREEBAYES.out.freebayes_vcf + ch_versions = ch_versions.mix(RUN_FREEBAYES.out.versions) } // HAPLOTYPECALLER if (params.tools.contains('haplotypecaller')){ - HAPLOTYPECALLER(cram_recalibrated_intervals, + RUN_HAPLOTYPECALLER(cram_recalibrated_intervals, fasta, fasta_fai, dict, @@ -89,34 +89,34 @@ workflow GERMLINE_VARIANT_CALLING { intervals_bed_combine_gz, intervals_bed_combine_gz_tbi) - haplotypecaller_gvcf = HAPLOTYPECALLER.out.haplotypecaller_gvcf - genotype_gvcf = HAPLOTYPECALLER.out.genotype_gvcf - ch_versions = ch_versions.mix(HAPLOTYPECALLER.out.versions) + haplotypecaller_gvcf = RUN_HAPLOTYPECALLER.out.haplotypecaller_gvcf + genotype_gvcf = RUN_HAPLOTYPECALLER.out.genotype_gvcf + ch_versions = 
ch_versions.mix(RUN_HAPLOTYPECALLER.out.versions) } // MANTA if (params.tools.contains('manta')){ - MANTA_GERMLINE (cram_recalibrated_intervals_gz_tbi, + RUN_MANTA_GERMLINE (cram_recalibrated_intervals_gz_tbi, fasta, fasta_fai, intervals_bed_combine_gz, num_intervals) - manta_vcf = MANTA_GERMLINE.out.manta_vcf - ch_versions = ch_versions.mix(MANTA_GERMLINE.out.versions) + manta_vcf = RUN_MANTA_GERMLINE.out.manta_vcf + ch_versions = ch_versions.mix(RUN_MANTA_GERMLINE.out.versions) } // STRELKA if (params.tools.contains('strelka')){ - STRELKA_SINGLE(cram_recalibrated_intervals_gz_tbi, + RUN_STRELKA_SINGLE(cram_recalibrated_intervals_gz_tbi, fasta, fasta_fai, intervals_bed_combine_gz, num_intervals) - strelka_vcf = STRELKA_SINGLE.out.strelka_vcf - ch_versions = ch_versions.mix(STRELKA_SINGLE.out.versions) + strelka_vcf = RUN_STRELKA_SINGLE.out.strelka_vcf + ch_versions = ch_versions.mix(RUN_STRELKA_SINGLE.out.versions) } //TIDDIT diff --git a/subworkflows/local/pair_variant_calling.nf b/subworkflows/local/pair_variant_calling.nf index 59c4c5788a..770cefb3c7 100644 --- a/subworkflows/local/pair_variant_calling.nf +++ b/subworkflows/local/pair_variant_calling.nf @@ -1,15 +1,11 @@ // // PAIRED VARIANT CALLING // -include { BGZIP as BGZIP_VC_STRELKA_INDELS } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_STRELKA_SNVS } from '../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_STRELKA_INDELS } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_STRELKA_SNVS } from '../../modules/local/concat_vcf/main' include { GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING } from '../../subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main' include { MSISENSORPRO_MSI_SOMATIC } from '../../modules/nf-core/modules/msisensorpro/msi_somatic/main' -include { STRELKA_SOMATIC } from '../../modules/nf-core/modules/strelka/somatic/main' -include { RUN_MANTA_SOMATIC } from './variantcalling/manta_somatic.nf' +include { RUN_MANTA_SOMATIC } from 
'./variantcalling/manta_somatic.nf' +include { RUN_STRELKA_SOMATIC } from './variantcalling/strelka_somatic.nf' workflow PAIR_VARIANT_CALLING { take: @@ -73,73 +69,38 @@ workflow PAIR_VARIANT_CALLING { // intervals_bed_combine_gz) // } - // cram_pair_strelka = Channel.empty() - // if (tools.contains('strelka') && tools.contains('manta')) { - // cram_pair_strelka = cram_pair.join(manta_somatic_sv_vcf).combine(intervals_bed_gz_tbi) - // .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi, bed, tbi -> - // normal_id = meta.normal_id - // tumor_id = meta.tumor_id - - // new_bed = bed.simpleName != "no_intervals" ? bed : [] - // new_tbi = tbi.simpleName != "no_intervals" ? tbi : [] - // id = bed.simpleName != "no_intervals" ? tumor_id + "_vs_" + normal_id + "_" + bed.simpleName : tumor_id + "_vs_" + normal_id - // new_meta = [ id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient] - // [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi, new_bed, new_tbi] - // } - // } else if (tools.contains('strelka') && !tools.contains('manta')) { - // cram_pair_strelka = cram_pair.combine(intervals_bed_gz_tbi) - // .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, bed, tbi -> - // normal_id = meta.normal_id - // tumor_id = meta.tumor_id - - // new_bed = bed.simpleName != "no_intervals" ? bed : [] - // new_tbi = tbi.simpleName != "no_intervals" ? tbi : [] - // id = bed.simpleName != "no_intervals" ? 
tumor_id + "_vs_" + normal_id + "_" + bed.simpleName : tumor_id + "_vs_" + normal_id - // new_meta = [ id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient] - - // [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, [], [], new_bed, new_tbi] - // } - // } - - // if (tools.contains('strelka')) { - - // STRELKA_SOMATIC( - // cram_pair_strelka, - // fasta, - // fasta_fai - // ) - - // if (no_intervals) { - // strelka_snvs_vcf_gz = STRELKA_SOMATIC.out.vcf_snvs - // strelka_indels_vcf_gz = STRELKA_SOMATIC.out.vcf_indels - // } else { - // BGZIP_VC_STRELKA_SNVS(STRELKA_SOMATIC.out.vcf_snvs) - // BGZIP_VC_STRELKA_INDELS(STRELKA_SOMATIC.out.vcf_indels) - - // strelka_snvs_vcf_to_concat = BGZIP_VC_STRELKA_SNVS.out.vcf.map{ meta, vcf -> - // new_meta = meta.clone() - // new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id - // [new_meta, vcf] - // }.groupTuple(size: num_intervals) - - // strelka_indels_vcf_to_concat = BGZIP_VC_STRELKA_INDELS.out.vcf.map{ meta, vcf -> - // new_meta = meta.clone() - // new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id - // [new_meta, vcf] - // }.groupTuple(size: num_intervals) - - // CONCAT_STRELKA_SNVS(strelka_snvs_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - // CONCAT_STRELKA_INDELS(strelka_indels_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - - // strelka_snvs_vcf_gz = CONCAT_STRELKA_SNVS.out.vcf - // strelka_indels_vcf_gz = CONCAT_STRELKA_INDELS.out.vcf + cram_pair_strelka = Channel.empty() + if (tools.contains('strelka') && tools.contains('manta')) { + cram_pair_strelka = cram_pair.join(manta_somatic_sv_vcf).combine(intervals_bed_gz_tbi) + .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi, bed, tbi -> + normal_id = meta.normal_id + tumor_id = meta.tumor_id + + new_bed = bed.simpleName != "no_intervals" ? bed : [] + new_tbi = tbi.simpleName != "no_intervals" ? tbi : [] + id = bed.simpleName != "no_intervals" ? 
tumor_id + "_vs_" + normal_id + "_" + bed.simpleName : tumor_id + "_vs_" + normal_id + new_meta = [ id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient] + [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi, new_bed, new_tbi] + } + } else if (tools.contains('strelka') && !tools.contains('manta')) { + cram_pair_strelka = cram_pair.combine(intervals_bed_gz_tbi) + .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, bed, tbi -> + normal_id = meta.normal_id + tumor_id = meta.tumor_id + + new_bed = bed.simpleName != "no_intervals" ? bed : [] + new_tbi = tbi.simpleName != "no_intervals" ? tbi : [] + id = bed.simpleName != "no_intervals" ? tumor_id + "_vs_" + normal_id + "_" + bed.simpleName : tumor_id + "_vs_" + normal_id + new_meta = [ id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient] + + [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, [], [], new_bed, new_tbi] + } + } - // ch_versions = ch_versions.mix(BGZIP_VC_STRELKA_SNVS.out.versions) - // ch_versions = ch_versions.mix(CONCAT_STRELKA_SNVS.out.versions) - // } + if (tools.contains('strelka')) { + RUN_STRELKA_SOMATIC(cram_pair_strelka, fasta, fasta_fai, intervals_bed_combine_gz, num_intervals) - // strelka_vcf = strelka_vcf.mix(strelka_snvs_vcf_gz,strelka_indels_vcf_gz) - // } + } if (tools.contains('msisensorpro')) { diff --git a/subworkflows/local/tumor_variant_calling.nf b/subworkflows/local/tumor_variant_calling.nf index 5bf6b7c2f7..05a30e8b5b 100644 --- a/subworkflows/local/tumor_variant_calling.nf +++ b/subworkflows/local/tumor_variant_calling.nf @@ -3,10 +3,10 @@ // Should be only run on patients without normal sample // -include { FREEBAYES } from './variantcalling/freebayes.nf' +include { RUN_FREEBAYES } from './variantcalling/freebayes.nf' include { GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING } from 
'../../subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main' -include { MANTA_TUMORONLY } from './variantcalling/manta_tumoronly.nf' -include { STRELKA_SINGLE } from './variantcalling/strelka_single.nf' +include { RUN_MANTA_TUMORONLY } from './variantcalling/manta_tumoronly.nf' +include { RUN_STRELKA_SINGLE } from './variantcalling/strelka_single.nf' workflow TUMOR_ONLY_VARIANT_CALLING { take: @@ -64,10 +64,10 @@ workflow TUMOR_ONLY_VARIANT_CALLING { [meta, cram, crai, [], [], intervals] } - FREEBAYES(cram_recalibrated_intervals_freebayes, fasta, fasta_fai) + RUN_FREEBAYES(cram_recalibrated_intervals_freebayes, fasta, fasta_fai) - freebayes_vcf = FREEBAYES.out.freebayes_vcf - ch_versions = ch_versions.mix(FREEBAYES.out.versions) + freebayes_vcf = RUN_FREEBAYES.out.freebayes_vcf + ch_versions = ch_versions.mix(RUN_FREEBAYES.out.versions) } if (tools.contains('mutect2')) { @@ -94,25 +94,25 @@ workflow TUMOR_ONLY_VARIANT_CALLING { if (tools.contains('manta')){ //TODO: Research if splitting by intervals is ok, we pretend for now it is fine. 
Seems to be the consensus on upstream modules implementaiton too - MANTA_TUMORONLY(cram_recalibrated_intervals_gz_tbi, + RUN_MANTA_TUMORONLY(cram_recalibrated_intervals_gz_tbi, fasta, fasta_fai, num_intervals, intervals_bed_combine_gz) - manta_vcf = manta_vcf.mix(manta_candidate_small_indels_vcf, manta_candidate_sv_vcf, manta_tumor_sv_vcf) - ch_versions = ch_versions.mix(MANTA_TUMORONLY.out.versions) + manta_vcf = RUN_MANTA_TUMORONLY.out.manta_vcf + ch_versions = ch_versions.mix(RUN_MANTA_TUMORONLY.out.versions) } if (tools.contains('strelka')) { - STRELKA_SINGLE(cram_recalibrated_intervals_gz_tbi, + RUN_STRELKA_SINGLE(cram_recalibrated_intervals_gz_tbi, fasta, fasta_fai, intervals_bed_combine_gz, num_intervals) - strelka_vcf = STRELKA_SINGLE.out.strelka_vcf - ch_versions = ch_versions.mix(STRELKA_SINGLE.out.versions) + strelka_vcf = RUN_STRELKA_SINGLE.out.strelka_vcf + ch_versions = ch_versions.mix(RUN_STRELKA_SINGLE.out.versions) } diff --git a/subworkflows/local/variantcalling/deepvariant.nf b/subworkflows/local/variantcalling/deepvariant.nf index 698e97ef75..0e4c846c88 100644 --- a/subworkflows/local/variantcalling/deepvariant.nf +++ b/subworkflows/local/variantcalling/deepvariant.nf @@ -8,7 +8,7 @@ include { TABIX_TABIX as TABIX_VC_DEEPVARIANT_VCF } from '../../../modules/nf-c //TODO: benchmark if it is better to provide multiple bed files & run on multiple machines + mergeing afterwards || one containing all intervals and run on one larger machine // Deepvariant: https://github.com/google/deepvariant/issues/510 -workflow DEEPVARIANT { +workflow RUN_DEEPVARIANT { take: cram // channel: [mandatory] [meta, cram, crai, interval] fasta // channel: [mandatory] diff --git a/subworkflows/local/variantcalling/freebayes.nf b/subworkflows/local/variantcalling/freebayes.nf index fc13be3ba1..644d582355 100644 --- a/subworkflows/local/variantcalling/freebayes.nf +++ b/subworkflows/local/variantcalling/freebayes.nf @@ -3,7 +3,7 @@ include { CONCAT_VCF as CONCAT_FREEBAYES } 
from '../../../modules/loc include { FREEBAYES } from '../../../modules/nf-core/modules/freebayes/main' include { TABIX_TABIX as TABIX_VC_FREEBAYES } from '../../../modules/nf-core/modules/tabix/tabix/main' -workflow FREEBAYES { +workflow RUN_FREEBAYES { take: cram // channel: [mandatory] [meta, cram, crai, [], [], interval] fasta // channel: [mandatory] diff --git a/subworkflows/local/variantcalling/haplotypecaller.nf b/subworkflows/local/variantcalling/haplotypecaller.nf index 3556abf8cc..a4208acec4 100644 --- a/subworkflows/local/variantcalling/haplotypecaller.nf +++ b/subworkflows/local/variantcalling/haplotypecaller.nf @@ -5,7 +5,7 @@ include { GATK4_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../../modules/nf- include { GATK_JOINT_GERMLINE_VARIANT_CALLING } from '../../../subworkflows/nf-core/gatk4/joint_germline_variant_calling/main' include { TABIX_TABIX as TABIX_VC_HAPLOTYPECALLER } from '../../../modules/nf-core/modules/tabix/tabix/main' -workflow HAPLOTYPECALLER { +workflow RUN_HAPLOTYPECALLER { take: cram_recalibrated_intervals fasta diff --git a/subworkflows/local/variantcalling/manta_germline.nf b/subworkflows/local/variantcalling/manta_germline.nf index 58abec65eb..7d9eb7b614 100644 --- a/subworkflows/local/variantcalling/manta_germline.nf +++ b/subworkflows/local/variantcalling/manta_germline.nf @@ -8,7 +8,7 @@ include { MANTA_GERMLINE } from '../../../modules/loc // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. 
// Seems to be the consensus on upstream modules implementation too -workflow MANTA_GERMLINE { +workflow RUN_MANTA_GERMLINE { take: cram // channel: [mandatory] [meta, cram, crai, interval.bed.gz, interval.bed.gz.tbi] fasta // channel: [mandatory] @@ -42,10 +42,10 @@ workflow MANTA_GERMLINE { }.set{manta_diploid_sv_vcf} // Only when using intervals - BGZIP_VC_MANTA_DIPLOID(MANTA_GERMLINE.out.diploid_sv_vcf) + BGZIP_VC_MANTA_SMALL_INDELS(manta_small_indels_vcf.intervals) - CONCAT_MANTA_DIPLOID( - BGZIP_VC_MANTA_DIPLOID.out.vcf + CONCAT_MANTA_SMALL_INDELS( + BGZIP_VC_MANTA_SMALL_INDELS.out.vcf .map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.sample @@ -54,10 +54,10 @@ workflow MANTA_GERMLINE { fasta_fai, intervals_bed_gz) - BGZIP_VC_MANTA_SMALL_INDELS(MANTA_GERMLINE.out.candidate_small_indels_vcf) + BGZIP_VC_MANTA_SV(manta_sv_vcf.intervals) - CONCAT_MANTA_SMALL_INDELS( - BGZIP_VC_MANTA_SMALL_INDELS.out.vcf + CONCAT_MANTA_SV( + BGZIP_VC_MANTA_SV.out.vcf .map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.sample @@ -66,10 +66,10 @@ workflow MANTA_GERMLINE { fasta_fai, intervals_bed_gz) - BGZIP_VC_MANTA_SV(MANTA_GERMLINE.out.candidate_sv_vcf) + BGZIP_VC_MANTA_DIPLOID(manta_diploid_sv_vcf.intervals) - CONCAT_MANTA_SV( - BGZIP_VC_MANTA_SV.out.vcf + CONCAT_MANTA_DIPLOID( + BGZIP_VC_MANTA_DIPLOID.out.vcf .map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.sample diff --git a/subworkflows/local/variantcalling/manta_tumoronly.nf b/subworkflows/local/variantcalling/manta_tumoronly.nf index 6cbbe95266..c27f04c733 100644 --- a/subworkflows/local/variantcalling/manta_tumoronly.nf +++ b/subworkflows/local/variantcalling/manta_tumoronly.nf @@ -8,7 +8,7 @@ include { MANTA_TUMORONLY } from '../../../modules/local // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. 
// Seems to be the consensus on upstream modules implementation too -workflow MANTA_TUMORONLY { +workflow RUN_MANTA_TUMORONLY { take: cram // channel: [mandatory] [meta, cram, crai, interval.bed.gz, interval.bed.gz.tbi] fasta // channel: [mandatory] @@ -42,7 +42,7 @@ workflow MANTA_TUMORONLY { }.set{manta_tumor_sv_vcf} //Only when using intervals - BGZIP_VC_MANTA_SMALL_INDELS(manta_small_indels_vcf) + BGZIP_VC_MANTA_SMALL_INDELS(manta_small_indels_vcf.intervals) CONCAT_MANTA_SMALL_INDELS( BGZIP_VC_MANTA_SMALL_INDELS.out.vcf.map{ meta, vcf -> @@ -53,7 +53,7 @@ workflow MANTA_TUMORONLY { fasta_fai, intervals_bed_gz) - BGZIP_VC_MANTA_SV(manta_candidate_sv_vcf) + BGZIP_VC_MANTA_SV(manta_candidate_sv_vcf.intervals) CONCAT_MANTA_SV( BGZIP_VC_MANTA_SV.out.vcf.map{ meta, vcf -> @@ -64,7 +64,7 @@ workflow MANTA_TUMORONLY { fasta_fai, intervals_bed_gz) - BGZIP_VC_MANTA_TUMOR(manta_tumor_sv_vcf) + BGZIP_VC_MANTA_TUMOR(manta_tumor_sv_vcf.intervals) CONCAT_MANTA_TUMOR( BGZIP_VC_MANTA_TUMOR.out.vcf.map{ meta, vcf -> diff --git a/subworkflows/local/variantcalling/strelka_single.nf b/subworkflows/local/variantcalling/strelka_single.nf index 101ce3788e..42d32ec5ff 100644 --- a/subworkflows/local/variantcalling/strelka_single.nf +++ b/subworkflows/local/variantcalling/strelka_single.nf @@ -1,12 +1,12 @@ include { BGZIP as BGZIP_VC_STRELKA } from '../../../modules/local/bgzip' include { BGZIP as BGZIP_VC_STRELKA_GENOME } from '../../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_STRELKA } from '../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_STRELKA_GENOME } from '../../../modules/local/concat_vcf/main' -include { STRELKA_GERMLINE } from '../../../modules/nf-core/modules/strelka/germline/main' +include { CONCAT_VCF as CONCAT_STRELKA } from '../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_STRELKA_GENOME } from '../../../modules/local/concat_vcf/main' +include { STRELKA_GERMLINE } from 
'../../../modules/nf-core/modules/strelka/germline/main' // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. // Seems to be the consensus on upstream modules implementation too -workflow STRELKA_SINGLE { +workflow RUN_STRELKA_SINGLE { take: cram // channel: [mandatory] [meta, cram, crai, interval.bed.gz, interval.bed.gz.tbi] fasta // channel: [mandatory] @@ -34,7 +34,7 @@ workflow STRELKA_SINGLE { }.set{strelka_genome_vcf} // Only when using intervals - BGZIP_VC_STRELKA(STRELKA_GERMLINE.out.vcf) + BGZIP_VC_STRELKA(strelka_vcf.intervals) CONCAT_STRELKA( BGZIP_VC_STRELKA.out.vcf @@ -46,7 +46,7 @@ workflow STRELKA_SINGLE { fasta_fai, intervals_bed_gz) - BGZIP_VC_STRELKA_GENOME(STRELKA_GERMLINE.out.genome_vcf) + BGZIP_VC_STRELKA_GENOME(strelka_genome_vcf.intervals) CONCAT_STRELKA_GENOME( BGZIP_VC_STRELKA_GENOME.out.vcf diff --git a/subworkflows/local/variantcalling/strelka_somatic.nf b/subworkflows/local/variantcalling/strelka_somatic.nf new file mode 100644 index 0000000000..5285ac11ce --- /dev/null +++ b/subworkflows/local/variantcalling/strelka_somatic.nf @@ -0,0 +1,73 @@ +include { BGZIP as BGZIP_VC_STRELKA_INDELS } from '../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_STRELKA_SNVS } from '../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_STRELKA_INDELS } from '../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_STRELKA_SNVS } from '../../../modules/local/concat_vcf/main' +include { STRELKA_SOMATIC } from '../../../modules/nf-core/modules/strelka/somatic/main' + +// TODO: Research if splitting by intervals is ok, we pretend for now it is fine. 
+// Seems to be the consensus on upstream modules implementation too +workflow RUN_STRELKA_SOMATIC { + take: + cram // channel: [mandatory] [meta, normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi, interval.bed.gz, interval.bed.gz.tbi] manta* are optional + fasta // channel: [mandatory] + fasta_fai // channel: [mandatory] + intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. + num_intervals // val: [optional] Number of used intervals, mandatory when intervals are provided. + + main: + + ch_versions = Channel.empty() + + cram.view() + STRELKA_SOMATIC(cram, fasta, fasta_fai ) + + // // Figure out if using intervals or no_intervals + // STRELKA_SOMATIC.out.vcf_snvs.groupTuple(size: num_intervals) + // .branch{ + // intervals: it[1].size() > 1 + // no_intervals: it[1].size() == 1 + // }.set{strelka_vcf_snvs} + + // STRELKA_SOMATIC.out.vcf_indels.groupTuple(size: num_intervals) + // .branch{ + // intervals: it[1].size() > 1 + // no_intervals: it[1].size() == 1 + // }.set{strelka_vcf_indels} + + // // Only when using intervals + // BGZIP_VC_STRELKA_SNVS(strelka_vcf_snvs.intervals) + + // CONCAT_STRELKA_SNVS(BGZIP_VC_STRELKA_SNVS.out.vcf.map{ meta, vcf -> + // new_meta = meta.clone() + // new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id + // [new_meta, vcf] + // }.groupTuple(size: num_intervals), + // fasta_fai, + // intervals_bed_gz) + + // BGZIP_VC_STRELKA_INDELS(strelka_vcf_indels.intervals) + + // CONCAT_STRELKA_INDELS(BGZIP_VC_STRELKA_INDELS.out.vcf.map{ meta, vcf -> + // new_meta = meta.clone() + // new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id + // [new_meta, vcf] + // }.groupTuple(size: num_intervals), + // fasta_fai, + // intervals_bed_gz) + + + + // ch_versions = ch_versions.mix(BGZIP_VC_STRELKA_SNVS.out.versions) + // ch_versions = ch_versions.mix(CONCAT_STRELKA_SNVS.out.versions) + // ch_versions = 
ch_versions.mix(STRELKA_SOMATIC.out.versions) + + strelka_vcf = Channel.empty()//.mix( + //CONCAT_STRELKA_SNVS.out.vcf, + //CONCAT_STRELKA_INDELS.out.vcf, + //strelka_vcf_snvs.no_intervals, + //strelka_vcf_indels.no_intervals) + + emit: + strelka_vcf + versions = ch_versions +} diff --git a/subworkflows/local/variantcalling/tiddit.nf b/subworkflows/local/variantcalling/tiddit.nf index 354697101e..87ec6050b1 100644 --- a/subworkflows/local/variantcalling/tiddit.nf +++ b/subworkflows/local/variantcalling/tiddit.nf @@ -2,7 +2,7 @@ include { TABIX_BGZIPTABIX as TABIX_BGZIP_TIDDIT_SV } from '../../../modules/nf- include { TIDDIT_SV } from '../../../modules/nf-core/modules/tiddit/sv/main' //TODO: UNDER CONSTRUCTIONS -workflow TIDDIT { +workflow RUN_TIDDIT { take: From c6f4b03cd367f30087b2caae8ad62233e4f4ebec Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 17:13:55 +0100 Subject: [PATCH 19/52] Remove groupTuple, do branching based on num_intervals, since meta maps have different id fields when intervals are present --- subworkflows/local/pair_variant_calling.nf | 26 +++---- .../local/variantcalling/manta_germline.nf | 21 +++--- .../local/variantcalling/manta_somatic.nf | 18 ++--- .../local/variantcalling/manta_tumoronly.nf | 23 +++--- .../local/variantcalling/strelka_single.nf | 23 +++--- .../local/variantcalling/strelka_somatic.nf | 73 +++++++++---------- 6 files changed, 85 insertions(+), 99 deletions(-) diff --git a/subworkflows/local/pair_variant_calling.nf b/subworkflows/local/pair_variant_calling.nf index 770cefb3c7..880ce40400 100644 --- a/subworkflows/local/pair_variant_calling.nf +++ b/subworkflows/local/pair_variant_calling.nf @@ -30,9 +30,9 @@ workflow PAIR_VARIANT_CALLING { main: - if (!tools) tools = "" - ch_versions = Channel.empty() + + //TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config manta_vcf = Channel.empty() strelka_vcf = Channel.empty() msisensorpro_output = 
Channel.empty() @@ -61,13 +61,13 @@ workflow PAIR_VARIANT_CALLING { [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, new_bed, new_tbi] } - // if (tools.contains('manta')) { - // RUN_MANTA_SOMATIC(cram_pair_intervals_gz_tbi, - // fasta, - // fasta_fai, - // num_intervals, - // intervals_bed_combine_gz) - // } + if (tools.contains('manta')) { + RUN_MANTA_SOMATIC(cram_pair_intervals_gz_tbi, + fasta, + fasta_fai, + num_intervals, + intervals_bed_combine_gz) + } cram_pair_strelka = Channel.empty() if (tools.contains('strelka') && tools.contains('manta')) { @@ -99,17 +99,11 @@ workflow PAIR_VARIANT_CALLING { if (tools.contains('strelka')) { RUN_STRELKA_SOMATIC(cram_pair_strelka, fasta, fasta_fai, intervals_bed_combine_gz, num_intervals) - } if (tools.contains('msisensorpro')) { - - MSISENSORPRO_MSI_SOMATIC( - cram_pair_intervals, - fasta, - msisensorpro_scan) + MSISENSORPRO_MSI_SOMATIC(cram_pair_intervals, fasta, msisensorpro_scan) ch_versions = ch_versions.mix(MSISENSORPRO_MSI_SOMATIC.out.versions) - msisensorpro_output = msisensorpro_output.mix(MSISENSORPRO_MSI_SOMATIC.out.output_report) } diff --git a/subworkflows/local/variantcalling/manta_germline.nf b/subworkflows/local/variantcalling/manta_germline.nf index 7d9eb7b614..6ce86a4720 100644 --- a/subworkflows/local/variantcalling/manta_germline.nf +++ b/subworkflows/local/variantcalling/manta_germline.nf @@ -23,22 +23,19 @@ workflow RUN_MANTA_GERMLINE { MANTA_GERMLINE(cram, fasta, fasta_fai) // Figure out if using intervals or no_intervals - MANTA_GERMLINE.out.candidate_small_indels_vcf.groupTuple(size: num_intervals) - .branch{ - intervals: it[1].size() > 1 - no_intervals: it[1].size() == 1 + MANTA_GERMLINE.out.candidate_small_indels_vcf.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 }.set{manta_small_indels_vcf} - MANTA_GERMLINE.out.candidate_sv_vcf.groupTuple(size: num_intervals) - .branch{ - intervals: it[1].size() > 1 - no_intervals: it[1].size() == 1 + 
MANTA_GERMLINE.out.candidate_sv_vcf.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 }.set{manta_sv_vcf} - MANTA_GERMLINE.out.diploid_sv_vcf.groupTuple(size: num_intervals) - .branch{ - intervals: it[1].size() > 1 - no_intervals: it[1].size() == 1 + MANTA_GERMLINE.out.diploid_sv_vcf.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 }.set{manta_diploid_sv_vcf} // Only when using intervals diff --git a/subworkflows/local/variantcalling/manta_somatic.nf b/subworkflows/local/variantcalling/manta_somatic.nf index c48ced6b38..310a318462 100644 --- a/subworkflows/local/variantcalling/manta_somatic.nf +++ b/subworkflows/local/variantcalling/manta_somatic.nf @@ -10,21 +10,17 @@ include { MANTA_SOMATIC } from '../../../modules/loc workflow RUN_MANTA_SOMATIC { take: - cram_pair_intervals_gz_tbi - fasta - fasta_fai - num_intervals - intervals_bed_combine_gz + cram // channel: [mandatory] [meta, normal_cram, normal_crai, tumor_cram, tumor_crai, interval.bed.gz, interval.bed.gz.tbi] + fasta // channel: [mandatory] + fasta_fai // channel: [mandatory] + intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. + num_intervals // val: [optional] Number of used intervals, mandatory when intervals are provided. 
main: ch_versions = Channel.empty() - MANTA_SOMATIC( - cram_pair_intervals_gz_tbi, - fasta, - fasta_fai) + MANTA_SOMATIC(cram_pair_intervals_gz_tbi, fasta, fasta_fai) - ch_versions = ch_versions.mix(MANTA_SOMATIC.out.versions) if (no_intervals) { manta_candidate_small_indels_vcf = MANTA_SOMATIC.out.candidate_small_indels_vcf @@ -85,6 +81,8 @@ workflow RUN_MANTA_SOMATIC { manta_vcf = manta_vcf.mix(manta_candidate_small_indels_vcf,manta_candidate_sv_vcf,manta_diploid_sv_vcf,manta_somatic_sv_vcf) + ch_versions = ch_versions.mix(MANTA_SOMATIC.out.versions) + emit: versions = ch_versions diff --git a/subworkflows/local/variantcalling/manta_tumoronly.nf b/subworkflows/local/variantcalling/manta_tumoronly.nf index c27f04c733..31fe396cc7 100644 --- a/subworkflows/local/variantcalling/manta_tumoronly.nf +++ b/subworkflows/local/variantcalling/manta_tumoronly.nf @@ -20,25 +20,22 @@ workflow RUN_MANTA_TUMORONLY { ch_versions = Channel.empty() - MANTA_TUMORONLY(cram_recalibrated_intervals_gz_tbi, fasta, fasta_fai) + MANTA_TUMORONLY(cram, fasta, fasta_fai) // Figure out if using intervals or no_intervals - MANTA_TUMORONLY.out.candidate_small_indels_vcf.groupTuple(size: num_intervals) - .branch{ - intervals: it[1].size() > 1 - no_intervals: it[1].size() == 1 + MANTA_TUMORONLY.out.candidate_small_indels_vcf.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 }.set{manta_small_indels_vcf} - MANTA_TUMORONLY.out.candidate_sv_vcf.groupTuple(size: num_intervals) - .branch{ - intervals: it[1].size() > 1 - no_intervals: it[1].size() == 1 + MANTA_TUMORONLY.out.candidate_sv_vcf.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 }.set{manta_candidate_sv_vcf} - MANTA_TUMORONLY.out.tumor_sv_vcf.groupTuple(size: num_intervals) - .branch{ - intervals: it[1].size() > 1 - no_intervals: it[1].size() == 1 + MANTA_TUMORONLY.out.tumor_sv_vcf.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 }.set{manta_tumor_sv_vcf} //Only when 
using intervals diff --git a/subworkflows/local/variantcalling/strelka_single.nf b/subworkflows/local/variantcalling/strelka_single.nf index 42d32ec5ff..2811dd48ca 100644 --- a/subworkflows/local/variantcalling/strelka_single.nf +++ b/subworkflows/local/variantcalling/strelka_single.nf @@ -21,16 +21,21 @@ workflow RUN_STRELKA_SINGLE { STRELKA_GERMLINE(cram, fasta, fasta_fai) // Figure out if using intervals or no_intervals - STRELKA_GERMLINE.out.vcf.groupTuple(size: num_intervals) - .branch{ - intervals: it[1].size() > 1 - no_intervals: it[1].size() == 1 - }.set{strelka_vcf} + STRELKA_GERMLINE.out.vcf.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + } + .set{strelka_vcf} - STRELKA_GERMLINE.out.genome_vcf.groupTuple(size: num_intervals) - .branch{ - intervals: it[1].size() > 1 - no_intervals: it[1].size() == 1 + strelka_vcf.intervals.view() + strelka_vcf.no_intervals.view() + + // STRELKA_GERMLINE.out.vcf.view() + + STRELKA_GERMLINE.out.genome_vcf.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + } }.set{strelka_genome_vcf} // Only when using intervals diff --git a/subworkflows/local/variantcalling/strelka_somatic.nf b/subworkflows/local/variantcalling/strelka_somatic.nf index 5285ac11ce..8c1a14ccba 100644 --- a/subworkflows/local/variantcalling/strelka_somatic.nf +++ b/subworkflows/local/variantcalling/strelka_somatic.nf @@ -18,54 +18,49 @@ workflow RUN_STRELKA_SOMATIC { ch_versions = Channel.empty() - cram.view() STRELKA_SOMATIC(cram, fasta, fasta_fai ) - // // Figure out if using intervals or no_intervals - // STRELKA_SOMATIC.out.vcf_snvs.groupTuple(size: num_intervals) - // .branch{ - // intervals: it[1].size() > 1 - // no_intervals: it[1].size() == 1 - // }.set{strelka_vcf_snvs} + // Figure out if using intervals or no_intervals + STRELKA_SOMATIC.out.vcf_snvs.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{strelka_vcf_snvs} - // STRELKA_SOMATIC.out.vcf_indels.groupTuple(size: 
num_intervals) - // .branch{ - // intervals: it[1].size() > 1 - // no_intervals: it[1].size() == 1 - // }.set{strelka_vcf_indels} + STRELKA_SOMATIC.out.vcf_indels.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{strelka_vcf_indels} - // // Only when using intervals - // BGZIP_VC_STRELKA_SNVS(strelka_vcf_snvs.intervals) + // Only when using intervals + BGZIP_VC_STRELKA_SNVS(strelka_vcf_snvs.intervals) - // CONCAT_STRELKA_SNVS(BGZIP_VC_STRELKA_SNVS.out.vcf.map{ meta, vcf -> - // new_meta = meta.clone() - // new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id - // [new_meta, vcf] - // }.groupTuple(size: num_intervals), - // fasta_fai, - // intervals_bed_gz) + CONCAT_STRELKA_SNVS(BGZIP_VC_STRELKA_SNVS.out.vcf.map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_gz) - // BGZIP_VC_STRELKA_INDELS(strelka_vcf_indels.intervals) + BGZIP_VC_STRELKA_INDELS(strelka_vcf_indels.intervals) - // CONCAT_STRELKA_INDELS(BGZIP_VC_STRELKA_INDELS.out.vcf.map{ meta, vcf -> - // new_meta = meta.clone() - // new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id - // [new_meta, vcf] - // }.groupTuple(size: num_intervals), - // fasta_fai, - // intervals_bed_gz) + CONCAT_STRELKA_INDELS(BGZIP_VC_STRELKA_INDELS.out.vcf.map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_gz) + ch_versions = ch_versions.mix(BGZIP_VC_STRELKA_SNVS.out.versions) + ch_versions = ch_versions.mix(CONCAT_STRELKA_SNVS.out.versions) + ch_versions = ch_versions.mix(STRELKA_SOMATIC.out.versions) - - // ch_versions = ch_versions.mix(BGZIP_VC_STRELKA_SNVS.out.versions) - // ch_versions = ch_versions.mix(CONCAT_STRELKA_SNVS.out.versions) - // ch_versions = ch_versions.mix(STRELKA_SOMATIC.out.versions) - 
- strelka_vcf = Channel.empty()//.mix( - //CONCAT_STRELKA_SNVS.out.vcf, - //CONCAT_STRELKA_INDELS.out.vcf, - //strelka_vcf_snvs.no_intervals, - //strelka_vcf_indels.no_intervals) + strelka_vcf = Channel.empty().mix( + CONCAT_STRELKA_SNVS.out.vcf, + CONCAT_STRELKA_INDELS.out.vcf, + strelka_vcf_snvs.no_intervals, + strelka_vcf_indels.no_intervals) emit: strelka_vcf From 534c387a7690b141e20c548b5862a71973e2b7b2 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 17:57:17 +0100 Subject: [PATCH 20/52] Rework manta and strelka_somatic subworkflow --- .../local/germline_variant_calling.nf | 1 - subworkflows/local/pair_variant_calling.nf | 81 +++++------ subworkflows/local/tumor_variant_calling.nf | 18 +-- .../local/variantcalling/manta_somatic.nf | 130 +++++++++++------- .../local/variantcalling/strelka_single.nf | 6 - 5 files changed, 123 insertions(+), 113 deletions(-) diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index 8d4b8a2cc2..cff5c88663 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -92,7 +92,6 @@ workflow GERMLINE_VARIANT_CALLING { haplotypecaller_gvcf = RUN_HAPLOTYPECALLER.out.haplotypecaller_gvcf genotype_gvcf = RUN_HAPLOTYPECALLER.out.genotype_gvcf ch_versions = ch_versions.mix(RUN_HAPLOTYPECALLER.out.versions) - } // MANTA diff --git a/subworkflows/local/pair_variant_calling.nf b/subworkflows/local/pair_variant_calling.nf index 880ce40400..e4fc560f6e 100644 --- a/subworkflows/local/pair_variant_calling.nf +++ b/subworkflows/local/pair_variant_calling.nf @@ -3,7 +3,6 @@ // include { GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING } from '../../subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main' include { MSISENSORPRO_MSI_SOMATIC } from '../../modules/nf-core/modules/msisensorpro/msi_somatic/main' - include { RUN_MANTA_SOMATIC } from './variantcalling/manta_somatic.nf' include { RUN_STRELKA_SOMATIC } from 
'./variantcalling/strelka_somatic.nf' @@ -38,17 +37,6 @@ workflow PAIR_VARIANT_CALLING { msisensorpro_output = Channel.empty() mutect2_vcf = Channel.empty() - - cram_pair_intervals = cram_pair.combine(intervals) - .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, intervals -> - normal_id = meta.normal_id - tumor_id = meta.tumor_id - new_intervals = intervals.baseName != "no_intervals" ? intervals : [] - id = new_intervals ? tumor_id + "_vs_" + normal_id + "_" + new_intervals.baseName : tumor_id + "_vs_" + normal_id - new_meta = [ id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ] - [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, intervals] - } - cram_pair_intervals_gz_tbi = cram_pair.combine(intervals_bed_gz_tbi) .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, bed, tbi -> normal_id = meta.normal_id @@ -61,44 +49,49 @@ workflow PAIR_VARIANT_CALLING { [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, new_bed, new_tbi] } + cram_pair_intervals = cram_pair.combine(intervals) + .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, intervals -> + normal_id = meta.normal_id + tumor_id = meta.tumor_id + new_intervals = intervals.baseName != "no_intervals" ? intervals : [] + id = new_intervals ? 
tumor_id + "_vs_" + normal_id + "_" + new_intervals.baseName : tumor_id + "_vs_" + normal_id + new_meta = [ id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ] + [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, intervals] + } + if (tools.contains('manta')) { - RUN_MANTA_SOMATIC(cram_pair_intervals_gz_tbi, - fasta, - fasta_fai, - num_intervals, - intervals_bed_combine_gz) + RUN_MANTA_SOMATIC( cram_pair_intervals_gz_tbi, + fasta, + fasta_fai, + intervals_bed_combine_gz, + num_intervals) + + manta_vcf = RUN_MANTA_SOMATIC.out.manta_vcf + ch_versions = ch_versions.mix(RUN_MANTA_SOMATIC.out.versions) } - cram_pair_strelka = Channel.empty() - if (tools.contains('strelka') && tools.contains('manta')) { - cram_pair_strelka = cram_pair.join(manta_somatic_sv_vcf).combine(intervals_bed_gz_tbi) - .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi, bed, tbi -> - normal_id = meta.normal_id - tumor_id = meta.tumor_id - - new_bed = bed.simpleName != "no_intervals" ? bed : [] - new_tbi = tbi.simpleName != "no_intervals" ? tbi : [] - id = bed.simpleName != "no_intervals" ? 
tumor_id + "_vs_" + normal_id + "_" + bed.simpleName : tumor_id + "_vs_" + normal_id - new_meta = [ id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient] - [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi, new_bed, new_tbi] + if (tools.contains('strelka')) { + + if (tools.contains('manta')) { + cram_pair_strelka = intervals_bed_gz_tbi.join(manta_somatic_sv_vcf).map{ + meta, normal_cram, normal_crai, tumor_cram, tumor_crai, bed, tbi, manta_vcf, manta_tbi -> + [meta, normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi, bed, tbi] } - } else if (tools.contains('strelka') && !tools.contains('manta')) { - cram_pair_strelka = cram_pair.combine(intervals_bed_gz_tbi) - .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, bed, tbi -> - normal_id = meta.normal_id - tumor_id = meta.tumor_id - - new_bed = bed.simpleName != "no_intervals" ? bed : [] - new_tbi = tbi.simpleName != "no_intervals" ? tbi : [] - id = bed.simpleName != "no_intervals" ? 
tumor_id + "_vs_" + normal_id + "_" + bed.simpleName : tumor_id + "_vs_" + normal_id - new_meta = [ id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient] - - [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, [], [], new_bed, new_tbi] + } else { + cram_pair_strelka = cram_pair_intervals_gz_tbi.map{ + meta, normal_cram, normal_crai, tumor_cram, tumor_crai, bed, tbi -> + [meta, normal_cram, normal_crai, tumor_cram, tumor_crai, [], [], bed, tbi] } - } + } - if (tools.contains('strelka')) { - RUN_STRELKA_SOMATIC(cram_pair_strelka, fasta, fasta_fai, intervals_bed_combine_gz, num_intervals) + RUN_STRELKA_SOMATIC(cram_pair_strelka, + fasta, + fasta_fai, + intervals_bed_combine_gz, + num_intervals) + + strelka_vcf = RUN_STRELKA_SOMATIC.out.strelka_vcf + ch_versions = ch_versions.mix(RUN_STRELKA_SOMATIC.out.versions) } if (tools.contains('msisensorpro')) { diff --git a/subworkflows/local/tumor_variant_calling.nf b/subworkflows/local/tumor_variant_calling.nf index 05a30e8b5b..72767ab8d4 100644 --- a/subworkflows/local/tumor_variant_calling.nf +++ b/subworkflows/local/tumor_variant_calling.nf @@ -95,21 +95,21 @@ workflow TUMOR_ONLY_VARIANT_CALLING { //TODO: Research if splitting by intervals is ok, we pretend for now it is fine. 
Seems to be the consensus on upstream modules implementaiton too RUN_MANTA_TUMORONLY(cram_recalibrated_intervals_gz_tbi, - fasta, - fasta_fai, - num_intervals, - intervals_bed_combine_gz) + fasta, + fasta_fai, + num_intervals, + intervals_bed_combine_gz) manta_vcf = RUN_MANTA_TUMORONLY.out.manta_vcf ch_versions = ch_versions.mix(RUN_MANTA_TUMORONLY.out.versions) } if (tools.contains('strelka')) { - RUN_STRELKA_SINGLE(cram_recalibrated_intervals_gz_tbi, - fasta, - fasta_fai, - intervals_bed_combine_gz, - num_intervals) + RUN_STRELKA_SINGLE( cram_recalibrated_intervals_gz_tbi, + fasta, + fasta_fai, + intervals_bed_combine_gz, + num_intervals) strelka_vcf = RUN_STRELKA_SINGLE.out.strelka_vcf ch_versions = ch_versions.mix(RUN_STRELKA_SINGLE.out.versions) diff --git a/subworkflows/local/variantcalling/manta_somatic.nf b/subworkflows/local/variantcalling/manta_somatic.nf index 310a318462..20132cc978 100644 --- a/subworkflows/local/variantcalling/manta_somatic.nf +++ b/subworkflows/local/variantcalling/manta_somatic.nf @@ -1,12 +1,12 @@ -include { BGZIP as BGZIP_VC_MANTA_DIPLOID } from '../../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SOMATIC } from '../../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SV } from '../../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SOMATIC } from '../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../modules/local/concat_vcf/main' -include { MANTA_SOMATIC } from '../../../modules/local/manta/somatic/main' +include { BGZIP as BGZIP_VC_MANTA_DIPLOID } from '../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../modules/local/bgzip' +include { 
BGZIP as BGZIP_VC_MANTA_SOMATIC } from '../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_SV } from '../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SOMATIC } from '../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../modules/local/concat_vcf/main' +include { MANTA_SOMATIC } from '../../../modules/local/manta/somatic/main' workflow RUN_MANTA_SOMATIC { take: @@ -19,71 +19,95 @@ workflow RUN_MANTA_SOMATIC { main: ch_versions = Channel.empty() - MANTA_SOMATIC(cram_pair_intervals_gz_tbi, fasta, fasta_fai) + MANTA_SOMATIC(cram, fasta, fasta_fai) - if (no_intervals) { - manta_candidate_small_indels_vcf = MANTA_SOMATIC.out.candidate_small_indels_vcf - manta_candidate_sv_vcf = MANTA_SOMATIC.out.candidate_sv_vcf - manta_diploid_sv_vcf = MANTA_SOMATIC.out.diploid_sv_vcf - manta_somatic_sv_vcf = MANTA_SOMATIC.out.somatic_sv_vcf - } else { - BGZIP_VC_MANTA_SV(MANTA_SOMATIC.out.candidate_small_indels_vcf) - BGZIP_VC_MANTA_SMALL_INDELS(MANTA_SOMATIC.out.candidate_sv_vcf) - BGZIP_VC_MANTA_DIPLOID(MANTA_SOMATIC.out.diploid_sv_vcf) - BGZIP_VC_MANTA_SOMATIC(MANTA_SOMATIC.out.somatic_sv_vcf) + MANTA_SOMATIC.out.candidate_small_indels_vcf..branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{manta_candidate_small_indels_vcf} - manta_sv_vcf_to_concat = BGZIP_VC_MANTA_SV.out.vcf.map{ meta, vcf -> + MANTA_SOMATIC.out.candidate_sv_vcf..branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{manta_candidate_sv_vcf} + + MANTA_SOMATIC.out.diploid_sv_vcf..branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{manta_diploid_sv_vcf} + + MANTA_SOMATIC.out.somatic_sv_vcf..branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + 
}.set{manta_somatic_sv_vcf} + + //Only when using intervals + + BGZIP_VC_MANTA_SV(manta_candidate_small_indels_vcf.intervals) + BGZIP_VC_MANTA_SMALL_INDELS(manta_candidate_sv_vcf.intervals) + BGZIP_VC_MANTA_DIPLOID(manta_diploid_sv_vcf.intervals) + BGZIP_VC_MANTA_SOMATIC(manta_somatic_sv_vcf.intervals) + + CONCAT_MANTA_SV( + BGZIP_VC_MANTA_SV.out.vcf.map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id [new_meta, vcf] - }.groupTuple(size: num_intervals) + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_gz) - manta_small_indels_vcf_to_concat = BGZIP_VC_MANTA_SMALL_INDELS.out.vcf.map{ meta, vcf -> + CONCAT_MANTA_SMALL_INDELS( + BGZIP_VC_MANTA_SMALL_INDELS.out.vcf.map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id [new_meta, vcf] - }.groupTuple(size: num_intervals) + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_gz) - manta_diploid_vcf_to_concat = BGZIP_VC_MANTA_DIPLOID.out.vcf.map{ meta, vcf -> + CONCAT_MANTA_DIPLOID( + BGZIP_VC_MANTA_DIPLOID.out.vcf.map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id [new_meta, vcf] - }.groupTuple(size: num_intervals) + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_gz) - manta_somatic_sv_vcf_to_concat = BGZIP_VC_MANTA_SOMATIC.out.vcf.map{ meta, vcf -> + CONCAT_MANTA_SOMATIC( + BGZIP_VC_MANTA_SOMATIC.out.vcf.map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id [new_meta, vcf] - }.groupTuple(size: num_intervals) - - CONCAT_MANTA_SV(manta_sv_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) - CONCAT_MANTA_SMALL_INDELS(manta_small_indels_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - CONCAT_MANTA_DIPLOID(manta_diploid_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) - CONCAT_MANTA_SOMATIC(manta_somatic_sv_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) - - 
manta_candidate_small_indels_vcf = CONCAT_MANTA_SV.out.vcf - manta_candidate_sv_vcf = CONCAT_MANTA_SMALL_INDELS.out.vcf - manta_diploid_sv_vcf = CONCAT_MANTA_DIPLOID.out.vcf - manta_somatic_sv_vcf = CONCAT_MANTA_SOMATIC.out.vcf - - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_DIPLOID.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SOMATIC.out.versions) - - ch_versions = ch_versions.mix(CONCAT_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_DIPLOID.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_SOMATIC.out.versions) - - } - - manta_vcf = manta_vcf.mix(manta_candidate_small_indels_vcf,manta_candidate_sv_vcf,manta_diploid_sv_vcf,manta_somatic_sv_vcf) - + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_gz) + + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SMALL_INDELS.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_DIPLOID.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SOMATIC.out.versions) + ch_versions = ch_versions.mix(CONCAT_MANTA_SV.out.versions) + ch_versions = ch_versions.mix(CONCAT_MANTA_SMALL_INDELS.out.versions) + ch_versions = ch_versions.mix(CONCAT_MANTA_DIPLOID.out.versions) + ch_versions = ch_versions.mix(CONCAT_MANTA_SOMATIC.out.versions) ch_versions = ch_versions.mix(MANTA_SOMATIC.out.versions) + manta_vcf = Channel.empty().mix( + CONCAT_MANTA_SV.out.vcf, + CONCAT_MANTA_SMALL_INDELS.out.vcf, + CONCAT_MANTA_DIPLOID.out.vcf, + CONCAT_MANTA_SOMATIC.out.vcf, + manta_candidate_small_indels_vcf.no_intervals, + manta_candidate_sv_vcf.no_intervals, + manta_diploid_sv_vcf.no_intervals, + manta_somatic_sv_vc.no_intervalsf + ) + emit: + manta_vcf versions = ch_versions } diff --git 
a/subworkflows/local/variantcalling/strelka_single.nf b/subworkflows/local/variantcalling/strelka_single.nf index 2811dd48ca..d66a97093f 100644 --- a/subworkflows/local/variantcalling/strelka_single.nf +++ b/subworkflows/local/variantcalling/strelka_single.nf @@ -27,15 +27,9 @@ workflow RUN_STRELKA_SINGLE { } .set{strelka_vcf} - strelka_vcf.intervals.view() - strelka_vcf.no_intervals.view() - - // STRELKA_GERMLINE.out.vcf.view() - STRELKA_GERMLINE.out.genome_vcf.branch{ intervals: num_intervals > 1 no_intervals: num_intervals == 1 - } }.set{strelka_genome_vcf} // Only when using intervals From 580c3f22442294597f4d44c7b0afd7447b0a11b8 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 18:00:26 +0100 Subject: [PATCH 21/52] Reorder to follow same structure everywhere --- subworkflows/local/variantcalling/manta_somatic.nf | 10 +++++++--- subworkflows/local/variantcalling/strelka_single.nf | 3 +-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/subworkflows/local/variantcalling/manta_somatic.nf b/subworkflows/local/variantcalling/manta_somatic.nf index 20132cc978..ae9fc0f0f9 100644 --- a/subworkflows/local/variantcalling/manta_somatic.nf +++ b/subworkflows/local/variantcalling/manta_somatic.nf @@ -45,9 +45,6 @@ workflow RUN_MANTA_SOMATIC { //Only when using intervals BGZIP_VC_MANTA_SV(manta_candidate_small_indels_vcf.intervals) - BGZIP_VC_MANTA_SMALL_INDELS(manta_candidate_sv_vcf.intervals) - BGZIP_VC_MANTA_DIPLOID(manta_diploid_sv_vcf.intervals) - BGZIP_VC_MANTA_SOMATIC(manta_somatic_sv_vcf.intervals) CONCAT_MANTA_SV( BGZIP_VC_MANTA_SV.out.vcf.map{ meta, vcf -> @@ -58,6 +55,8 @@ workflow RUN_MANTA_SOMATIC { fasta_fai, intervals_bed_gz) + BGZIP_VC_MANTA_SMALL_INDELS(manta_candidate_sv_vcf.intervals) + CONCAT_MANTA_SMALL_INDELS( BGZIP_VC_MANTA_SMALL_INDELS.out.vcf.map{ meta, vcf -> new_meta = meta.clone() @@ -67,6 +66,8 @@ workflow RUN_MANTA_SOMATIC { fasta_fai, intervals_bed_gz) + BGZIP_VC_MANTA_DIPLOID(manta_diploid_sv_vcf.intervals) + 
CONCAT_MANTA_DIPLOID( BGZIP_VC_MANTA_DIPLOID.out.vcf.map{ meta, vcf -> new_meta = meta.clone() @@ -76,6 +77,9 @@ workflow RUN_MANTA_SOMATIC { fasta_fai, intervals_bed_gz) + + BGZIP_VC_MANTA_SOMATIC(manta_somatic_sv_vcf.intervals) + CONCAT_MANTA_SOMATIC( BGZIP_VC_MANTA_SOMATIC.out.vcf.map{ meta, vcf -> new_meta = meta.clone() diff --git a/subworkflows/local/variantcalling/strelka_single.nf b/subworkflows/local/variantcalling/strelka_single.nf index d66a97093f..6a6051bf7b 100644 --- a/subworkflows/local/variantcalling/strelka_single.nf +++ b/subworkflows/local/variantcalling/strelka_single.nf @@ -24,8 +24,7 @@ workflow RUN_STRELKA_SINGLE { STRELKA_GERMLINE.out.vcf.branch{ intervals: num_intervals > 1 no_intervals: num_intervals == 1 - } - .set{strelka_vcf} + }.set{strelka_vcf} STRELKA_GERMLINE.out.genome_vcf.branch{ intervals: num_intervals > 1 From 71daee5f5cd12c5b811019293890ac8b4baa5f80 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 18:02:54 +0100 Subject: [PATCH 22/52] Add missing versions --- subworkflows/local/variantcalling/haplotypecaller.nf | 1 + subworkflows/local/variantcalling/strelka_single.nf | 2 ++ subworkflows/local/variantcalling/strelka_somatic.nf | 2 ++ 3 files changed, 5 insertions(+) diff --git a/subworkflows/local/variantcalling/haplotypecaller.nf b/subworkflows/local/variantcalling/haplotypecaller.nf index a4208acec4..c0128f64d4 100644 --- a/subworkflows/local/variantcalling/haplotypecaller.nf +++ b/subworkflows/local/variantcalling/haplotypecaller.nf @@ -112,6 +112,7 @@ workflow RUN_HAPLOTYPECALLER { ch_versions = ch_versions.mix(BGZIP_VC_HAPLOTYPECALLER.out.versions) ch_versions = ch_versions.mix(CONCAT_HAPLOTYPECALLER.out.versions) ch_versions = ch_versions.mix(GENOTYPEGVCFS.out.versions) + //ch_versions = ch_versions.mix(GATK_JOINT_GERMLINE_VARIANT_CALLING.out.versions) ch_versions = ch_versions.mix(HAPLOTYPECALLER.out.versions) ch_versions = ch_versions.mix(TABIX_VC_HAPLOTYPECALLER.out.versions) diff --git 
a/subworkflows/local/variantcalling/strelka_single.nf b/subworkflows/local/variantcalling/strelka_single.nf index 6a6051bf7b..c2ef225ec2 100644 --- a/subworkflows/local/variantcalling/strelka_single.nf +++ b/subworkflows/local/variantcalling/strelka_single.nf @@ -64,7 +64,9 @@ workflow RUN_STRELKA_SINGLE { strelka_vcf.no_intervals) ch_versions = ch_versions.mix(BGZIP_VC_STRELKA.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_STRELKA_GENOME.out.versions) ch_versions = ch_versions.mix(CONCAT_STRELKA.out.versions) + ch_versions = ch_versions.mix(CONCAT_STRELKA_GENOME.out.versions) ch_versions = ch_versions.mix(STRELKA_GERMLINE.out.versions) emit: diff --git a/subworkflows/local/variantcalling/strelka_somatic.nf b/subworkflows/local/variantcalling/strelka_somatic.nf index 8c1a14ccba..bd8dcc7501 100644 --- a/subworkflows/local/variantcalling/strelka_somatic.nf +++ b/subworkflows/local/variantcalling/strelka_somatic.nf @@ -53,7 +53,9 @@ workflow RUN_STRELKA_SOMATIC { intervals_bed_gz) ch_versions = ch_versions.mix(BGZIP_VC_STRELKA_SNVS.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_STRELKA_INDELS.out.versions) ch_versions = ch_versions.mix(CONCAT_STRELKA_SNVS.out.versions) + ch_versions = ch_versions.mix(CONCAT_STRELKA_INDELS.out.versions) ch_versions = ch_versions.mix(STRELKA_SOMATIC.out.versions) strelka_vcf = Channel.empty().mix( From 24373f96bd980e09ac64ef9a63f0890775b009dd Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 18:06:24 +0100 Subject: [PATCH 23/52] indent comments --- subworkflows/local/pair_variant_calling.nf | 36 +++++++++---------- subworkflows/local/tumor_variant_calling.nf | 38 ++++++++++----------- 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/subworkflows/local/pair_variant_calling.nf b/subworkflows/local/pair_variant_calling.nf index e4fc560f6e..c2efd66316 100644 --- a/subworkflows/local/pair_variant_calling.nf +++ b/subworkflows/local/pair_variant_calling.nf @@ -3,29 +3,29 @@ // include { 
GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING } from '../../subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main' include { MSISENSORPRO_MSI_SOMATIC } from '../../modules/nf-core/modules/msisensorpro/msi_somatic/main' -include { RUN_MANTA_SOMATIC } from './variantcalling/manta_somatic.nf' -include { RUN_STRELKA_SOMATIC } from './variantcalling/strelka_somatic.nf' +include { RUN_MANTA_SOMATIC } from './variantcalling/manta_somatic.nf' +include { RUN_STRELKA_SOMATIC } from './variantcalling/strelka_somatic.nf' workflow PAIR_VARIANT_CALLING { take: tools - cram_pair // channel: [mandatory] cram - dbsnp // channel: [mandatory] dbsnp - dbsnp_tbi // channel: [mandatory] dbsnp_tbi - dict // channel: [mandatory] dict - fasta // channel: [mandatory] fasta - fasta_fai // channel: [mandatory] fasta_fai - intervals // channel: [mandatory] intervals/target regions - intervals_bed_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed - intervals_bed_combined_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed - intervals_bed_combine_gz // channel: [mandatory] intervals/target regions index zipped and indexed in one file - num_intervals // val: number of intervals that are used to parallelize exection, either based on capture kit or GATK recommended for WGS + cram_pair // channel: [mandatory] cram + dbsnp // channel: [mandatory] dbsnp + dbsnp_tbi // channel: [mandatory] dbsnp_tbi + dict // channel: [mandatory] dict + fasta // channel: [mandatory] fasta + fasta_fai // channel: [mandatory] fasta_fai + intervals // channel: [mandatory] intervals/target regions + intervals_bed_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed + intervals_bed_combined_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed + intervals_bed_combine_gz // channel: [mandatory] intervals/target regions index zipped and indexed in one file + num_intervals // val: number of intervals 
that are used to parallelize exection, either based on capture kit or GATK recommended for WGS no_intervals - msisensorpro_scan // channel: [optional] msisensorpro_scan - germline_resource // channel: [optional] germline_resource - germline_resource_tbi // channel: [optional] germline_resource_tbi - panel_of_normals // channel: [optional] panel_of_normals - panel_of_normals_tbi // channel: [optional] panel_of_normals_tbi + msisensorpro_scan // channel: [optional] msisensorpro_scan + germline_resource // channel: [optional] germline_resource + germline_resource_tbi // channel: [optional] germline_resource_tbi + panel_of_normals // channel: [optional] panel_of_normals + panel_of_normals_tbi // channel: [optional] panel_of_normals_tbi main: diff --git a/subworkflows/local/tumor_variant_calling.nf b/subworkflows/local/tumor_variant_calling.nf index 72767ab8d4..076aa269db 100644 --- a/subworkflows/local/tumor_variant_calling.nf +++ b/subworkflows/local/tumor_variant_calling.nf @@ -3,30 +3,30 @@ // Should be only run on patients without normal sample // -include { RUN_FREEBAYES } from './variantcalling/freebayes.nf' +include { RUN_FREEBAYES } from './variantcalling/freebayes.nf' include { GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING } from '../../subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main' -include { RUN_MANTA_TUMORONLY } from './variantcalling/manta_tumoronly.nf' -include { RUN_STRELKA_SINGLE } from './variantcalling/strelka_single.nf' +include { RUN_MANTA_TUMORONLY } from './variantcalling/manta_tumoronly.nf' +include { RUN_STRELKA_SINGLE } from './variantcalling/strelka_single.nf' workflow TUMOR_ONLY_VARIANT_CALLING { take: - tools // Mandatory, list of tools to apply - cram_recalibrated // channel: [mandatory] cram - dbsnp // channel: [mandatory] dbsnp - dbsnp_tbi // channel: [mandatory] dbsnp_tbi - dict // channel: [mandatory] dict - fasta // channel: [mandatory] fasta - fasta_fai // channel: [mandatory] fasta_fai - intervals // channel: 
[mandatory] intervals/target regions - intervals_bed_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed - intervals_bed_combine_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed - intervals_bed_combine_gz // channel: [mandatory] intervals/target regions index zipped and indexed in one file - num_intervals // val: number of intervals that are used to parallelize exection, either based on capture kit or GATK recommended for WGS + tools // Mandatory, list of tools to apply + cram_recalibrated // channel: [mandatory] cram + dbsnp // channel: [mandatory] dbsnp + dbsnp_tbi // channel: [mandatory] dbsnp_tbi + dict // channel: [mandatory] dict + fasta // channel: [mandatory] fasta + fasta_fai // channel: [mandatory] fasta_fai + intervals // channel: [mandatory] intervals/target regions + intervals_bed_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed + intervals_bed_combine_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed + intervals_bed_combine_gz // channel: [mandatory] intervals/target regions index zipped and indexed in one file + num_intervals // val: number of intervals that are used to parallelize exection, either based on capture kit or GATK recommended for WGS no_intervals - germline_resource - germline_resource_tbi // channel - panel_of_normals - panel_of_normals_tbi + germline_resource // channel: [optional] germline_resource + germline_resource_tbi // channel: [optional] germline_resource_tbi + panel_of_normals // channel: [optional] panel_of_normals + panel_of_normals_tbi // channel: [optional] panel_of_normals_tbi main: From 8a94abaee7c551b1979b714b2f1e286c3eae94f2 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 18:12:31 +0100 Subject: [PATCH 24/52] Formatting --- subworkflows/local/variantcalling/manta_somatic.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/subworkflows/local/variantcalling/manta_somatic.nf b/subworkflows/local/variantcalling/manta_somatic.nf index ae9fc0f0f9..d811c76d84 100644 --- a/subworkflows/local/variantcalling/manta_somatic.nf +++ b/subworkflows/local/variantcalling/manta_somatic.nf @@ -22,7 +22,8 @@ workflow RUN_MANTA_SOMATIC { MANTA_SOMATIC(cram, fasta, fasta_fai) - MANTA_SOMATIC.out.candidate_small_indels_vcf..branch{ + // Figure out if using intervals or no_intervals + MANTA_SOMATIC.out.candidate_small_indels_vcf.branch{ intervals: num_intervals > 1 no_intervals: num_intervals == 1 }.set{manta_candidate_small_indels_vcf} @@ -43,7 +44,6 @@ workflow RUN_MANTA_SOMATIC { }.set{manta_somatic_sv_vcf} //Only when using intervals - BGZIP_VC_MANTA_SV(manta_candidate_small_indels_vcf.intervals) CONCAT_MANTA_SV( From fdfcb0f32ac3426b38801c08044e803727f2d02f Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 18:14:36 +0100 Subject: [PATCH 25/52] Formatting --- .../local/variantcalling/manta_somatic.nf | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/subworkflows/local/variantcalling/manta_somatic.nf b/subworkflows/local/variantcalling/manta_somatic.nf index d811c76d84..d5c21f13c1 100644 --- a/subworkflows/local/variantcalling/manta_somatic.nf +++ b/subworkflows/local/variantcalling/manta_somatic.nf @@ -89,16 +89,7 @@ workflow RUN_MANTA_SOMATIC { fasta_fai, intervals_bed_gz) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_DIPLOID.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SOMATIC.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_DIPLOID.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_SOMATIC.out.versions) - ch_versions = 
ch_versions.mix(MANTA_SOMATIC.out.versions) - + // Mix output channels for "no intervals" and "with intervals" results manta_vcf = Channel.empty().mix( CONCAT_MANTA_SV.out.vcf, CONCAT_MANTA_SMALL_INDELS.out.vcf, @@ -110,6 +101,16 @@ workflow RUN_MANTA_SOMATIC { manta_somatic_sv_vc.no_intervalsf ) + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SMALL_INDELS.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_DIPLOID.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SOMATIC.out.versions) + ch_versions = ch_versions.mix(CONCAT_MANTA_SV.out.versions) + ch_versions = ch_versions.mix(CONCAT_MANTA_SMALL_INDELS.out.versions) + ch_versions = ch_versions.mix(CONCAT_MANTA_DIPLOID.out.versions) + ch_versions = ch_versions.mix(CONCAT_MANTA_SOMATIC.out.versions) + ch_versions = ch_versions.mix(MANTA_SOMATIC.out.versions) + emit: manta_vcf versions = ch_versions From 6b4bdbb038ccc8b6f279e382fe7a4cb98ec197f5 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 18:16:26 +0100 Subject: [PATCH 26/52] More Formatting & comments --- .../local/variantcalling/manta_tumoronly.nf | 1 + .../local/variantcalling/strelka_somatic.nf | 13 +++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/subworkflows/local/variantcalling/manta_tumoronly.nf b/subworkflows/local/variantcalling/manta_tumoronly.nf index 31fe396cc7..b127813c89 100644 --- a/subworkflows/local/variantcalling/manta_tumoronly.nf +++ b/subworkflows/local/variantcalling/manta_tumoronly.nf @@ -72,6 +72,7 @@ workflow RUN_MANTA_TUMORONLY { fasta_fai, intervals_bed_gz) + // Mix output channels for "no intervals" and "with intervals" results manta_vcf = Channel.empty().mix( CONCAT_MANTA_SMALL_INDELS.out.vcf, CONCAT_MANTA_SV.out.vcf, diff --git a/subworkflows/local/variantcalling/strelka_somatic.nf b/subworkflows/local/variantcalling/strelka_somatic.nf index bd8dcc7501..f5767c534a 100644 --- 
a/subworkflows/local/variantcalling/strelka_somatic.nf +++ b/subworkflows/local/variantcalling/strelka_somatic.nf @@ -52,18 +52,19 @@ workflow RUN_STRELKA_SOMATIC { fasta_fai, intervals_bed_gz) - ch_versions = ch_versions.mix(BGZIP_VC_STRELKA_SNVS.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_STRELKA_INDELS.out.versions) - ch_versions = ch_versions.mix(CONCAT_STRELKA_SNVS.out.versions) - ch_versions = ch_versions.mix(CONCAT_STRELKA_INDELS.out.versions) - ch_versions = ch_versions.mix(STRELKA_SOMATIC.out.versions) - + // Mix output channels for "no intervals" and "with intervals" results strelka_vcf = Channel.empty().mix( CONCAT_STRELKA_SNVS.out.vcf, CONCAT_STRELKA_INDELS.out.vcf, strelka_vcf_snvs.no_intervals, strelka_vcf_indels.no_intervals) + ch_versions = ch_versions.mix(BGZIP_VC_STRELKA_SNVS.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_STRELKA_INDELS.out.versions) + ch_versions = ch_versions.mix(CONCAT_STRELKA_SNVS.out.versions) + ch_versions = ch_versions.mix(CONCAT_STRELKA_INDELS.out.versions) + ch_versions = ch_versions.mix(STRELKA_SOMATIC.out.versions) + emit: strelka_vcf versions = ch_versions From 6bfb86ff7286c1c48c9eb9f59851af62e7677751 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 18:30:51 +0100 Subject: [PATCH 27/52] Sort input correctly helps a lot, also fix typos --- subworkflows/local/tumor_variant_calling.nf | 4 ++-- subworkflows/local/variantcalling/manta_somatic.nf | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/subworkflows/local/tumor_variant_calling.nf b/subworkflows/local/tumor_variant_calling.nf index 076aa269db..7ab31d9178 100644 --- a/subworkflows/local/tumor_variant_calling.nf +++ b/subworkflows/local/tumor_variant_calling.nf @@ -97,8 +97,8 @@ workflow TUMOR_ONLY_VARIANT_CALLING { RUN_MANTA_TUMORONLY(cram_recalibrated_intervals_gz_tbi, fasta, fasta_fai, - num_intervals, - intervals_bed_combine_gz) + intervals_bed_combine_gz, + num_intervals) manta_vcf = 
RUN_MANTA_TUMORONLY.out.manta_vcf ch_versions = ch_versions.mix(RUN_MANTA_TUMORONLY.out.versions) diff --git a/subworkflows/local/variantcalling/manta_somatic.nf b/subworkflows/local/variantcalling/manta_somatic.nf index d5c21f13c1..0a8de96efb 100644 --- a/subworkflows/local/variantcalling/manta_somatic.nf +++ b/subworkflows/local/variantcalling/manta_somatic.nf @@ -28,17 +28,17 @@ workflow RUN_MANTA_SOMATIC { no_intervals: num_intervals == 1 }.set{manta_candidate_small_indels_vcf} - MANTA_SOMATIC.out.candidate_sv_vcf..branch{ + MANTA_SOMATIC.out.candidate_sv_vcf.branch{ intervals: num_intervals > 1 no_intervals: num_intervals == 1 }.set{manta_candidate_sv_vcf} - MANTA_SOMATIC.out.diploid_sv_vcf..branch{ + MANTA_SOMATIC.out.diploid_sv_vcf.branch{ intervals: num_intervals > 1 no_intervals: num_intervals == 1 }.set{manta_diploid_sv_vcf} - MANTA_SOMATIC.out.somatic_sv_vcf..branch{ + MANTA_SOMATIC.out.somatic_sv_vcf.branch{ intervals: num_intervals > 1 no_intervals: num_intervals == 1 }.set{manta_somatic_sv_vcf} @@ -98,7 +98,7 @@ workflow RUN_MANTA_SOMATIC { manta_candidate_small_indels_vcf.no_intervals, manta_candidate_sv_vcf.no_intervals, manta_diploid_sv_vcf.no_intervals, - manta_somatic_sv_vc.no_intervalsf + manta_somatic_sv_vcf.no_intervals ) ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) From 9412cde2bd27cb26945e01973585d53d210e5774 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 18:44:39 +0100 Subject: [PATCH 28/52] Use nf-core/manta --- modules.json | 9 +++++++++ .../modules}/manta/germline/main.nf | 1 + .../modules}/manta/germline/meta.yml | 16 ++++++++-------- .../modules}/manta/somatic/main.nf | 0 .../modules}/manta/somatic/meta.yml | 16 ++++++++-------- .../modules}/manta/tumoronly/main.nf | 0 .../modules}/manta/tumoronly/meta.yml | 16 ++++++++-------- subworkflows/local/variantcalling/freebayes.nf | 8 ++++---- .../local/variantcalling/haplotypecaller.nf | 12 ++++++------ .../local/variantcalling/manta_germline.nf | 14 
+++++++------- .../local/variantcalling/manta_somatic.nf | 3 +-- .../local/variantcalling/manta_tumoronly.nf | 2 +- .../local/variantcalling/strelka_single.nf | 4 ++-- 13 files changed, 55 insertions(+), 46 deletions(-) rename modules/{local => nf-core/modules}/manta/germline/main.nf (99%) rename modules/{local => nf-core/modules}/manta/germline/meta.yml (100%) rename modules/{local => nf-core/modules}/manta/somatic/main.nf (100%) rename modules/{local => nf-core/modules}/manta/somatic/meta.yml (100%) rename modules/{local => nf-core/modules}/manta/tumoronly/main.nf (100%) rename modules/{local => nf-core/modules}/manta/tumoronly/meta.yml (100%) diff --git a/modules.json b/modules.json index 8024315c10..3aa090862a 100644 --- a/modules.json +++ b/modules.json @@ -114,6 +114,15 @@ "gatk4/variantrecalibrator": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, + "manta/germline": { + "git_sha": "979e57b7ac6a405a395dd7a6dbe1a275c5bc226b" + }, + "manta/somatic": { + "git_sha": "979e57b7ac6a405a395dd7a6dbe1a275c5bc226b" + }, + "manta/tumoronly": { + "git_sha": "979e57b7ac6a405a395dd7a6dbe1a275c5bc226b" + }, "msisensorpro/msi_somatic": { "git_sha": "c8ebd0de36c649a14fc92f2f73cbd9f691a8ce0a" }, diff --git a/modules/local/manta/germline/main.nf b/modules/nf-core/modules/manta/germline/main.nf similarity index 99% rename from modules/local/manta/germline/main.nf rename to modules/nf-core/modules/manta/germline/main.nf index 1fa6aa9614..ef6bd4a32e 100644 --- a/modules/local/manta/germline/main.nf +++ b/modules/nf-core/modules/manta/germline/main.nf @@ -12,6 +12,7 @@ process MANTA_GERMLINE { path fasta path fasta_fai + output: tuple val(meta), path("*candidate_small_indels.vcf.gz") , emit: candidate_small_indels_vcf tuple val(meta), path("*candidate_small_indels.vcf.gz.tbi"), emit: candidate_small_indels_vcf_tbi diff --git a/modules/local/manta/germline/meta.yml b/modules/nf-core/modules/manta/germline/meta.yml similarity index 100% rename from 
modules/local/manta/germline/meta.yml rename to modules/nf-core/modules/manta/germline/meta.yml index d6297eadb6..b719f0756c 100644 --- a/modules/local/manta/germline/meta.yml +++ b/modules/nf-core/modules/manta/germline/meta.yml @@ -31,14 +31,6 @@ input: type: file description: BAM/CRAM/SAM index file. For joint calling use a list of files. pattern: "*.{bai,crai,sai}" - - fasta: - type: file - description: Genome reference FASTA file - pattern: "*.{fa,fasta}" - - fasta_fai: - type: file - description: Genome reference FASTA index file - pattern: "*.{fa.fai,fasta.fai}" - target_bed: type: file description: BED file containing target regions for variant calling @@ -47,6 +39,14 @@ input: type: file description: Index for BED file containing target regions for variant calling pattern: "*.{bed.tbi}" + - fasta: + type: file + description: Genome reference FASTA file + pattern: "*.{fa,fasta}" + - fasta_fai: + type: file + description: Genome reference FASTA index file + pattern: "*.{fa.fai,fasta.fai}" output: - meta: diff --git a/modules/local/manta/somatic/main.nf b/modules/nf-core/modules/manta/somatic/main.nf similarity index 100% rename from modules/local/manta/somatic/main.nf rename to modules/nf-core/modules/manta/somatic/main.nf diff --git a/modules/local/manta/somatic/meta.yml b/modules/nf-core/modules/manta/somatic/meta.yml similarity index 100% rename from modules/local/manta/somatic/meta.yml rename to modules/nf-core/modules/manta/somatic/meta.yml index ec9cc86957..457d66a5fd 100644 --- a/modules/local/manta/somatic/meta.yml +++ b/modules/nf-core/modules/manta/somatic/meta.yml @@ -39,14 +39,6 @@ input: type: file description: BAM/CRAM/SAM index file pattern: "*.{bai,crai,sai}" - - fasta: - type: file - description: Genome reference FASTA file - pattern: "*.{fa,fasta}" - - fai: - type: file - description: Genome reference FASTA index file - pattern: "*.{fa.fai,fasta.fai}" - target_bed: type: file description: BED file containing target regions for variant 
calling @@ -55,6 +47,14 @@ input: type: file description: Index for BED file containing target regions for variant calling pattern: "*.{bed.tbi}" + - fasta: + type: file + description: Genome reference FASTA file + pattern: "*.{fa,fasta}" + - fai: + type: file + description: Genome reference FASTA index file + pattern: "*.{fa.fai,fasta.fai}" output: - meta: diff --git a/modules/local/manta/tumoronly/main.nf b/modules/nf-core/modules/manta/tumoronly/main.nf similarity index 100% rename from modules/local/manta/tumoronly/main.nf rename to modules/nf-core/modules/manta/tumoronly/main.nf diff --git a/modules/local/manta/tumoronly/meta.yml b/modules/nf-core/modules/manta/tumoronly/meta.yml similarity index 100% rename from modules/local/manta/tumoronly/meta.yml rename to modules/nf-core/modules/manta/tumoronly/meta.yml index f902bc77a3..398d684365 100644 --- a/modules/local/manta/tumoronly/meta.yml +++ b/modules/nf-core/modules/manta/tumoronly/meta.yml @@ -31,14 +31,6 @@ input: type: file description: BAM/CRAM/SAM index file pattern: "*.{bai,crai,sai}" - - fasta: - type: file - description: Genome reference FASTA file - pattern: "*.{fa,fasta}" - - fai: - type: file - description: Genome reference FASTA index file - pattern: "*.{fa.fai,fasta.fai}" - target_bed: type: file description: BED file containing target regions for variant calling @@ -47,6 +39,14 @@ input: type: file description: Index for BED file containing target regions for variant calling pattern: "*.{bed.tbi}" + - fasta: + type: file + description: Genome reference FASTA file + pattern: "*.{fa,fasta}" + - fai: + type: file + description: Genome reference FASTA index file + pattern: "*.{fa.fai,fasta.fai}" output: - meta: diff --git a/subworkflows/local/variantcalling/freebayes.nf b/subworkflows/local/variantcalling/freebayes.nf index 644d582355..dfb6095cb3 100644 --- a/subworkflows/local/variantcalling/freebayes.nf +++ b/subworkflows/local/variantcalling/freebayes.nf @@ -1,7 +1,7 @@ -include { BGZIP as 
BGZIP_VC_FREEBAYES } from '../../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_FREEBAYES } from '../../../modules/local/concat_vcf/main' -include { FREEBAYES } from '../../../modules/nf-core/modules/freebayes/main' -include { TABIX_TABIX as TABIX_VC_FREEBAYES } from '../../../modules/nf-core/modules/tabix/tabix/main' +include { BGZIP as BGZIP_VC_FREEBAYES } from '../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_FREEBAYES } from '../../../modules/local/concat_vcf/main' +include { FREEBAYES } from '../../../modules/nf-core/modules/freebayes/main' +include { TABIX_TABIX as TABIX_VC_FREEBAYES } from '../../../modules/nf-core/modules/tabix/tabix/main' workflow RUN_FREEBAYES { take: diff --git a/subworkflows/local/variantcalling/haplotypecaller.nf b/subworkflows/local/variantcalling/haplotypecaller.nf index c0128f64d4..7b2d03ed1e 100644 --- a/subworkflows/local/variantcalling/haplotypecaller.nf +++ b/subworkflows/local/variantcalling/haplotypecaller.nf @@ -1,9 +1,9 @@ -include { BGZIP as BGZIP_VC_HAPLOTYPECALLER } from '../../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_HAPLOTYPECALLER } from '../../../modules/local/concat_vcf/main' -include { GATK4_GENOTYPEGVCFS as GENOTYPEGVCFS } from '../../../modules/nf-core/modules/gatk4/genotypegvcfs/main' -include { GATK4_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../../modules/nf-core/modules/gatk4/haplotypecaller/main' -include { GATK_JOINT_GERMLINE_VARIANT_CALLING } from '../../../subworkflows/nf-core/gatk4/joint_germline_variant_calling/main' -include { TABIX_TABIX as TABIX_VC_HAPLOTYPECALLER } from '../../../modules/nf-core/modules/tabix/tabix/main' +include { BGZIP as BGZIP_VC_HAPLOTYPECALLER } from '../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_HAPLOTYPECALLER } from '../../../modules/local/concat_vcf/main' +include { GATK4_GENOTYPEGVCFS as GENOTYPEGVCFS } from '../../../modules/nf-core/modules/gatk4/genotypegvcfs/main' +include { GATK4_HAPLOTYPECALLER as HAPLOTYPECALLER 
} from '../../../modules/nf-core/modules/gatk4/haplotypecaller/main' +include { GATK_JOINT_GERMLINE_VARIANT_CALLING } from '../../../subworkflows/nf-core/gatk4/joint_germline_variant_calling/main' +include { TABIX_TABIX as TABIX_VC_HAPLOTYPECALLER } from '../../../modules/nf-core/modules/tabix/tabix/main' workflow RUN_HAPLOTYPECALLER { take: diff --git a/subworkflows/local/variantcalling/manta_germline.nf b/subworkflows/local/variantcalling/manta_germline.nf index 6ce86a4720..fe3ab37729 100644 --- a/subworkflows/local/variantcalling/manta_germline.nf +++ b/subworkflows/local/variantcalling/manta_germline.nf @@ -1,10 +1,10 @@ -include { BGZIP as BGZIP_VC_MANTA_DIPLOID } from '../../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SV } from '../../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../modules/local/concat_vcf/main' -include { MANTA_GERMLINE } from '../../../modules/local/manta/germline/main' +include { BGZIP as BGZIP_VC_MANTA_DIPLOID } from '../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_SV } from '../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../modules/local/concat_vcf/main' +include { MANTA_GERMLINE } from '../../../modules/nf-core/modules/manta/germline/main' // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. 
// Seems to be the consensus on upstream modules implementation too diff --git a/subworkflows/local/variantcalling/manta_somatic.nf b/subworkflows/local/variantcalling/manta_somatic.nf index 0a8de96efb..c9e77d7ae5 100644 --- a/subworkflows/local/variantcalling/manta_somatic.nf +++ b/subworkflows/local/variantcalling/manta_somatic.nf @@ -6,7 +6,7 @@ include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../../modules/local include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../modules/local/concat_vcf/main' include { CONCAT_VCF as CONCAT_MANTA_SOMATIC } from '../../../modules/local/concat_vcf/main' include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../modules/local/concat_vcf/main' -include { MANTA_SOMATIC } from '../../../modules/local/manta/somatic/main' +include { MANTA_SOMATIC } from '../../../modules/nf-core/modules/manta/somatic/main' workflow RUN_MANTA_SOMATIC { take: @@ -77,7 +77,6 @@ workflow RUN_MANTA_SOMATIC { fasta_fai, intervals_bed_gz) - BGZIP_VC_MANTA_SOMATIC(manta_somatic_sv_vcf.intervals) CONCAT_MANTA_SOMATIC( diff --git a/subworkflows/local/variantcalling/manta_tumoronly.nf b/subworkflows/local/variantcalling/manta_tumoronly.nf index b127813c89..8a15015c71 100644 --- a/subworkflows/local/variantcalling/manta_tumoronly.nf +++ b/subworkflows/local/variantcalling/manta_tumoronly.nf @@ -4,7 +4,7 @@ include { BGZIP as BGZIP_VC_MANTA_TUMOR } from '../../../modules/local include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../modules/local/concat_vcf/main' include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../modules/local/concat_vcf/main' include { CONCAT_VCF as CONCAT_MANTA_TUMOR } from '../../../modules/local/concat_vcf/main' -include { MANTA_TUMORONLY } from '../../../modules/local/manta/tumoronly/main' +include { MANTA_TUMORONLY } from '../../../modules/nf-core/modules/manta/tumoronly/main' // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. 
// Seems to be the consensus on upstream modules implementation too diff --git a/subworkflows/local/variantcalling/strelka_single.nf b/subworkflows/local/variantcalling/strelka_single.nf index c2ef225ec2..0f3aac321d 100644 --- a/subworkflows/local/variantcalling/strelka_single.nf +++ b/subworkflows/local/variantcalling/strelka_single.nf @@ -1,5 +1,5 @@ -include { BGZIP as BGZIP_VC_STRELKA } from '../../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_STRELKA_GENOME } from '../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_STRELKA } from '../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_STRELKA_GENOME } from '../../../modules/local/bgzip' include { CONCAT_VCF as CONCAT_STRELKA } from '../../../modules/local/concat_vcf/main' include { CONCAT_VCF as CONCAT_STRELKA_GENOME } from '../../../modules/local/concat_vcf/main' include { STRELKA_GERMLINE } from '../../../modules/nf-core/modules/strelka/germline/main' From c1f54c1fad0ebcad9865e26b4523983276ab3cce Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 19:42:20 +0100 Subject: [PATCH 29/52] Add in a bunch of tests for mainly tumor / somatic tools --- tests/csv/3.0/recalibrated_somatic.csv | 3 + tests/csv/3.0/recalibrated_tumoronly.csv | 2 + tests/test_tools.yml | 124 +++++++++++++++++++++++ 3 files changed, 129 insertions(+) create mode 100644 tests/csv/3.0/recalibrated_somatic.csv create mode 100644 tests/csv/3.0/recalibrated_tumoronly.csv diff --git a/tests/csv/3.0/recalibrated_somatic.csv b/tests/csv/3.0/recalibrated_somatic.csv new file mode 100644 index 0000000000..3ed631bc31 --- /dev/null +++ b/tests/csv/3.0/recalibrated_somatic.csv @@ -0,0 +1,3 @@ +patient,gender,status,sample,cram,crai 
+test,XX,0,sample1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai +test,XX,1,sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai diff --git a/tests/csv/3.0/recalibrated_tumoronly.csv b/tests/csv/3.0/recalibrated_tumoronly.csv new file mode 100644 index 0000000000..a62e7c21bd --- /dev/null +++ b/tests/csv/3.0/recalibrated_tumoronly.csv @@ -0,0 +1,2 @@ +patient,gender,status,sample,cram,crai +test,XX,1,sample,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai diff --git a/tests/test_tools.yml b/tests/test_tools.yml index a308ebb444..94f59909b3 100644 --- a/tests/test_tools.yml +++ b/tests/test_tools.yml @@ -46,6 +46,48 @@ - path: results/variant_calling/sample1/deepvariant/sample1.vcf.gz - path: results/variant_calling/sample1/deepvariant/sample1.vcf.gz.tbi +- name: Run variant calling on germline sample with freebayes + command: nextflow run main.nf -profile test,tools_germline,docker --tools freebayes + tags: + - freebayes + - germline + - variant_calling + files: + - path: results/variant_calling/sample1/freebayes/sample1.vcf.gz + - path: results/variant_calling/sample1/freebayes/sample1.vcf.gz.tbi + +- name: Run variant calling on germline sample with freebayes without intervals + command: nextflow run main.nf -profile 
test,tools_germline,docker --tools freebayes --no_intervals + tags: + - freebayes + - germline + - no_intervals + - variant_calling + files: + - path: results/variant_calling/sample1/freebayes/sample1.vcf.gz + - path: results/variant_calling/sample1/freebayes/sample1.vcf.gz.tbi + +- name: Run variant calling on tumor_only sample with freebayes + command: nextflow run main.nf -profile test,tools_tumoronly,docker --tools freebayes + tags: + - freebayes + - tumor_only + - variant_calling + files: + - path: results/variant_calling/sample1/freebayes/sample1.vcf.gz + - path: results/variant_calling/sample1/freebayes/sample1.vcf.gz.tbi + +- name: Run variant calling on tumor_only sample with freebayes without intervals + command: nextflow run main.nf -profile test,tools_tumoronly,docker --tools freebayes --no_intervals + tags: + - freebayes + - no_intervals + - tumor_only + - variant_calling + files: + - path: results/variant_calling/sample1/freebayes/sample1.vcf.gz + - path: results/variant_calling/sample1/freebayes/sample1.vcf.gz.tbi + - name: Run variant calling on germline sample with haplotypecaller command: nextflow run main.nf -profile test,tools_germline,docker --tools haplotypecaller tags: @@ -123,6 +165,80 @@ - path: results/variant_calling/sample1/manta/sample1.diploid_sv.vcf.gz - path: results/variant_calling/sample1/manta/sample1.diploid_sv.vcf.gz.tbi +- name: Run variant calling on tumor_only sample with manta + command: nextflow run main.nf -profile test,tools_tumoronly,docker --tools manta + tags: + - manta + - tumor_only + - variant_calling + files: + - path: results/variant_calling/sample1/manta/sample1.candidate_small_indels.vcf.gz + - path: results/variant_calling/sample1/manta/sample1.candidate_small_indels.vcf.gz.tbi + - path: results/variant_calling/sample1/manta/sample1.candidate_sv.vcf.gz + - path: results/variant_calling/sample1/manta/sample1.candidate_sv.vcf.gz.tbi + - path: results/variant_calling/sample1/manta/sample1.tumor_sv.vcf.gz + - path: 
results/variant_calling/sample1/manta/sample1.tumor_sv.vcf.gz.tbi + +- name: Run variant calling on tumor_only sample with manta without intervals + command: nextflow run main.nf -profile test,tools_tumoronly,docker --tools manta --no_intervals + tags: + - manta + - no_intervals + - tumor_only + - variant_calling + files: + - path: results/variant_calling/sample1/manta/sample1.candidate_small_indels.vcf.gz + - path: results/variant_calling/sample1/manta/sample1.candidate_small_indels.vcf.gz.tbi + - path: results/variant_calling/sample1/manta/sample1.candidate_sv.vcf.gz + - path: results/variant_calling/sample1/manta/sample1.candidate_sv.vcf.gz.tbi + - path: results/variant_calling/sample1/manta/sample1.tumor_sv.vcf.gz + - path: results/variant_calling/sample1/manta/sample1.tumor_sv.vcf.gz.tbi + +- name: Run variant calling on somatic sample with manta + command: nextflow run main.nf -profile test,tools_somatic,docker --tools manta + tags: + - manta + - somatic + - variant_calling + files: + - path: results/variant_calling/sample1/manta/sample1.candidate_small_indels.vcf.gz + - path: results/variant_calling/sample1/manta/sample1.candidate_small_indels.vcf.gz.tbi + - path: results/variant_calling/sample1/manta/sample1.candidate_sv.vcf.gz + - path: results/variant_calling/sample1/manta/sample1.candidate_sv.vcf.gz.tbi + - path: results/variant_calling/sample1/manta/sample1.diploid_sv.vcf.gz + - path: results/variant_calling/sample1/manta/sample1.diploid_sv.vcf.gz.tbi + - path: results/variant_calling/sample1/manta/sample1.tumor_sv.vcf.gz + - path: results/variant_calling/sample1/manta/sample1.tumor_sv.vcf.gz.tbi + +- name: Run variant calling on somatic sample with manta without intervals + command: nextflow run main.nf -profile test,tools_somatic,docker --tools manta --no_intervals + tags: + - manta + - no_intervals + - somatic + - variant_calling + files: + - path: results/variant_calling/sample1/manta/sample1.candidate_small_indels.vcf.gz + - path: 
results/variant_calling/sample1/manta/sample1.candidate_small_indels.vcf.gz.tbi + - path: results/variant_calling/sample1/manta/sample1.candidate_sv.vcf.gz + - path: results/variant_calling/sample1/manta/sample1.candidate_sv.vcf.gz.tbi + - path: results/variant_calling/sample1/manta/sample1.diploid_sv.vcf.gz + - path: results/variant_calling/sample1/manta/sample1.diploid_sv.vcf.gz.tbi + - path: results/variant_calling/sample1/manta/sample1.tumor_sv.vcf.gz + - path: results/variant_calling/sample1/manta/sample1.tumor_sv.vcf.gz.tbi + +#- name: Run variant calling on tumoronly sample with mutect2 + +#- name: Run variant calling on tumoronly sample with mutect2 without intervals + +#- name: Run variant calling on somatic sample with mutect2 + +#- name: Run variant calling on somatic sample with mutect2 without intervals + +#- name: Run variant calling on somatic sample with msisensor-pro + +#- name: Run variant calling on somatic sample with msisensor-pro without intervals + - name: Run variant calling on germline sample with strelka command: nextflow run main.nf -profile test,tools_germline,docker --tools strelka tags: @@ -147,3 +263,11 @@ - path: results/variant_calling/sample1/strelka/sample1.variants.vcf.gz.tbi - path: results/variant_calling/sample1/strelka/sample1.genome.vcf.gz - path: results/variant_calling/sample1/strelka/sample1.genome.vcf.gz.tbi +#- name: Run variant calling on tumoronly sample with strelka +# +#- name: Run variant calling on tumoronly sample with strelka without intervals +# +#- name: Run variant calling on somatic sample with strelka +# +#- name: Run variant calling on somatic sample with strelka without intervals +#TODO: Test for strelka + manta From d738a1bec025184a90b0e38b6765da750db45525 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 19:43:55 +0100 Subject: [PATCH 30/52] Add in a bunch of tests for mainly tumor / somatic tools --- conf/test.config | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 
deletions(-) diff --git a/conf/test.config b/conf/test.config index 706d1daab3..97d7814731 100644 --- a/conf/test.config +++ b/conf/test.config @@ -15,8 +15,8 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' + max_cpus = 4 + max_memory = '8.GB' max_time = '8.h' // Input data @@ -91,6 +91,30 @@ profiles { params.input = "${baseDir}/tests/csv/3.0/recalibrated_germline.csv" params.dbsnp = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz" params.fasta = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" + params.intervals = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" + params.step = 'variant_calling' + params.joint_germline = true + params.wes = true + params.genome = 'WBcel235' + params.vep_genome = 'WBcel235' + } + tools_tumoronly { + params.input = "${baseDir}/tests/csv/3.0/recalibrated_tumoronly.csv" + params.dbsnp = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz" + params.fasta = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" + params.germline_resource = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz" + params.intervals = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" + params.pon = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz" + params.step = 'variant_calling' + params.joint_germline = true + params.wes = true + params.genome = 'WBcel235' + params.vep_genome = 'WBcel235' + } + tools_somatic { + params.input = "${baseDir}/tests/csv/3.0/recalibrated_somatic.csv" + params.dbsnp = 
"${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz" + params.fasta = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" params.germline_resource = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz" params.intervals = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" params.pon = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz" From 35019fe548cf508c8f5aa1b609ff7dfe3e81c036 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 19:46:00 +0100 Subject: [PATCH 31/52] Revert test ressource --- conf/test.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/test.config b/conf/test.config index 97d7814731..ac0f738b2f 100644 --- a/conf/test.config +++ b/conf/test.config @@ -15,8 +15,8 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Limit resources so that this can run on GitHub Actions - max_cpus = 4 - max_memory = '8.GB' + max_cpus = 2 + max_memory = '6.GB' max_time = '8.h' // Input data From 2a7062f97d9ef3fcaa4367ff811daef7035039bd Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 20:03:04 +0100 Subject: [PATCH 32/52] add freebayes tests --- .github/workflows/ci.yml | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8cf26fac54..624f89a705 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,25 +25,26 @@ jobs: # Nextflow versions include: # Test pipeline minimum Nextflow version - - NXF_VER: '21.10.3' - NXF_EDGE: '' + - NXF_VER: "21.10.3" + NXF_EDGE: "" # Test latest edge release of Nextflow - - NXF_VER: '' - NXF_EDGE: '1' + - NXF_VER: "" + NXF_EDGE: "1" test: - - 'aligner' - - 'annotation' - - 'default' 
- - 'deepvariant' - - 'gatk4_spark' - - 'haplotypecaller' - - 'manta' + - "aligner" + - "annotation" + - "default" + - "deepvariant" + - "freebayes" + - "gatk4_spark" + - "haplotypecaller" + - "manta" # - 'save_bam_mapped' - - 'skip_markduplicates' - - 'strelka' - - 'split_fastq' - - 'targeted' - - 'tumor_normal_pair' + - "skip_markduplicates" + - "strelka" + - "split_fastq" + - "targeted" + - "tumor_normal_pair" steps: - name: Check out pipeline code uses: actions/checkout@v2 @@ -61,7 +62,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v2 with: - python-version: '3.x' + python-version: "3.x" - name: Install dependencies run: python -m pip install --upgrade pip pytest-workflow From 53f12e5571cc3cc56d3536a2575b089d087b70b3 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 20:06:58 +0100 Subject: [PATCH 33/52] update sample names to make sure the tests are not overwritting each other --- tests/csv/3.0/recalibrated_somatic.csv | 4 ++-- tests/test_tools.yml | 24 ++++++++++++------------ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/csv/3.0/recalibrated_somatic.csv b/tests/csv/3.0/recalibrated_somatic.csv index 3ed631bc31..d7364586ef 100644 --- a/tests/csv/3.0/recalibrated_somatic.csv +++ b/tests/csv/3.0/recalibrated_somatic.csv @@ -1,3 +1,3 @@ patient,gender,status,sample,cram,crai -test,XX,0,sample1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai -test,XX,1,sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai 
+test3,XX,0,sample3,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai +test3,XX,1,sample4,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai diff --git a/tests/test_tools.yml b/tests/test_tools.yml index 94f59909b3..1cc127a03a 100644 --- a/tests/test_tools.yml +++ b/tests/test_tools.yml @@ -172,12 +172,12 @@ - tumor_only - variant_calling files: - - path: results/variant_calling/sample1/manta/sample1.candidate_small_indels.vcf.gz - - path: results/variant_calling/sample1/manta/sample1.candidate_small_indels.vcf.gz.tbi - - path: results/variant_calling/sample1/manta/sample1.candidate_sv.vcf.gz - - path: results/variant_calling/sample1/manta/sample1.candidate_sv.vcf.gz.tbi - - path: results/variant_calling/sample1/manta/sample1.tumor_sv.vcf.gz - - path: results/variant_calling/sample1/manta/sample1.tumor_sv.vcf.gz.tbi + - path: results/variant_calling/sample2/manta/sample2.candidate_small_indels.vcf.gz + - path: results/variant_calling/sample2/manta/sample2.candidate_small_indels.vcf.gz.tbi + - path: results/variant_calling/sample2/manta/sample2.candidate_sv.vcf.gz + - path: results/variant_calling/sample2/manta/sample2.candidate_sv.vcf.gz.tbi + - path: results/variant_calling/sample2/manta/sample2.tumor_sv.vcf.gz + - path: results/variant_calling/sample2/manta/sample2.tumor_sv.vcf.gz.tbi - name: Run variant calling on tumor_only sample with manta without intervals command: nextflow run main.nf -profile test,tools_tumoronly,docker --tools manta --no_intervals @@ -187,12 +187,12 @@ 
- tumor_only - variant_calling files: - - path: results/variant_calling/sample1/manta/sample1.candidate_small_indels.vcf.gz - - path: results/variant_calling/sample1/manta/sample1.candidate_small_indels.vcf.gz.tbi - - path: results/variant_calling/sample1/manta/sample1.candidate_sv.vcf.gz - - path: results/variant_calling/sample1/manta/sample1.candidate_sv.vcf.gz.tbi - - path: results/variant_calling/sample1/manta/sample1.tumor_sv.vcf.gz - - path: results/variant_calling/sample1/manta/sample1.tumor_sv.vcf.gz.tbi + - path: results/variant_calling/sample2/manta/sample2.candidate_small_indels.vcf.gz + - path: results/variant_calling/sample2/manta/sample2.candidate_small_indels.vcf.gz.tbi + - path: results/variant_calling/sample2/manta/sample2.candidate_sv.vcf.gz + - path: results/variant_calling/sample2/manta/sample2.candidate_sv.vcf.gz.tbi + - path: results/variant_calling/sample2/manta/sample2.tumor_sv.vcf.gz + - path: results/variant_calling/sample2/manta/sample2.tumor_sv.vcf.gz.tbi - name: Run variant calling on somatic sample with manta command: nextflow run main.nf -profile test,tools_somatic,docker --tools manta From 7cb1c0ae773c961274ca4ea1f16ac43769a7926f Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 20:07:12 +0100 Subject: [PATCH 34/52] update sample names to make sure the tests are not overwriting each other --- tests/csv/3.0/recalibrated_tumoronly.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/csv/3.0/recalibrated_tumoronly.csv b/tests/csv/3.0/recalibrated_tumoronly.csv index a62e7c21bd..1b453883dc 100644 --- a/tests/csv/3.0/recalibrated_tumoronly.csv +++ b/tests/csv/3.0/recalibrated_tumoronly.csv @@ -1,2 +1,2 @@ patient,gender,status,sample,cram,crai
-test,XX,1,sample,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai +test1,XX,1,sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai From f600e6abe350fc891265711f07257d99976ba6b0 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 22:57:58 +0100 Subject: [PATCH 35/52] Fix manta tests --- tests/test_tools.yml | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/tests/test_tools.yml b/tests/test_tools.yml index 1cc127a03a..fe6a82acbb 100644 --- a/tests/test_tools.yml +++ b/tests/test_tools.yml @@ -201,14 +201,18 @@ - somatic - variant_calling files: - - path: results/variant_calling/sample1/manta/sample1.candidate_small_indels.vcf.gz - - path: results/variant_calling/sample1/manta/sample1.candidate_small_indels.vcf.gz.tbi - - path: results/variant_calling/sample1/manta/sample1.candidate_sv.vcf.gz - - path: results/variant_calling/sample1/manta/sample1.candidate_sv.vcf.gz.tbi - - path: results/variant_calling/sample1/manta/sample1.diploid_sv.vcf.gz - - path: results/variant_calling/sample1/manta/sample1.diploid_sv.vcf.gz.tbi - - path: results/variant_calling/sample1/manta/sample1.tumor_sv.vcf.gz - - path: results/variant_calling/sample1/manta/sample1.tumor_sv.vcf.gz.tbi + - path: results/variant_calling/sample3/manta/sample3.candidate_small_indels.vcf.gz + - path: results/variant_calling/sample3/manta/sample3.candidate_small_indels.vcf.gz.tbi + - path: 
results/variant_calling/sample3/manta/sample3.candidate_sv.vcf.gz + - path: results/variant_calling/sample3/manta/sample3.candidate_sv.vcf.gz.tbi + - path: results/variant_calling/sample3/manta/sample3.diploid_sv.vcf.gz + - path: results/variant_calling/sample3/manta/sample3.diploid_sv.vcf.gz.tbi + - path: results/variant_calling/sample4_vs_sample3/manta/sample4_vs_sample3.candidate_small_indels.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/manta/sample4_vs_sample3.candidate_small_indels.vcf.gz.tbi + - path: results/variant_calling/sample4_vs_sample3/manta/sample4_vs_sample3.candidate_sv.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/manta/sample4_vs_sample3.candidate_sv.vcf.gz.tbi + - path: results/variant_calling/sample4_vs_sample3/manta/sample4_vs_sample3.diploid_sv.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/manta/sample4_vs_sample3.diploid_sv.vcf.gz.tbi - name: Run variant calling on somatic sample with manta without intervals command: nextflow run main.nf -profile test,tools_somatic,docker --tools manta --no_intervals @@ -218,14 +222,18 @@ - somatic - variant_calling files: - - path: results/variant_calling/sample1/manta/sample1.candidate_small_indels.vcf.gz - - path: results/variant_calling/sample1/manta/sample1.candidate_small_indels.vcf.gz.tbi - - path: results/variant_calling/sample1/manta/sample1.candidate_sv.vcf.gz - - path: results/variant_calling/sample1/manta/sample1.candidate_sv.vcf.gz.tbi - - path: results/variant_calling/sample1/manta/sample1.diploid_sv.vcf.gz - - path: results/variant_calling/sample1/manta/sample1.diploid_sv.vcf.gz.tbi - - path: results/variant_calling/sample1/manta/sample1.tumor_sv.vcf.gz - - path: results/variant_calling/sample1/manta/sample1.tumor_sv.vcf.gz.tbi + - path: results/variant_calling/sample3/manta/sample3.candidate_small_indels.vcf.gz + - path: results/variant_calling/sample3/manta/sample3.candidate_small_indels.vcf.gz.tbi + - path: 
results/variant_calling/sample3/manta/sample3.candidate_sv.vcf.gz + - path: results/variant_calling/sample3/manta/sample3.candidate_sv.vcf.gz.tbi + - path: results/variant_calling/sample3/manta/sample3.diploid_sv.vcf.gz + - path: results/variant_calling/sample3/manta/sample3.diploid_sv.vcf.gz.tbi + - path: results/variant_calling/sample4_vs_sample3/manta/sample4_vs_sample3.candidate_small_indels.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/manta/sample4_vs_sample3.candidate_small_indels.vcf.gz.tbi + - path: results/variant_calling/sample4_vs_sample3/manta/sample4_vs_sample3.candidate_sv.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/manta/sample4_vs_sample3.candidate_sv.vcf.gz.tbi + - path: results/variant_calling/sample4_vs_sample3/manta/sample4_vs_sample3.diploid_sv.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/manta/sample4_vs_sample3.diploid_sv.vcf.gz.tbi #- name: Run variant calling on tumoronly sample with mutect2 From 8d6c7b847b67759d5b277c1e850af9acbba6c944 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 22:58:16 +0100 Subject: [PATCH 36/52] refactor mutect2 tumor_only with new syntax --- subworkflows/local/pair_variant_calling.nf | 36 ++--- subworkflows/local/tumor_variant_calling.nf | 2 +- .../local/variantcalling/deepvariant.nf | 4 +- .../main.nf | 126 ++++++++++-------- 4 files changed, 92 insertions(+), 76 deletions(-) diff --git a/subworkflows/local/pair_variant_calling.nf b/subworkflows/local/pair_variant_calling.nf index c2efd66316..4d01dade6a 100644 --- a/subworkflows/local/pair_variant_calling.nf +++ b/subworkflows/local/pair_variant_calling.nf @@ -101,24 +101,24 @@ workflow PAIR_VARIANT_CALLING { } if (tools.contains('mutect2')) { - cram_pair_intervals.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, intervals -> - [meta, [normal_cram, tumor_cram], [normal_crai, tumor_crai], intervals, ['normal']] - }.set{cram_pair_mutect2} - - GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING( - 
cram_pair_mutect2, - fasta, - fasta_fai, - dict, - germline_resource, - germline_resource_tbi, - panel_of_normals, - panel_of_normals_tbi, - no_intervals, - num_intervals, - intervals_bed_combine_gz - ) - ch_versions = ch_versions.mix(GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING.out.versions) + // cram_pair_intervals.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, intervals -> + // [meta, [normal_cram, tumor_cram], [normal_crai, tumor_crai], intervals, ['normal']] + // }.set{cram_pair_mutect2} + + // GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING( + // cram_pair_mutect2, + // fasta, + // fasta_fai, + // dict, + // germline_resource, + // germline_resource_tbi, + // panel_of_normals, + // panel_of_normals_tbi, + // no_intervals, + // num_intervals, + // intervals_bed_combine_gz + // ) + // ch_versions = ch_versions.mix(GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING.out.versions) } // if (tools.contains('tiddit')) { diff --git a/subworkflows/local/tumor_variant_calling.nf b/subworkflows/local/tumor_variant_calling.nf index 7ab31d9178..2a4ff271e5 100644 --- a/subworkflows/local/tumor_variant_calling.nf +++ b/subworkflows/local/tumor_variant_calling.nf @@ -86,7 +86,7 @@ workflow TUMOR_ONLY_VARIANT_CALLING { no_intervals, intervals_bed_combine_gz) - //mutect2_vcf_tbi = mutect2_vcf_tbi.mix(GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING.out.mutect2_vcf_gz_tbi) + mutect2_vcf = GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING.out.mutect2_vcf ch_versions = ch_versions.mix(GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING.out.versions) } diff --git a/subworkflows/local/variantcalling/deepvariant.nf b/subworkflows/local/variantcalling/deepvariant.nf index 0e4c846c88..a2cd69160c 100644 --- a/subworkflows/local/variantcalling/deepvariant.nf +++ b/subworkflows/local/variantcalling/deepvariant.nf @@ -53,8 +53,10 @@ workflow RUN_DEEPVARIANT { // Mix output channels for "no intervals" and "with intervals" results deepvariant_vcf = Channel.empty().mix( CONCAT_DEEPVARIANT_GVCF.out.vcf, + 
CONCAT_DEEPVARIANT_GVCF.out.tbi, CONCAT_DEEPVARIANT_VCF.out.vcf, - DEEPVARIANT.out.gvcf.join(TABIX_VC_DEEPVARIANT_GVCF.out.tbi), + CONCAT_DEEPVARIANT_VCF.out.tbi, + DEEPVARIANT.out.gvcf.join(TABIX_VC_DEEPVARIANT_GVCF.out.tbi), //TODO: properly not needed if published DEEPVARIANT.out.vcf.join(TABIX_VC_DEEPVARIANT_VCF.out.tbi)) ch_versions = ch_versions.mix(BGZIP_VC_DEEPVARIANT_GVCF.out.versions) diff --git a/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf b/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf index d8b44a46e8..9d4f316eb4 100644 --- a/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf +++ b/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf @@ -2,14 +2,14 @@ // Run GATK mutect2 in tumor only mode, getepileupsummaries, calculatecontamination and filtermutectcalls // -include { BGZIP as BGZIP_MUTECT2 } from '../../../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_MUTECT2 } from '../../../../modules/local/concat_vcf/main' -include { GATK4_MUTECT2 as MUTECT2 } from '../../../../modules/nf-core/modules/gatk4/mutect2/main' -include { GATK4_MERGEMUTECTSTATS as MERGEMUTECTSTATS } from '../../../../modules/nf-core/modules/gatk4/mergemutectstats/main' -include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES } from '../../../../modules/nf-core/modules/gatk4/getpileupsummaries/main' -include { GATK4_GATHERPILEUPSUMMARIES as GATHERPILEUPSUMMARIES } from '../../../../modules/nf-core/modules/gatk4/gatherpileupsummaries/main' -include { GATK4_CALCULATECONTAMINATION as CALCULATECONTAMINATION } from '../../../../modules/nf-core/modules/gatk4/calculatecontamination/main' -include { GATK4_FILTERMUTECTCALLS as FILTERMUTECTCALLS } from '../../../../modules/nf-core/modules/gatk4/filtermutectcalls/main' +include { BGZIP as BGZIP_VC_MUTECT2 } from '../../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_MUTECT2 } from '../../../../modules/local/concat_vcf/main' +include { 
GATK4_MUTECT2 as MUTECT2 } from '../../../../modules/nf-core/modules/gatk4/mutect2/main' +include { GATK4_MERGEMUTECTSTATS as MERGEMUTECTSTATS } from '../../../../modules/nf-core/modules/gatk4/mergemutectstats/main' +include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES } from '../../../../modules/nf-core/modules/gatk4/getpileupsummaries/main' +include { GATK4_GATHERPILEUPSUMMARIES as GATHERPILEUPSUMMARIES } from '../../../../modules/nf-core/modules/gatk4/gatherpileupsummaries/main' +include { GATK4_CALCULATECONTAMINATION as CALCULATECONTAMINATION } from '../../../../modules/nf-core/modules/gatk4/calculatecontamination/main' +include { GATK4_FILTERMUTECTCALLS as FILTERMUTECTCALLS } from '../../../../modules/nf-core/modules/gatk4/filtermutectcalls/main' workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { take: @@ -25,16 +25,13 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { no_intervals intervals_bed_combine_gz - main: ch_versions = Channel.empty() // //Perform variant calling using mutect2 module in tumor single mode. // - mutect2_vcf_gz_tbi = Channel.empty() MUTECT2 ( input , true , false , false , fasta , fai , dict , germline_resource , germline_resource_tbi , panel_of_normals , panel_of_normals_tbi ) - ch_versions = ch_versions.mix(MUTECT2.out.versions) // //Generate pileup summary table using getepileupsummaries. 
@@ -44,60 +41,68 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { [meta, input_file, input_index, intervals] } GETPILEUPSUMMARIES ( pileup_input , fasta, fai, dict, germline_resource , germline_resource_tbi ) - ch_versions = ch_versions.mix(GETPILEUPSUMMARIES.out.versions) - if(no_intervals){ - mutect2_vcf_gz_tbi = MUTECT2.out.vcf.join(MUTECT2.out.tbi) - mutect2_stats = MUTECT2.out.stats - pileup_table = GETPILEUPSUMMARIES.out.table - }else{ + // Figure out if using intervals or no_intervals + MUTECT2.out.vcf.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{ mutect2_vcf } - //Merge Mutect2 VCF - BGZIP_MUTECT2(MUTECT2.out.vcf) - BGZIP_MUTECT2.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.set{bgzip_mutect2} + MUTECT2.out.stats.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{ mutect2_stats } - mutect2_vcf_to_concat = bgzip_mutect2.groupTuple(size: num_intervals) + GETPILEUPSUMMARIES.out.table.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }set{ pileup_table } - CONCAT_VCF_MUTECT2(mutect2_vcf_to_concat, fai, intervals_bed_combine_gz) - mutect2_vcf_gz_tbi = CONCAT_VCF_MUTECT2.out.vcf.join(CONCAT_VCF_MUTECT2.out.tbi) - ch_versions = ch_versions.mix(BGZIP_MUTECT2.out.versions) - ch_versions = ch_versions.mix(CONCAT_VCF_MUTECT2.out.versions) + //Only when using intervals + //Merge Mutect2 VCF + BGZIP_VC_MUTECT2(mutect2_vcf_gz_tbi.intervals) - //Merge Muteect2 Stats - MUTECT2.out.stats.map{ meta, stats -> + CONCAT_MUTECT2(BGZIP_VC_MUTECT2.out.vcf.map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.sample - [new_meta, stats] - }.groupTuple(size: num_intervals).set{mutect2_stats_to_merge} - - MERGEMUTECTSTATS(mutect2_stats_to_merge) - mutect2_stats = MERGEMUTECTSTATS.out.stats - ch_versions = ch_versions.mix(MERGEMUTECTSTATS.out.versions) - - //Merge Pileup Summaries - pileup_tables_to_gather = 
GETPILEUPSUMMARIES.out.table.map{ meta, table -> + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fai, + intervals_bed_combine_gz) + + mutect2_vcf = Channel.empty().mix( + CONCAT_MUTECT2.out.vcf, + mutect2_vcf.no_intervals) + + //Merge Muteect2 Stats + MERGEMUTECTSTATS(mutect2_stats.intervals.map{ meta, stats -> + new_meta = meta.clone() + new_meta.id = new_meta.sample + [new_meta, stats] + }.groupTuple(size: num_intervals)) + + mutect2_stats = Channel.empty().mix( + MERGEMUTECTSTATS.out.stats, + mutect2_stats.no_intervals) + + //Merge Pileup Summaries + GATHERPILEUPSUMMARIES( GETPILEUPSUMMARIES.out.table.map{ meta, table -> new_meta = meta.clone() new_meta.id = new_meta.sample [new_meta, table] - }.groupTuple(size: num_intervals) + }.groupTuple(size: num_intervals), + dict) - GATHERPILEUPSUMMARIES(pileup_tables_to_gather, dict) - pileup_table = GATHERPILEUPSUMMARIES.out.table - - } + pileup_table = Channel.empty().mix( + GATHERPILEUPSUMMARIES.out.table, + pileup_table.no_intervals) // //Contamination and segmentation tables created using calculatecontamination on the pileup summary table. // pileup_table.map{meta, table -> [meta, table, []]}.set{table_contamination} CALCULATECONTAMINATION ( table_contamination, true ) - ch_versions = ch_versions.mix(CALCULATECONTAMINATION.out.versions) - // //Mutect2 calls filtered by filtermutectcalls using the contamination and segmentation tables. 
@@ -107,20 +112,29 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { .join(CALCULATECONTAMINATION.out.contamination) ch_filtermutect_in = ch_filtermutect.map{ meta, vcf, tbi, stats, seg, cont -> [meta, vcf, tbi, stats, [], seg, cont, []] } FILTERMUTECTCALLS ( ch_filtermutect_in, fasta, fai, dict ) + + ch_versions = ch_versions.mix(BGZIP_VC_MUTECT2.out.versions) + ch_versions = ch_versions.mix(CALCULATECONTAMINATION.out.versions) + ch_versions = ch_versions.mix(CONCAT_MUTECT2.out.versions) ch_versions = ch_versions.mix(FILTERMUTECTCALLS.out.versions) + ch_versions = ch_versions.mix(GATHERPILEUPSUMMARIES.out.versions) + ch_versions = ch_versions.mix(GETPILEUPSUMMARIES.out.versions) + ch_versions = ch_versions.mix(MERGEMUTECTSTATS.out.versions) + ch_versions = ch_versions.mix(MUTECT2.out.versions) + emit: - mutect2_vcf_gz_tbi = mutect2_vcf_gz_tbi // channel: [ val(meta), [ vcf ] ] - mutect2_stats = MUTECT2.out.stats // channel: [ val(meta), [ stats ] ] + mutect2_vcf = mutect2_vcf // channel: [ val(meta), [ vcf ] ] + mutect2_stats = MUTECT2.out.stats // channel: [ val(meta), [ stats ] ] - pileup_table = pileup_table // channel: [ val(meta), [ table ] ] + pileup_table = pileup_table // channel: [ val(meta), [ table ] ] - contamination_table = CALCULATECONTAMINATION.out.contamination // channel: [ val(meta), [ contamination ] ] - segmentation_table = CALCULATECONTAMINATION.out.segmentation // channel: [ val(meta), [ segmentation ] ] + contamination_table = CALCULATECONTAMINATION.out.contamination // channel: [ val(meta), [ contamination ] ] + segmentation_table = CALCULATECONTAMINATION.out.segmentation // channel: [ val(meta), [ segmentation ] ] - filtered_vcf = FILTERMUTECTCALLS.out.vcf // channel: [ val(meta), [ vcf ] ] - filtered_index = FILTERMUTECTCALLS.out.tbi // channel: [ val(meta), [ tbi ] ] - filtered_stats = FILTERMUTECTCALLS.out.stats // channel: [ val(meta), [ stats ] ] + filtered_vcf = FILTERMUTECTCALLS.out.vcf // channel: [ val(meta), [ vcf ] ] + 
filtered_index = FILTERMUTECTCALLS.out.tbi // channel: [ val(meta), [ tbi ] ] + filtered_stats = FILTERMUTECTCALLS.out.stats // channel: [ val(meta), [ stats ] ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } From f52695c458fe8030f21e0833957fcf8c9f3d1644 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 23:16:54 +0100 Subject: [PATCH 37/52] Add in mutect confs --- conf/modules.config | 61 ++++++++++++------- .../main.nf | 8 +-- 2 files changed, 44 insertions(+), 25 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index dd08e387ba..3eeb8d1d6b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -616,7 +616,29 @@ process{ // TUMOR_VARIANT_CALLING + //MANTA + withName: 'CONCAT_MANTA_TUMOR' { + ext.prefix = {"${meta.id}.tumor_sv"} + } + //MUTECT2 + withName: 'GATK4_CALCULATECONTAMINATION' { + publishDir = [ + enabled: true, + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" } + ] + } + + withName: 'CONCAT_MUTECT2.*' { + publishDir = [ + enabled: "${!params.no_intervals}", + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + withName: 'FILTERMUTECTCALLS'{ ext.prefix = {"${meta.id}.filtered."} publishDir = [ @@ -628,6 +650,19 @@ process{ withName: 'GATHERPILEUPSUMMARIES' { ext.prefix = { "${meta.id}.table" } + publishDir = [ + enabled: "${!params.no_intervals}", + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" } + ] + } + + withName: 'GETPILEUPSUMMARIES' { + publishDir = [ + enabled: "${params.no_intervals}", + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" } + ] } withName: 'MERGEMUTECTSTATS' { @@ -653,6 +688,10 @@ process{ // PAIR_VARIANT_CALLING + withName: 'CONCAT_MANTA_SOMATIC' { + ext.prefix = {"${meta.id}.somatic_sv"} + } + withName: 
'CONCAT_MUTECT2' { publishDir = [ enabled: "${!params.no_intervals}", @@ -662,27 +701,7 @@ process{ } } -// withName: 'GATK4_CALCULATECONTAMINATION'{ -// ext.args = '' -// publishDir = [ -// enabled: false, -// mode: params.publish_dir_mode -// ] -//} -//withName: 'GATK4_FILTERMUTECTCALLS'{ -// ext.args = '' -// publishDir = [ -// enabled: false, -// mode: params.publish_dir_mode -// ] -//} -//withName: 'GATK4_GETPILEUPSUMMARIES'{ -// ext.args = '' -// publishDir = [ -// enabled: false, -// mode: params.publish_dir_mode -// ] -//} + //withName: 'GENOMICSDBIMPORT' { // //} diff --git a/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf b/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf index 9d4f316eb4..20c81d4e8e 100644 --- a/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf +++ b/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf @@ -61,7 +61,7 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { //Only when using intervals //Merge Mutect2 VCF - BGZIP_VC_MUTECT2(mutect2_vcf_gz_tbi.intervals) + BGZIP_VC_MUTECT2(mutect2_vcf.intervals) CONCAT_MUTECT2(BGZIP_VC_MUTECT2.out.vcf.map{ meta, vcf -> new_meta = meta.clone() @@ -107,9 +107,9 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { // //Mutect2 calls filtered by filtermutectcalls using the contamination and segmentation tables. 
// - ch_filtermutect = mutect2_vcf_gz_tbi.join(mutect2_stats) - .join(CALCULATECONTAMINATION.out.segmentation) - .join(CALCULATECONTAMINATION.out.contamination) + ch_filtermutect = mutect2_vcf.join(mutect2_stats) + .join(CALCULATECONTAMINATION.out.segmentation) + .join(CALCULATECONTAMINATION.out.contamination) ch_filtermutect_in = ch_filtermutect.map{ meta, vcf, tbi, stats, seg, cont -> [meta, vcf, tbi, stats, [], seg, cont, []] } FILTERMUTECTCALLS ( ch_filtermutect_in, fasta, fai, dict ) From 50ff2f716597d0fd58be52bcc25147a4b67b0775 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 22 Mar 2022 23:41:52 +0100 Subject: [PATCH 38/52] update strelka output paths --- .github/workflows/ci.yml | 2 ++ tests/test_tools.yml | 73 ++++++++++++++++++++++++++++++++++------ 2 files changed, 64 insertions(+), 11 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 624f89a705..6035a74617 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -39,6 +39,8 @@ jobs: - "gatk4_spark" - "haplotypecaller" - "manta" + #- "mutect2" + #- "msisensorpro" # - 'save_bam_mapped' - "skip_markduplicates" - "strelka" diff --git a/tests/test_tools.yml b/tests/test_tools.yml index fe6a82acbb..e02e6f9f5b 100644 --- a/tests/test_tools.yml +++ b/tests/test_tools.yml @@ -74,8 +74,8 @@ - tumor_only - variant_calling files: - - path: results/variant_calling/sample1/freebayes/sample1.vcf.gz - - path: results/variant_calling/sample1/freebayes/sample1.vcf.gz.tbi + - path: results/variant_calling/sample2/freebayes/sample2.vcf.gz + - path: results/variant_calling/sample2/freebayes/sample2.vcf.gz.tbi - name: Run variant calling on tumor_only sample with freebayes without intervals command: nextflow run main.nf -profile test,tools_tumoronly,docker --tools freebayes --no_intervals @@ -85,8 +85,8 @@ - tumor_only - variant_calling files: - - path: results/variant_calling/sample1/freebayes/sample1.vcf.gz - - path: results/variant_calling/sample1/freebayes/sample1.vcf.gz.tbi 
+ - path: results/variant_calling/sample2/freebayes/sample2.vcf.gz + - path: results/variant_calling/sample2/freebayes/sample2.vcf.gz.tbi - name: Run variant calling on germline sample with haplotypecaller command: nextflow run main.nf -profile test,tools_germline,docker --tools haplotypecaller @@ -271,11 +271,62 @@ - path: results/variant_calling/sample1/strelka/sample1.variants.vcf.gz.tbi - path: results/variant_calling/sample1/strelka/sample1.genome.vcf.gz - path: results/variant_calling/sample1/strelka/sample1.genome.vcf.gz.tbi -#- name: Run variant calling on tumoronly sample with strelka -# -#- name: Run variant calling on tumoronly sample with strelka without intervals -# -#- name: Run variant calling on somatic sample with strelka -# -#- name: Run variant calling on somatic sample with strelka without intervals + +- name: Run variant calling on tumoronly sample with strelka + command: nextflow run main.nf -profile test,tools_tumoronly,docker --tools strelka + tags: + - strelka + - tumor_only + - variant_calling + files: + - path: results/variant_calling/sample2/strelka/sample2.variants.vcf.gz + - path: results/variant_calling/sample2/strelka/sample2.variants.vcf.gz.tbi + - path: results/variant_calling/sample2/strelka/sample2.genome.vcf.gz + - path: results/variant_calling/sample2/strelka/sample2.genome.vcf.gz.tbi + +- name: Run variant calling on tumoronly sample with strelka without intervals + command: nextflow run main.nf -profile test,tools_tumoronly,docker --tools strelka --no_intervals + tags: + - no_intervals + - strelka + - tumor_only + - variant_calling + files: + - path: results/variant_calling/sample2/strelka/sample2.variants.vcf.gz + - path: results/variant_calling/sample2/strelka/sample2.variants.vcf.gz.tbi + - path: results/variant_calling/sample2/strelka/sample2.genome.vcf.gz + - path: results/variant_calling/sample2/strelka/sample2.genome.vcf.gz.tbi + +- name: Run variant calling on somatic sample with strelka + command: nextflow run 
main.nf -profile test,tools_somatic,docker --tools strelka + tags: + - somatic + - strelka + - variant_calling + files: + - path: results/variant_calling/sample3/strelka/sample3.variants.vcf.gz + - path: results/variant_calling/sample3/strelka/sample3.variants.vcf.gz.tbi + - path: results/variant_calling/sample3/strelka/sample3.genome.vcf.gz + - path: results/variant_calling/sample3/strelka/sample3.genome.vcf.gz.tbi + - path: results/variant_calling/sample4_vs_sample3/strelka/sample4_vs_sample3.somatic_indels.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/strelka/sample4_vs_sample3.somatic_indels.vcf.gz.tbi + - path: results/variant_calling/sample4_vs_sample3/strelka/sample4_vs_sample3.somatic_snvs.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/strelka/sample4_vs_sample3.somatic_snvs.vcf.gz.tbi + +- name: Run variant calling on somatic sample with strelka without intervals + command: nextflow run main.nf -profile test,tools_somatic,docker --tools strelka --no_intervals + tags: + - no_intervals + - somatic + - strelka + - variant_calling + files: + - path: results/variant_calling/sample3/strelka/sample3.variants.vcf.gz + - path: results/variant_calling/sample3/strelka/sample3.variants.vcf.gz.tbi + - path: results/variant_calling/sample3/strelka/sample3.genome.vcf.gz + - path: results/variant_calling/sample3/strelka/sample3.genome.vcf.gz.tbi + - path: results/variant_calling/sample4_vs_sample3/strelka/sample4_vs_sample3.somatic_indels.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/strelka/sample4_vs_sample3.somatic_indels.vcf.gz.tbi + - path: results/variant_calling/sample4_vs_sample3/strelka/sample4_vs_sample3.somatic_snvs.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/strelka/sample4_vs_sample3.somatic_snvs.vcf.gz.tbi #TODO: Test for strelka + manta From 670e9149c6fd948d05dbe8d36b5990ef932cdc16 Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 23 Mar 2022 08:51:27 +0100 Subject: [PATCH 39/52] remove tbi from sw 
outputs as not needed for annotation --- subworkflows/local/variantcalling/deepvariant.nf | 6 ++---- subworkflows/local/variantcalling/freebayes.nf | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/subworkflows/local/variantcalling/deepvariant.nf b/subworkflows/local/variantcalling/deepvariant.nf index a2cd69160c..da3014459e 100644 --- a/subworkflows/local/variantcalling/deepvariant.nf +++ b/subworkflows/local/variantcalling/deepvariant.nf @@ -53,11 +53,9 @@ workflow RUN_DEEPVARIANT { // Mix output channels for "no intervals" and "with intervals" results deepvariant_vcf = Channel.empty().mix( CONCAT_DEEPVARIANT_GVCF.out.vcf, - CONCAT_DEEPVARIANT_GVCF.out.tbi, CONCAT_DEEPVARIANT_VCF.out.vcf, - CONCAT_DEEPVARIANT_VCF.out.tbi, - DEEPVARIANT.out.gvcf.join(TABIX_VC_DEEPVARIANT_GVCF.out.tbi), //TODO: properly not needed if published - DEEPVARIANT.out.vcf.join(TABIX_VC_DEEPVARIANT_VCF.out.tbi)) + DEEPVARIANT.out.gvcf, + DEEPVARIANT.out.vcf) ch_versions = ch_versions.mix(BGZIP_VC_DEEPVARIANT_GVCF.out.versions) ch_versions = ch_versions.mix(BGZIP_VC_DEEPVARIANT_VCF.out.versions) diff --git a/subworkflows/local/variantcalling/freebayes.nf b/subworkflows/local/variantcalling/freebayes.nf index dfb6095cb3..3ba8c71bfb 100644 --- a/subworkflows/local/variantcalling/freebayes.nf +++ b/subworkflows/local/variantcalling/freebayes.nf @@ -40,7 +40,7 @@ workflow RUN_FREEBAYES { // Mix output channels for "no intervals" and "with intervals" results freebayes_vcf = Channel.empty().mix( CONCAT_FREEBAYES.out.vcf, - FREEBAYES.out.vcf.join(TABIX_VC_FREEBAYES.out.tbi)) + FREEBAYES.out.vcf) ch_versions = ch_versions.mix(BGZIP_VC_FREEBAYES.out.versions) ch_versions = ch_versions.mix(CONCAT_FREEBAYES.out.versions) From efc1b268e374591c43046fc241d0a8c427b7904e Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 23 Mar 2022 09:19:37 +0100 Subject: [PATCH 40/52] Fix run_freebayes input channels --- subworkflows/local/germline_variant_calling.nf | 2 +- 
subworkflows/local/tumor_variant_calling.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index cff5c88663..717522b8af 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -71,7 +71,7 @@ workflow GERMLINE_VARIANT_CALLING { .map{ meta, cram, crai, intervals -> [meta, cram, crai, [], [], intervals] } - RUN_FREEBAYES(cram_recalibrated_intervals_freebayes, fasta, fasta_fai) + RUN_FREEBAYES(cram_recalibrated_intervals_freebayes, fasta, fasta_fai, intervals_bed_combine_gz, num_intervals) freebayes_vcf = RUN_FREEBAYES.out.freebayes_vcf ch_versions = ch_versions.mix(RUN_FREEBAYES.out.versions) diff --git a/subworkflows/local/tumor_variant_calling.nf b/subworkflows/local/tumor_variant_calling.nf index 2a4ff271e5..91d5a58fec 100644 --- a/subworkflows/local/tumor_variant_calling.nf +++ b/subworkflows/local/tumor_variant_calling.nf @@ -64,7 +64,7 @@ workflow TUMOR_ONLY_VARIANT_CALLING { [meta, cram, crai, [], [], intervals] } - RUN_FREEBAYES(cram_recalibrated_intervals_freebayes, fasta, fasta_fai) + RUN_FREEBAYES(cram_recalibrated_intervals_freebayes, fasta, fasta_fai, intervals_bed_combine_gz, num_intervals) freebayes_vcf = RUN_FREEBAYES.out.freebayes_vcf ch_versions = ch_versions.mix(RUN_FREEBAYES.out.versions) From 5496ce1a9ea03a4e4c93ead3e8894a38704619a9 Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 23 Mar 2022 10:29:11 +0100 Subject: [PATCH 41/52] Reorganize vc subworkflow after code review --- subworkflows/local/germline_variant_calling.nf | 12 +++++++----- subworkflows/local/pair_variant_calling.nf | 4 ++-- subworkflows/local/prepare_intervals.nf | 1 + subworkflows/local/tumor_variant_calling.nf | 6 +++--- .../variantcalling/deepvariant/main.nf} | 14 +++++++------- .../variantcalling/freebayes/main.nf} | 8 ++++---- .../variantcalling/haplotypecaller/main.nf} | 12 ++++++------ 
.../variantcalling/manta/germline/main.nf} | 14 +++++++------- .../variantcalling/manta/somatic/main.nf} | 18 +++++++++--------- .../variantcalling/manta/tumoronly/main.nf} | 14 +++++++------- .../variantcalling/strelka/single/main.nf} | 10 +++++----- .../variantcalling/strelka/somatic/main.nf} | 10 +++++----- .../variantcalling/tiddit/main.nf} | 4 ++-- 13 files changed, 65 insertions(+), 62 deletions(-) rename subworkflows/{local/variantcalling/deepvariant.nf => nf-core/variantcalling/deepvariant/main.nf} (91%) rename subworkflows/{local/variantcalling/freebayes.nf => nf-core/variantcalling/freebayes/main.nf} (80%) rename subworkflows/{local/variantcalling/haplotypecaller.nf => nf-core/variantcalling/haplotypecaller/main.nf} (92%) rename subworkflows/{local/variantcalling/manta_germline.nf => nf-core/variantcalling/manta/germline/main.nf} (93%) rename subworkflows/{local/variantcalling/manta_somatic.nf => nf-core/variantcalling/manta/somatic/main.nf} (93%) rename subworkflows/{local/variantcalling/manta_tumoronly.nf => nf-core/variantcalling/manta/tumoronly/main.nf} (93%) rename subworkflows/{local/variantcalling/strelka_single.nf => nf-core/variantcalling/strelka/single/main.nf} (83%) rename subworkflows/{local/variantcalling/strelka_somatic.nf => nf-core/variantcalling/strelka/somatic/main.nf} (84%) rename subworkflows/{local/variantcalling/tiddit.nf => nf-core/variantcalling/tiddit/main.nf} (91%) diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index 717522b8af..ec5f80e9d7 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -2,11 +2,11 @@ // GERMLINE VARIANT CALLING // -include { RUN_DEEPVARIANT } from './variantcalling/deepvariant.nf' -include { RUN_FREEBAYES } from './variantcalling/freebayes.nf' -include { RUN_HAPLOTYPECALLER } from './variantcalling/haplotypecaller.nf' -include { RUN_MANTA_GERMLINE } from 
'./variantcalling/manta_germline.nf' -include { RUN_STRELKA_SINGLE } from './variantcalling/strelka_single.nf' +include { RUN_DEEPVARIANT } from '../nf-core/variantcalling/deepvariant/main.nf' +include { RUN_FREEBAYES } from '../nf-core/variantcalling/freebayes/main.nf' +include { RUN_HAPLOTYPECALLER } from '../nf-core/variantcalling/haplotypecaller/main.nf' +include { RUN_MANTA_GERMLINE } from '../nf-core/variantcalling/manta/germline/main.nf' +include { RUN_STRELKA_SINGLE } from '../nf-core/variantcalling/strelka/single/main.nf' //include { TIDDIT } from './variantcalling/tiddit.nf' workflow GERMLINE_VARIANT_CALLING { @@ -40,6 +40,7 @@ workflow GERMLINE_VARIANT_CALLING { cram_recalibrated_intervals = cram_recalibrated.combine(intervals) .map{ meta, cram, crai, intervals -> sample = meta.sample + //new_intervals = num_intervals > 1 ? intervals : [] new_intervals = intervals.baseName != "no_intervals" ? intervals : [] id = new_intervals ? sample + "_" + new_intervals.baseName : sample [[ id: id, sample: meta.sample, gender: meta.gender, status: meta.status, patient: meta.patient ], cram, crai, new_intervals] @@ -49,6 +50,7 @@ workflow GERMLINE_VARIANT_CALLING { cram_recalibrated_intervals_gz_tbi = cram_recalibrated.combine(intervals_bed_gz_tbi) .map{ meta, cram, crai, bed, tbi -> sample = meta.sample + //new_bed = num_intervals > 1 ? bed : [] //TODO can I pass in empty lists? Then I only need to work with the id line new_bed = bed.simpleName != "no_intervals" ? bed : [] new_tbi = tbi.simpleName != "no_intervals" ? tbi : [] id = new_bed ? 
sample + "_" + new_bed.simpleName : sample diff --git a/subworkflows/local/pair_variant_calling.nf b/subworkflows/local/pair_variant_calling.nf index 4d01dade6a..39ca161ef8 100644 --- a/subworkflows/local/pair_variant_calling.nf +++ b/subworkflows/local/pair_variant_calling.nf @@ -3,8 +3,8 @@ // include { GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING } from '../../subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main' include { MSISENSORPRO_MSI_SOMATIC } from '../../modules/nf-core/modules/msisensorpro/msi_somatic/main' -include { RUN_MANTA_SOMATIC } from './variantcalling/manta_somatic.nf' -include { RUN_STRELKA_SOMATIC } from './variantcalling/strelka_somatic.nf' +include { RUN_MANTA_SOMATIC } from '../nf-core/variantcalling/manta/somatic/main.nf' +include { RUN_STRELKA_SOMATIC } from '../nf-core/variantcalling/strelka/somatic/main.nf' workflow PAIR_VARIANT_CALLING { take: diff --git a/subworkflows/local/prepare_intervals.nf b/subworkflows/local/prepare_intervals.nf index 2af55711fe..87805b0fdf 100644 --- a/subworkflows/local/prepare_intervals.nf +++ b/subworkflows/local/prepare_intervals.nf @@ -20,6 +20,7 @@ workflow PREPARE_INTERVALS { ch_versions = Channel.empty() + // TODO maybe instead [] ch_intervals = Channel.empty() ch_intervals_bed_gz_tbi = Channel.empty() ch_intervals_combined_bed_gz_tbi = Channel.empty() // Create bed.gz and bed.gz.tbi for input/or created interval file. Contains ALL regions. 
diff --git a/subworkflows/local/tumor_variant_calling.nf b/subworkflows/local/tumor_variant_calling.nf index 91d5a58fec..3ea3d34489 100644 --- a/subworkflows/local/tumor_variant_calling.nf +++ b/subworkflows/local/tumor_variant_calling.nf @@ -3,10 +3,10 @@ // Should be only run on patients without normal sample // -include { RUN_FREEBAYES } from './variantcalling/freebayes.nf' +include { RUN_FREEBAYES } from '../nf-core/variantcalling/freebayes/main.nf' include { GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING } from '../../subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main' -include { RUN_MANTA_TUMORONLY } from './variantcalling/manta_tumoronly.nf' -include { RUN_STRELKA_SINGLE } from './variantcalling/strelka_single.nf' +include { RUN_MANTA_TUMORONLY } from '../nf-core/variantcalling/manta/tumoronly/main.nf' +include { RUN_STRELKA_SINGLE } from '../nf-core/variantcalling/strelka/single/main.nf' workflow TUMOR_ONLY_VARIANT_CALLING { take: diff --git a/subworkflows/local/variantcalling/deepvariant.nf b/subworkflows/nf-core/variantcalling/deepvariant/main.nf similarity index 91% rename from subworkflows/local/variantcalling/deepvariant.nf rename to subworkflows/nf-core/variantcalling/deepvariant/main.nf index da3014459e..e5069f2bab 100644 --- a/subworkflows/local/variantcalling/deepvariant.nf +++ b/subworkflows/nf-core/variantcalling/deepvariant/main.nf @@ -1,10 +1,10 @@ -include { BGZIP as BGZIP_VC_DEEPVARIANT_GVCF } from '../../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_DEEPVARIANT_VCF } from '../../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_DEEPVARIANT_GVCF } from '../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_DEEPVARIANT_VCF } from '../../../modules/local/concat_vcf/main' -include { DEEPVARIANT } from '../../../modules/nf-core/modules/deepvariant/main' -include { TABIX_TABIX as TABIX_VC_DEEPVARIANT_GVCF } from '../../../modules/nf-core/modules/tabix/tabix/main' -include { TABIX_TABIX as 
TABIX_VC_DEEPVARIANT_VCF } from '../../../modules/nf-core/modules/tabix/tabix/main' +include { BGZIP as BGZIP_VC_DEEPVARIANT_GVCF } from '../../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_DEEPVARIANT_VCF } from '../../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_DEEPVARIANT_GVCF } from '../../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_DEEPVARIANT_VCF } from '../../../../modules/local/concat_vcf/main' +include { DEEPVARIANT } from '../../../../modules/nf-core/modules/deepvariant/main' +include { TABIX_TABIX as TABIX_VC_DEEPVARIANT_GVCF } from '../../../../modules/nf-core/modules/tabix/tabix/main' +include { TABIX_TABIX as TABIX_VC_DEEPVARIANT_VCF } from '../../../../modules/nf-core/modules/tabix/tabix/main' //TODO: benchmark if it is better to provide multiple bed files & run on multiple machines + mergeing afterwards || one containing all intervals and run on one larger machine // Deepvariant: https://github.com/google/deepvariant/issues/510 diff --git a/subworkflows/local/variantcalling/freebayes.nf b/subworkflows/nf-core/variantcalling/freebayes/main.nf similarity index 80% rename from subworkflows/local/variantcalling/freebayes.nf rename to subworkflows/nf-core/variantcalling/freebayes/main.nf index 3ba8c71bfb..3e909a5e4e 100644 --- a/subworkflows/local/variantcalling/freebayes.nf +++ b/subworkflows/nf-core/variantcalling/freebayes/main.nf @@ -1,7 +1,7 @@ -include { BGZIP as BGZIP_VC_FREEBAYES } from '../../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_FREEBAYES } from '../../../modules/local/concat_vcf/main' -include { FREEBAYES } from '../../../modules/nf-core/modules/freebayes/main' -include { TABIX_TABIX as TABIX_VC_FREEBAYES } from '../../../modules/nf-core/modules/tabix/tabix/main' +include { BGZIP as BGZIP_VC_FREEBAYES } from '../../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_FREEBAYES } from '../../../../modules/local/concat_vcf/main' +include { FREEBAYES } from 
'../../../../modules/nf-core/modules/freebayes/main' +include { TABIX_TABIX as TABIX_VC_FREEBAYES } from '../../../../modules/nf-core/modules/tabix/tabix/main' workflow RUN_FREEBAYES { take: diff --git a/subworkflows/local/variantcalling/haplotypecaller.nf b/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf similarity index 92% rename from subworkflows/local/variantcalling/haplotypecaller.nf rename to subworkflows/nf-core/variantcalling/haplotypecaller/main.nf index 7b2d03ed1e..7e955f3d66 100644 --- a/subworkflows/local/variantcalling/haplotypecaller.nf +++ b/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf @@ -1,9 +1,9 @@ -include { BGZIP as BGZIP_VC_HAPLOTYPECALLER } from '../../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_HAPLOTYPECALLER } from '../../../modules/local/concat_vcf/main' -include { GATK4_GENOTYPEGVCFS as GENOTYPEGVCFS } from '../../../modules/nf-core/modules/gatk4/genotypegvcfs/main' -include { GATK4_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../../modules/nf-core/modules/gatk4/haplotypecaller/main' -include { GATK_JOINT_GERMLINE_VARIANT_CALLING } from '../../../subworkflows/nf-core/gatk4/joint_germline_variant_calling/main' -include { TABIX_TABIX as TABIX_VC_HAPLOTYPECALLER } from '../../../modules/nf-core/modules/tabix/tabix/main' +include { BGZIP as BGZIP_VC_HAPLOTYPECALLER } from '../../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_HAPLOTYPECALLER } from '../../../../modules/local/concat_vcf/main' +include { GATK4_GENOTYPEGVCFS as GENOTYPEGVCFS } from '../../../../modules/nf-core/modules/gatk4/genotypegvcfs/main' +include { GATK4_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../../../modules/nf-core/modules/gatk4/haplotypecaller/main' +include { GATK_JOINT_GERMLINE_VARIANT_CALLING } from '../../../../subworkflows/nf-core/gatk4/joint_germline_variant_calling/main' +include { TABIX_TABIX as TABIX_VC_HAPLOTYPECALLER } from '../../../../modules/nf-core/modules/tabix/tabix/main' workflow 
RUN_HAPLOTYPECALLER { take: diff --git a/subworkflows/local/variantcalling/manta_germline.nf b/subworkflows/nf-core/variantcalling/manta/germline/main.nf similarity index 93% rename from subworkflows/local/variantcalling/manta_germline.nf rename to subworkflows/nf-core/variantcalling/manta/germline/main.nf index fe3ab37729..dec574f959 100644 --- a/subworkflows/local/variantcalling/manta_germline.nf +++ b/subworkflows/nf-core/variantcalling/manta/germline/main.nf @@ -1,10 +1,10 @@ -include { BGZIP as BGZIP_VC_MANTA_DIPLOID } from '../../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SV } from '../../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../modules/local/concat_vcf/main' -include { MANTA_GERMLINE } from '../../../modules/nf-core/modules/manta/germline/main' +include { BGZIP as BGZIP_VC_MANTA_DIPLOID } from '../../../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_SV } from '../../../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../../../modules/local/concat_vcf/main' +include { MANTA_GERMLINE } from '../../../../../modules/nf-core/modules/manta/germline/main' // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. 
// Seems to be the consensus on upstream modules implementation too diff --git a/subworkflows/local/variantcalling/manta_somatic.nf b/subworkflows/nf-core/variantcalling/manta/somatic/main.nf similarity index 93% rename from subworkflows/local/variantcalling/manta_somatic.nf rename to subworkflows/nf-core/variantcalling/manta/somatic/main.nf index c9e77d7ae5..34e2f5eb9c 100644 --- a/subworkflows/local/variantcalling/manta_somatic.nf +++ b/subworkflows/nf-core/variantcalling/manta/somatic/main.nf @@ -1,12 +1,12 @@ -include { BGZIP as BGZIP_VC_MANTA_DIPLOID } from '../../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SOMATIC } from '../../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SV } from '../../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SOMATIC } from '../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../modules/local/concat_vcf/main' -include { MANTA_SOMATIC } from '../../../modules/nf-core/modules/manta/somatic/main' +include { BGZIP as BGZIP_VC_MANTA_DIPLOID } from '../../../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_SOMATIC } from '../../../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_SV } from '../../../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SOMATIC } from '../../../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SV 
} from '../../../../../modules/local/concat_vcf/main' +include { MANTA_SOMATIC } from '../../../../../modules/nf-core/modules/manta/somatic/main' workflow RUN_MANTA_SOMATIC { take: diff --git a/subworkflows/local/variantcalling/manta_tumoronly.nf b/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf similarity index 93% rename from subworkflows/local/variantcalling/manta_tumoronly.nf rename to subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf index 8a15015c71..13696c0db7 100644 --- a/subworkflows/local/variantcalling/manta_tumoronly.nf +++ b/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf @@ -1,10 +1,10 @@ -include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SV } from '../../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_TUMOR } from '../../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_TUMOR } from '../../../modules/local/concat_vcf/main' -include { MANTA_TUMORONLY } from '../../../modules/nf-core/modules/manta/tumoronly/main' +include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_SV } from '../../../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_TUMOR } from '../../../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_TUMOR } from '../../../../../modules/local/concat_vcf/main' +include { MANTA_TUMORONLY } from '../../../../../modules/nf-core/modules/manta/tumoronly/main' // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. 
// Seems to be the consensus on upstream modules implementation too diff --git a/subworkflows/local/variantcalling/strelka_single.nf b/subworkflows/nf-core/variantcalling/strelka/single/main.nf similarity index 83% rename from subworkflows/local/variantcalling/strelka_single.nf rename to subworkflows/nf-core/variantcalling/strelka/single/main.nf index 0f3aac321d..e90ec8118a 100644 --- a/subworkflows/local/variantcalling/strelka_single.nf +++ b/subworkflows/nf-core/variantcalling/strelka/single/main.nf @@ -1,8 +1,8 @@ -include { BGZIP as BGZIP_VC_STRELKA } from '../../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_STRELKA_GENOME } from '../../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_STRELKA } from '../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_STRELKA_GENOME } from '../../../modules/local/concat_vcf/main' -include { STRELKA_GERMLINE } from '../../../modules/nf-core/modules/strelka/germline/main' +include { BGZIP as BGZIP_VC_STRELKA } from '../../../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_STRELKA_GENOME } from '../../../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_STRELKA } from '../../../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_STRELKA_GENOME } from '../../../../../modules/local/concat_vcf/main' +include { STRELKA_GERMLINE } from '../../../../../modules/nf-core/modules/strelka/germline/main' // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. 
// Seems to be the consensus on upstream modules implementation too diff --git a/subworkflows/local/variantcalling/strelka_somatic.nf b/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf similarity index 84% rename from subworkflows/local/variantcalling/strelka_somatic.nf rename to subworkflows/nf-core/variantcalling/strelka/somatic/main.nf index f5767c534a..b0bc7509d8 100644 --- a/subworkflows/local/variantcalling/strelka_somatic.nf +++ b/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf @@ -1,8 +1,8 @@ -include { BGZIP as BGZIP_VC_STRELKA_INDELS } from '../../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_STRELKA_SNVS } from '../../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_STRELKA_INDELS } from '../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_STRELKA_SNVS } from '../../../modules/local/concat_vcf/main' -include { STRELKA_SOMATIC } from '../../../modules/nf-core/modules/strelka/somatic/main' +include { BGZIP as BGZIP_VC_STRELKA_INDELS } from '../../../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_STRELKA_SNVS } from '../../../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_STRELKA_INDELS } from '../../../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_STRELKA_SNVS } from '../../../../../modules/local/concat_vcf/main' +include { STRELKA_SOMATIC } from '../../../../../modules/nf-core/modules/strelka/somatic/main' // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. 
// Seems to be the consensus on upstream modules implementation too diff --git a/subworkflows/local/variantcalling/tiddit.nf b/subworkflows/nf-core/variantcalling/tiddit/main.nf similarity index 91% rename from subworkflows/local/variantcalling/tiddit.nf rename to subworkflows/nf-core/variantcalling/tiddit/main.nf index 87ec6050b1..c21e2e845a 100644 --- a/subworkflows/local/variantcalling/tiddit.nf +++ b/subworkflows/nf-core/variantcalling/tiddit/main.nf @@ -1,5 +1,5 @@ -include { TABIX_BGZIPTABIX as TABIX_BGZIP_TIDDIT_SV } from '../../../modules/nf-core/modules/tabix/bgziptabix/main' -include { TIDDIT_SV } from '../../../modules/nf-core/modules/tiddit/sv/main' +include { TABIX_BGZIPTABIX as TABIX_BGZIP_TIDDIT_SV } from '../../../../modules/nf-core/modules/tabix/bgziptabix/main' +include { TIDDIT_SV } from '../../../../modules/nf-core/modules/tiddit/sv/main' //TODO: UNDER CONSTRUCTIONS workflow RUN_TIDDIT { From 191fad79f161d9c82cf71a369fd61c665bc4d496 Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 23 Mar 2022 12:12:27 +0100 Subject: [PATCH 42/52] add mutect2 test --- .github/workflows/ci.yml | 2 +- conf/modules.config | 1 + tests/test_tools.yml | 13 +++++++++++-- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6035a74617..ca0bbb3532 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -39,7 +39,7 @@ jobs: - "gatk4_spark" - "haplotypecaller" - "manta" - #- "mutect2" + - "mutect2" #- "msisensorpro" # - 'save_bam_mapped' - "skip_markduplicates" diff --git a/conf/modules.config b/conf/modules.config index 3eeb8d1d6b..57328404fd 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -681,6 +681,7 @@ process{ enabled: "${params.no_intervals}", mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" } + // pattern: "*{gz,gz.tbi,stats}" ] } diff --git a/tests/test_tools.yml b/tests/test_tools.yml index e02e6f9f5b..19bd555e82 100644 --- 
a/tests/test_tools.yml +++ b/tests/test_tools.yml @@ -235,9 +235,18 @@ - path: results/variant_calling/sample4_vs_sample3/manta/sample4_vs_sample3.diploid_sv.vcf.gz - path: results/variant_calling/sample4_vs_sample3/manta/sample4_vs_sample3.diploid_sv.vcf.gz.tbi -#- name: Run variant calling on tumoronly sample with mutect2 +- name: Run variant calling on tumoronly sample with mutect2 + command: nextflow run main.nf -profile test,tools_tumoronly,docker --tools mutect2 + tags: + - mutect2 + - tumor_only + - variant_calling + files: + - path: results/variant_calling/sample2/mutect2/sample2.vcf.gz + - path: results/variant_calling/sample2/mutect2/sample2.vcf.gz.tbi + - path: results/variant_calling/sample2/mutect2/sample2.vcf.gz.stats -#- name: Run variant calling on tumoronly sample with mutect2 without intervals +# - name: Run variant calling on tumoronly sample with mutect2 without intervals #- name: Run variant calling on somatic sample with mutect2 From 94067b3ed4c21a66c0a1cf2589c6fc4ee9eb7c6b Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 23 Mar 2022 15:43:02 +0100 Subject: [PATCH 43/52] tests pass locally, whats wrong with this --- conf/modules.config | 10 +++++++--- conf/test.config | 4 ++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 57328404fd..f7290d7b4a 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -227,6 +227,10 @@ process{ process { withName: "SEQKIT_SPLIT2" { + //For unknown reasons occasionally 'java.util.ConcurrentModificationException' is thrown + //Rerunning usually works + //TODO not sure this works, since the error appears randomly + errorStrategy = {task.exitStatus == 'java.util.ConcurrentModificationException' ? 
'retry' : 'ignore'} ext.args = { "--by-size ${params.split_fastq}" } ext.when = { params.split_fastq > 1 } publishDir = [ @@ -469,7 +473,7 @@ process{ enabled: "${params.no_intervals}", mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/deepvariant" }, - pattern: "*{vcf.gz,vcf.gz.tbi}" + pattern: "*{vcf.gz,vcf.gz.tbi}" //TODO gvcf should only be outputted when --generate_gvcf is set ] } withName : 'TABIX_VC_DEEPVARIANT_GVCF' { @@ -680,8 +684,8 @@ process{ publishDir = [ enabled: "${params.no_intervals}", mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" } - // pattern: "*{gz,gz.tbi,stats}" + path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" }, + pattern: "*{gz,gz.tbi,stats}" ] } diff --git a/conf/test.config b/conf/test.config index ac0f738b2f..97d7814731 100644 --- a/conf/test.config +++ b/conf/test.config @@ -15,8 +15,8 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' + max_cpus = 4 + max_memory = '8.GB' max_time = '8.h' // Input data From dde5dd57ab047dc7b580734b9a30876a38c1b0c5 Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 23 Mar 2022 17:38:26 +0100 Subject: [PATCH 44/52] Add tumor only mutects + tests, cause thats important too --- conf/modules.config | 6 +- .../local/germline_variant_calling.nf | 4 +- .../joint_germline_variant_calling/main.nf | 21 ----- .../main.nf | 13 ++- .../variantcalling/haplotypecaller/main.nf | 92 +++++++++---------- 5 files changed, 61 insertions(+), 75 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index f7290d7b4a..6d5a560f01 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -528,7 +528,7 @@ process{ ] } withName: 'HAPLOTYPECALLER' { - ext.args = '-ERC GVCF' + ext.args = { params.joint_germline ? 
"-ERC GVCF" : "" } ext.prefix = {"${meta.id}.g"} ext.when = { params.tools && params.tools.contains('haplotypecaller') } publishDir = [ @@ -539,7 +539,7 @@ process{ ] } withName: 'GENOTYPEGVCFS' { - ext.when = { params.tools && params.tools.contains('haplotypecaller') } + ext.when = { params.tools && params.tools.contains('haplotypecaller') && params.joint_germline} publishDir = [ enabled: true, mode: params.publish_dir_mode, @@ -644,7 +644,7 @@ process{ } withName: 'FILTERMUTECTCALLS'{ - ext.prefix = {"${meta.id}.filtered."} + ext.prefix = {"${meta.id}.filtered"} publishDir = [ enabled: true, mode: params.publish_dir_mode, diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index ec5f80e9d7..9738880697 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -87,9 +87,9 @@ workflow GERMLINE_VARIANT_CALLING { dict, dbsnp, dbsnp_tbi, - num_intervals, intervals_bed_combine_gz, - intervals_bed_combine_gz_tbi) + intervals_bed_combine_gz_tbi, + num_intervals) haplotypecaller_gvcf = RUN_HAPLOTYPECALLER.out.haplotypecaller_gvcf genotype_gvcf = RUN_HAPLOTYPECALLER.out.genotype_gvcf diff --git a/subworkflows/nf-core/gatk4/joint_germline_variant_calling/main.nf b/subworkflows/nf-core/gatk4/joint_germline_variant_calling/main.nf index 3e2240423d..9e1d1d95bf 100644 --- a/subworkflows/nf-core/gatk4/joint_germline_variant_calling/main.nf +++ b/subworkflows/nf-core/gatk4/joint_germline_variant_calling/main.nf @@ -29,32 +29,12 @@ workflow GATK_JOINT_GERMLINE_VARIANT_CALLING { main: ch_versions = Channel.empty() - // haplotypecaller can be skipped if input samples are already in gvcf format, essentially making the subworkflow joint genotyping. - if (run_haplotc) { - haplotc_input = channel.from(input) - // - //Perform variant calling using haplotypecaller module. Additional argument "-ERC GVCF" used to run in gvcf mode. 
- // - HAPLOTYPECALLER ( haplotc_input, fasta, fai, dict, sites, sites_index ) - - ch_versions = ch_versions.mix(HAPLOTYPECALLER.out.versions.first()) - ch_vcf = HAPLOTYPECALLER.out.vcf.collect{it[1]}.toList() - ch_index = HAPLOTYPECALLER.out.tbi.collect{it[1]}.toList() - - } else { - // if haplotypecaller is skipped, this channels the input to genomicsdbimport instead of the output vcfs and tbis that normally come from haplotypecaller - direct_input = channel.from(input) - ch_vcf = direct_input.collect{it[1]}.toList() - ch_index = direct_input.collect{it[2]}.toList() - } - // //Convert all sample vcfs into a genomicsdb workspace using genomicsdbimport. // gendb_input = Channel.of([[ id:joint_id ]]).combine(ch_vcf).combine(ch_index).combine([interval_file]).combine(['']).combine([dict]) GENOMICSDBIMPORT ( gendb_input, false, false, false ) - ch_versions = ch_versions.mix(GENOMICSDBIMPORT.out.versions) // @@ -65,7 +45,6 @@ workflow GATK_JOINT_GERMLINE_VARIANT_CALLING { ch_genotype_in.add([]) GENOTYPEGVCFS ( ch_genotype_in, fasta, fai, dict, sites, sites_index ) - ch_versions = ch_versions.mix(GENOTYPEGVCFS.out.versions) // setting run_vqsr to false skips the VQSR process, for if user does not wish to perform VQSR, diff --git a/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf b/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf index 20c81d4e8e..7a1fa4a0e3 100644 --- a/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf +++ b/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf @@ -48,6 +48,11 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { no_intervals: num_intervals == 1 }.set{ mutect2_vcf } + MUTECT2.out.tbi.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{ mutect2_tbi } + MUTECT2.out.stats.branch{ intervals: num_intervals > 1 no_intervals: num_intervals == 1 @@ -75,6 +80,10 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { CONCAT_MUTECT2.out.vcf, 
mutect2_vcf.no_intervals) + mutect2_tbi = Channel.empty().mix( + CONCAT_MUTECT2.out.tbi, + mutect2_tbi.no_intervals) + //Merge Muteect2 Stats MERGEMUTECTSTATS(mutect2_stats.intervals.map{ meta, stats -> new_meta = meta.clone() @@ -107,10 +116,12 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { // //Mutect2 calls filtered by filtermutectcalls using the contamination and segmentation tables. // - ch_filtermutect = mutect2_vcf.join(mutect2_stats) + ch_filtermutect = mutect2_vcf.join(mutect2_tbi) + .join(mutect2_stats) .join(CALCULATECONTAMINATION.out.segmentation) .join(CALCULATECONTAMINATION.out.contamination) ch_filtermutect_in = ch_filtermutect.map{ meta, vcf, tbi, stats, seg, cont -> [meta, vcf, tbi, stats, [], seg, cont, []] } + FILTERMUTECTCALLS ( ch_filtermutect_in, fasta, fai, dict ) ch_versions = ch_versions.mix(BGZIP_VC_MUTECT2.out.versions) diff --git a/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf b/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf index 7e955f3d66..0a8a6958f5 100644 --- a/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf +++ b/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf @@ -7,33 +7,39 @@ include { TABIX_TABIX as TABIX_VC_HAPLOTYPECALLER } from '../../../../modules/n workflow RUN_HAPLOTYPECALLER { take: - cram_recalibrated_intervals - fasta - fasta_fai - dict - dbsnp - dbsnp_tbi - num_intervals - intervals_bed_combine_gz - intervals_bed_combine_gz_tbi + cram // channel: [mandatory] [meta, cram, crai, interval.bed.gz, interval.bed.gz.tbi] + fasta // channel: [mandatory] + fasta_fai // channel: [mandatory] + dict // channel: [mandatory] + dbsnp // channel: [mandatory] + dbsnp_tbi // channel: [mandatory] + intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. 
+ intervals_bed_combine_gz_tbi // channel: [optional] Contains a [bed.gz, bed.gz.tbi ]file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. + num_intervals // val: [optional] Number of used intervals, mandatory when intervals are provided. main: ch_versions = Channel.empty() HAPLOTYPECALLER( - cram_recalibrated_intervals, + cram, fasta, fasta_fai, dict, dbsnp, dbsnp_tbi) + // Figure out if using intervals or no_intervals + HAPLOTYPECALLER.out.vcf.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{haplotypecaller_vcf_branch} + // Only when no intervals - TABIX_VC_HAPLOTYPECALLER(HAPLOTYPECALLER.out.vcf) + TABIX_VC_HAPLOTYPECALLER(haplotypecaller_vcf_branch.no_intervals) // Only when using intervals - BGZIP_VC_HAPLOTYPECALLER(HAPLOTYPECALLER.out.vcf) + BGZIP_VC_HAPLOTYPECALLER(haplotypecaller_vcf_branch.intervals) CONCAT_HAPLOTYPECALLER( BGZIP_VC_HAPLOTYPECALLER.out.vcf @@ -43,48 +49,38 @@ workflow RUN_HAPLOTYPECALLER { [new_meta, vcf] }.groupTuple(size: num_intervals), fasta_fai, - intervals_bed_combine_gz) - - HAPLOTYPECALLER.out.vcf.groupTuple(size: num_intervals) - .branch{ - intervals: it[1].size() > 1 - no_intervals: it[1].size() == 1 - }.set{haplotypecaller_gvcf_intervals} + intervals_bed_gz) - HAPLOTYPECALLER.out.tbi.groupTuple(size: num_intervals) - .branch{ - intervals: it[1].size() > 1 - no_intervals: it[1].size() == 1 - }.set{haplotypecaller_gvcf_tbi_intervals} - - haplotypecaller_gvcf = Channel.empty().mix( + haplotypecaller_vcf = Channel.empty().mix( CONCAT_HAPLOTYPECALLER.out.vcf, - haplotypecaller_gvcf_intervals.no_intervals) + haplotypecaller_vcf_branch.no_intervals) - haplotypecaller_gvcf_tbi = Channel.empty().mix( + haplotypecaller_vcf_tbi = Channel.empty().mix( CONCAT_HAPLOTYPECALLER.out.tbi, - haplotypecaller_gvcf_tbi_intervals.no_intervals) + haplotypecaller_vcf_branch.no_intervals) - genotype_gvcf_to_call = haplotypecaller_gvcf.join(haplotypecaller_gvcf_tbi) - 
.combine(intervals_bed_combine_gz_tbi) - .map{ - meta, gvcf, gvf_tbi, intervals, intervals_tbi -> - new_intervals = intervals.simpleName != "no_intervals" ? intervals : [] - new_intervals_tbi = intervals_tbi.simpleName != "no_intervals" ? intervals_tbi : [] - [meta, gvcf, gvf_tbi, new_intervals, new_intervals_tbi] - } + // genotype_gvcf_to_call = haplotypecaller_gvcf.join(haplotypecaller_gvcf_tbi) + // .combine(intervals_bed_combine_gz_tbi) + // .map{ + // meta, gvcf, gvf_tbi, intervals, intervals_tbi -> + // new_intervals = intervals.simpleName != "no_intervals" ? intervals : [] + // new_intervals_tbi = intervals_tbi.simpleName != "no_intervals" ? intervals_tbi : [] + // [meta, gvcf, gvf_tbi, new_intervals, new_intervals_tbi] + // } // GENOTYPEGVCFS - GENOTYPEGVCFS( - genotype_gvcf_to_call, - fasta, - fasta_fai, - dict, - dbsnp, - dbsnp_tbi) + // GENOTYPEGVCFS( + // genotype_gvcf_to_call, + // fasta, + // fasta_fai, + // dict, + // dbsnp, + // dbsnp_tbi) + //workflow haplotypecaller (default mode)-> CNNScoreVariants + //workflow haplotypecaller (ERC mode) -> GenomicsDBimport -> GenotypeGVCFs -> VQSR - genotype_gvcf = GENOTYPEGVCFS.out.vcf + //genotype_gvcf = GENOTYPEGVCFS.out.vcf // if (joint_germline) { // run_haplotypecaller = false @@ -100,9 +96,9 @@ workflow RUN_HAPLOTYPECALLER { // dbsnp, // dbsnp_tbi, // "joined", - // allelespecific? - // resources? - // annotation? + // allelespecific? + // resources? + // annotation? // "BOTH", // true, // truthsensitivity -> parameter or module? 
From 9cf36eedcd0f41cad1dea40c2ec820b78b3d39f8 Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 23 Mar 2022 22:25:39 +0100 Subject: [PATCH 45/52] Add in mutect2 somatic --- conf/modules.config | 33 +++- subworkflows/local/pair_variant_calling.nf | 35 ++-- subworkflows/local/tumor_variant_calling.nf | 5 +- .../main.nf | 177 ++++++++++-------- .../main.nf | 4 +- 5 files changed, 143 insertions(+), 111 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 6d5a560f01..b5af30000a 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -643,7 +643,7 @@ process{ ] } - withName: 'FILTERMUTECTCALLS'{ + withName: 'FILTERMUTECTCALLS.*'{ ext.prefix = {"${meta.id}.filtered"} publishDir = [ enabled: true, @@ -652,7 +652,7 @@ process{ ] } - withName: 'GATHERPILEUPSUMMARIES' { + withName: 'GATHERPILEUPSUMMARIES.*' { ext.prefix = { "${meta.id}.table" } publishDir = [ enabled: "${!params.no_intervals}", @@ -661,7 +661,7 @@ process{ ] } - withName: 'GETPILEUPSUMMARIES' { + withName: 'GETPILEUPSUMMARIES.*' { publishDir = [ enabled: "${params.no_intervals}", mode: params.publish_dir_mode, @@ -689,19 +689,38 @@ process{ ] } - - // PAIR_VARIANT_CALLING + //MANTA withName: 'CONCAT_MANTA_SOMATIC' { ext.prefix = {"${meta.id}.somatic_sv"} } - withName: 'CONCAT_MUTECT2' { + //MUTECT2 + withName: 'CALCULATECONTAMINATION'{ + //ext.args = { params.ignore_soft_clipped_bases ? 
"--dont-use-soft-clipped-bases true" : "" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" }, + ] + } + + withName: 'NFCORE_SAREK:SAREK:PAIR_VARIANT_CALLING:GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING:GATHERPILEUPSUMMARIES.*' { + ext.prefix = { "${meta.id}.table" } publishDir = [ enabled: "${!params.no_intervals}", mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" } + //use ${meta.tumor_id}_vs_${meta_normal_id} to publish in the same directory as the remainders of the + //somatic output whilst keeping the filename prefix identifieable for status type + path: { "${params.outdir}/variant_calling/${meta.tumor_id}_vs_${meta_normal_id}/mutect2" } + ] + } + + withName: 'LEARNREADORIENTATIONMODEL'{ + ext.prefix = { "${meta.id}.learnreadorientationmodel" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" }, ] } diff --git a/subworkflows/local/pair_variant_calling.nf b/subworkflows/local/pair_variant_calling.nf index 39ca161ef8..cefe12dfa7 100644 --- a/subworkflows/local/pair_variant_calling.nf +++ b/subworkflows/local/pair_variant_calling.nf @@ -101,23 +101,24 @@ workflow PAIR_VARIANT_CALLING { } if (tools.contains('mutect2')) { - // cram_pair_intervals.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, intervals -> - // [meta, [normal_cram, tumor_cram], [normal_crai, tumor_crai], intervals, ['normal']] - // }.set{cram_pair_mutect2} - - // GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING( - // cram_pair_mutect2, - // fasta, - // fasta_fai, - // dict, - // germline_resource, - // germline_resource_tbi, - // panel_of_normals, - // panel_of_normals_tbi, - // no_intervals, - // num_intervals, - // intervals_bed_combine_gz - // ) + cram_pair_intervals.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, intervals -> + [meta, [normal_cram, tumor_cram], [normal_crai, tumor_crai], intervals, 
['normal']] + }.set{cram_pair_mutect2} + + GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING( + cram_pair_mutect2, + fasta, + fasta_fai, + dict, + germline_resource, + germline_resource_tbi, + panel_of_normals, + panel_of_normals_tbi, + intervals_bed_combine_gz, + num_intervals + ) + + // mutect2_vcf = GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING.out.mutect2_vcf // ch_versions = ch_versions.mix(GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING.out.versions) } diff --git a/subworkflows/local/tumor_variant_calling.nf b/subworkflows/local/tumor_variant_calling.nf index 3ea3d34489..59ee2d5cea 100644 --- a/subworkflows/local/tumor_variant_calling.nf +++ b/subworkflows/local/tumor_variant_calling.nf @@ -82,9 +82,8 @@ workflow TUMOR_ONLY_VARIANT_CALLING { germline_resource_tbi, panel_of_normals, panel_of_normals_tbi, - num_intervals, - no_intervals, - intervals_bed_combine_gz) + intervals_bed_combine_gz, + num_intervals) mutect2_vcf = GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING.out.mutect2_vcf ch_versions = ch_versions.mix(GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING.out.versions) diff --git a/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf b/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf index 754d65dc5b..da1705017c 100644 --- a/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf +++ b/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf @@ -24,9 +24,8 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING { germline_resource_tbi // channel: /path/to/germline/index panel_of_normals // channel: /path/to/panel/of/normals panel_of_normals_tbi // channel: /path/to/panel/of/normals/index - no_intervals - num_intervals intervals_bed_combine_gz + num_intervals main: ch_versions = Channel.empty() @@ -40,108 +39,124 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING { // //Generate pileup summary tables using getepileupsummaries. 
tumor sample should always be passed in as the first input and input list entries of ch_mutect2_in, //to ensure correct file order for calculatecontamination. + input.multiMap{ meta, input_list, input_index_list, intervals, which_norm -> + tumor: [ meta, input_list[1], input_index_list[1], intervals ] + normal: [ meta, input_list[0], input_index_list[0], intervals ] + }.set{pileup} - pileup_tumor_input = input.map { - meta, input_list, input_index_list, intervals, which_norm -> - tumor_id = meta.tumor_id - id = intervals ? tumor_id + "_" + intervals.baseName : tumor_id - [[id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], input_list[1], input_index_list[1], intervals] - } - - pileup_normal_input = input.map { - meta, input_list, input_index_list, intervals, which_norm -> - normal_id = meta.normal_id - id = intervals ? normal_id + "_" + intervals.baseName : normal_id - [[id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], input_list[0], input_index_list[0], intervals] - } - GETPILEUPSUMMARIES_TUMOR ( pileup_tumor_input, fasta, fai, dict, germline_resource, germline_resource_tbi ) - GETPILEUPSUMMARIES_NORMAL ( pileup_normal_input, fasta, fai, dict, germline_resource, germline_resource_tbi ) + GETPILEUPSUMMARIES_TUMOR ( pileup.tumor, fasta, fai, dict, germline_resource, germline_resource_tbi ) + GETPILEUPSUMMARIES_NORMAL ( pileup.normal, fasta, fai, dict, germline_resource, germline_resource_tbi ) ch_versions = ch_versions.mix(GETPILEUPSUMMARIES_NORMAL.out.versions) - if(no_intervals){ - mutect2_vcf_gz_tbi = MUTECT2.out.vcf.join(MUTECT2.out.tbi) - mutect2_stats = MUTECT2.out.stats - pileup_table_tumor= GETPILEUPSUMMARIES_TUMOR.out.table - pileup_table_normal= GETPILEUPSUMMARIES_NORMAL.out.table + // Figure out if using intervals or no_intervals + MUTECT2.out.vcf.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{ mutect2_vcf } - 
}else{ + MUTECT2.out.tbi.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{ mutect2_tbi } - //Merge Mutect2 VCF - BGZIP_MUTECT2(MUTECT2.out.vcf) - - BGZIP_MUTECT2.out.vcf.map{ meta, vcf -> - id = meta.tumor_id + "_vs_" + meta.normal_id - [[id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], vcf] - }.set{bgzip_mutect2} + MUTECT2.out.stats.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{ mutect2_stats } - mutect2_vcf_to_concat = bgzip_mutect2.groupTuple(size: num_intervals) + GETPILEUPSUMMARIES_NORMAL.out.table.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }set{ pileup_table_normal } - CONCAT_VCF_MUTECT2(mutect2_vcf_to_concat, fai, intervals_bed_combine_gz) - mutect2_vcf_gz_tbi = CONCAT_VCF_MUTECT2.out.vcf.join(CONCAT_VCF_MUTECT2.out.tbi) + GETPILEUPSUMMARIES_TUMOR.out.table.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }set{ pileup_table_tumor } - ch_versions = ch_versions.mix(BGZIP_MUTECT2.out.versions) - ch_versions = ch_versions.mix(CONCAT_VCF_MUTECT2.out.versions) - //Merge Muteect2 Stats - MUTECT2.out.stats.map{ meta, stats -> - id = meta.tumor_id + "_vs_" + meta.normal_id - [[id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], stats] - }.groupTuple(size: num_intervals).set{mutect2_stats_to_merge} + //Only when using intervals - MERGEMUTECTSTATS(mutect2_stats_to_merge) - mutect2_stats = MERGEMUTECTSTATS.out.stats - ch_versions = ch_versions.mix(MERGEMUTECTSTATS.out.versions) + //Merge Mutect2 VCF + BGZIP_MUTECT2(MUTECT2.out.vcf) - //Merge Pileup Summaries - pileup_tumor_tables_to_gather = GETPILEUPSUMMARIES_TUMOR.out.table.map{ meta, table -> - [[id: meta.tumor_id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], table] - }.groupTuple(size: num_intervals) - - 
GATHERPILEUPSUMMARIES_TUMOR(pileup_tumor_tables_to_gather, dict) - GATHERPILEUPSUMMARIES_TUMOR.out.table.map{ meta, table -> - id = meta.tumor_id + "_vs_" + meta.normal_id - [[id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], table] - }.set{pileup_table_tumor} - - pileup_normal_tables_to_gather = GETPILEUPSUMMARIES_NORMAL.out.table.map{ meta, table -> + CONCAT_MUTECT2( + BGZIP_MUTECT2.out.vcf.map{ meta, vcf -> + [[id: meta.tumor_id + "_vs_" + meta.normal_id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], vcf] + }.groupTuple(size: num_intervals), + fai, + intervals_bed_combine_gz) + + mutect2_vcf = Channel.empty().mix( + CONCAT_MUTECT2.out.vcf, + mutect2_vcf.no_intervals) + + mutect2_tbi = Channel.empty().mix( + CONCAT_MUTECT2.out.tbi, + mutect2_tbi.no_intervals) + + ch_versions = ch_versions.mix(BGZIP_MUTECT2.out.versions) + ch_versions = ch_versions.mix(CONCAT_MUTECT2.out.versions) + + //Merge Muteect2 Stats + MERGEMUTECTSTATS(mutect2_stats.intervals.map{ meta, stats -> + [[id: meta.tumor_id + "_vs_" + meta.normal_id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], stats] + }.groupTuple(size: num_intervals)) + + mutect2_stats = Channel.empty().mix( + MERGEMUTECTSTATS.out.stats, + mutect2_stats.no_intervals) + + //Merge Pileup Summaries + GATHERPILEUPSUMMARIES_NORMAL( + GETPILEUPSUMMARIES_NORMAL.out.table.map{ meta, table -> [[id: meta.normal_id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], table] - }.groupTuple(size: num_intervals) + }.groupTuple(size: num_intervals), + dict) - GATHERPILEUPSUMMARIES_NORMAL(pileup_normal_tables_to_gather, dict) + gather_table_normal = Channel.empty().mix( GATHERPILEUPSUMMARIES_NORMAL.out.table.map{ meta, table -> - id = meta.tumor_id + "_vs_" + meta.normal_id - [[id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, 
gender: meta.gender, patient: meta.patient ], table] - }.set{pileup_table_normal} + [[id: meta.tumor_id + "_vs_" + meta.normal_id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], table] + }, + pileup_table_normal.no_intervals) + + GATHERPILEUPSUMMARIES_TUMOR( GETPILEUPSUMMARIES_TUMOR.out.table.map{ meta, table -> + [[id: meta.tumor_id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], table] + }.groupTuple(size: num_intervals), + dict) - } + gather_table_tumor = Channel.empty().mix( + GATHERPILEUPSUMMARIES_TUMOR.out.table.map{ meta, table -> + [[id: meta.tumor_id + "_vs_" + meta.normal_id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], table] + }, + pileup_table_tumor.no_intervals) // //Generate artifactpriors using learnreadorientationmodel on the f1r2 output of mutect2. // MUTECT2.out.f1r2.map{ meta, f1f2 -> - id = meta.tumor_id + "_vs_" + meta.normal_id - [[id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], f1f2] + [[id: meta.tumor_id + "_vs_" + meta.normal_id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], f1f2] }.groupTuple(size: num_intervals) .set{ch_learnread_in} LEARNREADORIENTATIONMODEL (ch_learnread_in) - ch_versions = ch_versions.mix(LEARNREADORIENTATIONMODEL.out.versions) // //Contamination and segmentation tables created using calculatecontamination on the pileup summary table. // - ch_calccon_in = pileup_table_tumor.join(pileup_table_normal) + ch_calccon_in = gather_table_tumor.join(gather_table_normal) CALCULATECONTAMINATION ( ch_calccon_in, true ) ch_versions = ch_versions.mix(CALCULATECONTAMINATION.out.versions) // //Mutect2 calls filtered by filtermutectcalls using the artifactpriors, contamination and segmentation tables. 
// - ch_filtermutect = mutect2_vcf_gz_tbi.join(mutect2_stats) - .join(LEARNREADORIENTATIONMODEL.out.artifactprior) - .join(CALCULATECONTAMINATION.out.segmentation) - .join(CALCULATECONTAMINATION.out.contamination) + ch_filtermutect = mutect2_vcf.join(mutect2_tbi) + .join(mutect2_stats) + .join(LEARNREADORIENTATIONMODEL.out.artifactprior) + .join(CALCULATECONTAMINATION.out.segmentation) + .join(CALCULATECONTAMINATION.out.contamination) ch_filtermutect.map{ meta, vcf, tbi, stats, orientation, seg, cont -> [meta, vcf, tbi, stats, orientation, seg, cont, []] }.set{ch_filtermutect_in} @@ -150,21 +165,21 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING { ch_versions = ch_versions.mix(FILTERMUTECTCALLS.out.versions) emit: - mutect2_vcf_gz_tbi = mutect2_vcf_gz_tbi // channel: [ val(meta), [ vcf ] ] - mutect2_stats = mutect2_stats // channel: [ val(meta), [ stats ] ] - //mutect2_f1r2 = MUTECT2.out.f1r2 // channel: [ val(meta), [ f1r2 ] ] + mutect2_vcf = mutect2_vcf // channel: [ val(meta), [ vcf ] ] + mutect2_stats = mutect2_stats // channel: [ val(meta), [ stats ] ] + mutect2_f1r2 = MUTECT2.out.f1r2 // channel: [ val(meta), [ f1r2 ] ] - artifact_priors = LEARNREADORIENTATIONMODEL.out.artifactprior // channel: [ val(meta), [ artifactprior ] ] + artifact_priors = LEARNREADORIENTATIONMODEL.out.artifactprior // channel: [ val(meta), [ artifactprior ] ] - pileup_table_tumor = pileup_table_tumor // channel: [ val(meta), [ table_tumor ] ] - pileup_table_normal = pileup_table_normal // channel: [ val(meta), [ table_normal ] ] + pileup_table_tumor = gather_table_tumor // channel: [ val(meta), [ table_tumor ] ] + pileup_table_normal = gather_table_normal // channel: [ val(meta), [ table_normal ] ] - contamination_table = CALCULATECONTAMINATION.out.contamination // channel: [ val(meta), [ contamination ] ] - segmentation_table = CALCULATECONTAMINATION.out.segmentation // channel: [ val(meta), [ segmentation ] ] + contamination_table = CALCULATECONTAMINATION.out.contamination // 
channel: [ val(meta), [ contamination ] ] + segmentation_table = CALCULATECONTAMINATION.out.segmentation // channel: [ val(meta), [ segmentation ] ] - filtered_vcf = FILTERMUTECTCALLS.out.vcf // channel: [ val(meta), [ vcf ] ] - filtered_tbi = FILTERMUTECTCALLS.out.tbi // channel: [ val(meta), [ tbi ] ] - filtered_stats = FILTERMUTECTCALLS.out.stats // channel: [ val(meta), [ stats ] ] + filtered_vcf = FILTERMUTECTCALLS.out.vcf // channel: [ val(meta), [ vcf ] ] + filtered_tbi = FILTERMUTECTCALLS.out.tbi // channel: [ val(meta), [ tbi ] ] + filtered_stats = FILTERMUTECTCALLS.out.stats // channel: [ val(meta), [ stats ] ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf b/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf index 7a1fa4a0e3..e83d287dea 100644 --- a/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf +++ b/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf @@ -21,9 +21,8 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { germline_resource_tbi // channel: /path/to/germline/index panel_of_normals // channel: /path/to/panel/of/normals panel_of_normals_tbi // channel: /path/to/panel/of/normals/index - num_intervals - no_intervals intervals_bed_combine_gz + num_intervals main: ch_versions = Channel.empty() @@ -133,7 +132,6 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { ch_versions = ch_versions.mix(MERGEMUTECTSTATS.out.versions) ch_versions = ch_versions.mix(MUTECT2.out.versions) - emit: mutect2_vcf = mutect2_vcf // channel: [ val(meta), [ vcf ] ] mutect2_stats = MUTECT2.out.stats // channel: [ val(meta), [ stats ] ] From b75f378eca553d8206a08b11c620665d872eda5f Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 23 Mar 2022 22:58:29 +0100 Subject: [PATCH 46/52] Add msisensorpro tests and fix bed file for it --- conf/modules.config | 10 ++- 
subworkflows/local/pair_variant_calling.nf | 13 ++-- subworkflows/local/prepare_intervals.nf | 3 +- tests/test_tools.yml | 79 ++++++++++++++++++++-- workflows/sarek.nf | 1 + 5 files changed, 92 insertions(+), 14 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index b5af30000a..afe4ca1721 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -712,7 +712,7 @@ process{ mode: params.publish_dir_mode, //use ${meta.tumor_id}_vs_${meta_normal_id} to publish in the same directory as the remainders of the //somatic output whilst keeping the filename prefix identifieable for status type - path: { "${params.outdir}/variant_calling/${meta.tumor_id}_vs_${meta_normal_id}/mutect2" } + path: { "${params.outdir}/variant_calling/${meta.tumor_id}_vs_${meta.normal_id}/mutect2" } ] } @@ -724,6 +724,14 @@ process{ ] } + //MSISENSORPRO + withName: 'MSISENSORPRO_MSI_SOMATIC'{ + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/${meta.id}/msisensorpro" }, + ] + } + } //withName: 'GENOMICSDBIMPORT' { diff --git a/subworkflows/local/pair_variant_calling.nf b/subworkflows/local/pair_variant_calling.nf index cefe12dfa7..e3ddcc1b0d 100644 --- a/subworkflows/local/pair_variant_calling.nf +++ b/subworkflows/local/pair_variant_calling.nf @@ -17,8 +17,9 @@ workflow PAIR_VARIANT_CALLING { fasta_fai // channel: [mandatory] fasta_fai intervals // channel: [mandatory] intervals/target regions intervals_bed_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed - intervals_bed_combined_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed - intervals_bed_combine_gz // channel: [mandatory] intervals/target regions index zipped and indexed in one file + intervals_bed_combined_gz_tbi // channel: [mandatory] intervals/target regions all in one file zipped and indexed + intervals_bed_combine_gz // channel: [mandatory] intervals/target regions zipped in one file + intervals_bed_combined 
// channel: [mandatory] intervals/target regions in one file unzipped num_intervals // val: number of intervals that are used to parallelize exection, either based on capture kit or GATK recommended for WGS no_intervals msisensorpro_scan // channel: [optional] msisensorpro_scan @@ -95,7 +96,9 @@ workflow PAIR_VARIANT_CALLING { } if (tools.contains('msisensorpro')) { - MSISENSORPRO_MSI_SOMATIC(cram_pair_intervals, fasta, msisensorpro_scan) + + cram_pair_msisensor = cram_pair.combine(intervals_bed_combined) + MSISENSORPRO_MSI_SOMATIC(cram_pair_msisensor, fasta, msisensorpro_scan) ch_versions = ch_versions.mix(MSISENSORPRO_MSI_SOMATIC.out.versions) msisensorpro_output = msisensorpro_output.mix(MSISENSORPRO_MSI_SOMATIC.out.output_report) } @@ -118,8 +121,8 @@ workflow PAIR_VARIANT_CALLING { num_intervals ) - // mutect2_vcf = GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING.out.mutect2_vcf - // ch_versions = ch_versions.mix(GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING.out.versions) + mutect2_vcf = GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING.out.mutect2_vcf + ch_versions = ch_versions.mix(GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING.out.versions) } // if (tools.contains('tiddit')) { diff --git a/subworkflows/local/prepare_intervals.nf b/subworkflows/local/prepare_intervals.nf index 87805b0fdf..1158b14ca5 100644 --- a/subworkflows/local/prepare_intervals.nf +++ b/subworkflows/local/prepare_intervals.nf @@ -24,7 +24,6 @@ workflow PREPARE_INTERVALS { ch_intervals = Channel.empty() ch_intervals_bed_gz_tbi = Channel.empty() ch_intervals_combined_bed_gz_tbi = Channel.empty() // Create bed.gz and bed.gz.tbi for input/or created interval file. Contains ALL regions. 
- tabix_in_combined = Channel.empty() if (params.no_intervals) { @@ -78,10 +77,10 @@ workflow PREPARE_INTERVALS { ch_versions = ch_versions.mix(TABIX_BGZIPTABIX_INTERVAL_SPLIT.out.versions) } + emit: intervals_bed = ch_intervals // path: intervals.bed [intervals split for parallel execution] intervals_bed_gz_tbi = ch_intervals_bed_gz_tbi // path: target.bed.gz, target.bed.gz.tbi [intervals split for parallel execution] intervals_combined_bed_gz_tbi = ch_intervals_combined_bed_gz_tbi // path: interval.bed.gz, interval.bed.gz.tbi [all intervals in one file] - versions = ch_versions // channel: [ versions.yml ] } diff --git a/tests/test_tools.yml b/tests/test_tools.yml index 19bd555e82..983516e48b 100644 --- a/tests/test_tools.yml +++ b/tests/test_tools.yml @@ -245,16 +245,83 @@ - path: results/variant_calling/sample2/mutect2/sample2.vcf.gz - path: results/variant_calling/sample2/mutect2/sample2.vcf.gz.tbi - path: results/variant_calling/sample2/mutect2/sample2.vcf.gz.stats + - path: results/variant_calling/sample2/mutect2/sample2.contamination.table + - path: results/variant_calling/sample2/mutect2/sample2.segmentation.table + - path: results/variant_calling/sample2/mutect2/sample2.table.pileupsummaries.table + - path: results/variant_calling/sample2/mutect2/sample2.filtered.vcf.gz + - path: results/variant_calling/sample2/mutect2/sample2.filtered.vcf.gz.tbi + - path: results/variant_calling/sample2/mutect2/sample2.vcf.gz.filteringStats.tsv -# - name: Run variant calling on tumoronly sample with mutect2 without intervals - -#- name: Run variant calling on somatic sample with mutect2 +- name: Run variant calling on tumoronly sample with mutect2 without intervals + command: nextflow run main.nf -profile test,tools_tumoronly,docker --tools mutect2 --no_intervals + tags: + - mutect2 + - no_intervals + - tumor_only + - variant_calling + files: + - path: results/variant_calling/sample2/mutect2/sample2.vcf.gz + - path: 
results/variant_calling/sample2/mutect2/sample2.vcf.gz.tbi + - path: results/variant_calling/sample2/mutect2/sample2.vcf.gz.stats + - path: results/variant_calling/sample2/mutect2/sample2.contamination.table + - path: results/variant_calling/sample2/mutect2/sample2.segmentation.table + - path: results/variant_calling/sample2/mutect2/sample2.table.pileupsummaries.table + - path: results/variant_calling/sample2/mutect2/sample2.filtered.vcf.gz + - path: results/variant_calling/sample2/mutect2/sample2.filtered.vcf.gz.tbi + - path: results/variant_calling/sample2/mutect2/sample2.vcf.gz.filteringStats.tsv -#- name: Run variant calling on somatic sample with mutect2 without intervals +- name: Run variant calling on somatic sample with mutect2 + command: nextflow run main.nf -profile test,tools_somatic,docker --tools mutect2 + tags: + - mutect2 + - somatic + - variant_calling + files: + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.vcf.gz.tbi + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.vcf.gz.stats + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.contamination.table + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.segmentation.table + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.table.pileupsummaries.table + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.f1f2.tar.gz + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.learnreadorientationmodel.tar.gz + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.filtered.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.filtered.vcf.gz.tbi + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.vcf.gz.filteringStats.tsv -#- name: Run variant 
calling on somatic sample with msisensor-pro +- name: Run variant calling on somatic sample with mutect2 without intervals + command: nextflow run main.nf -profile test,tools_somatic,docker --tools mutect2 --no_intervals + tags: + - mutect2 + - no_intervals + - somatic + - variant_calling + files: + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.vcf.gz.tbi + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.vcf.gz.stats + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.contamination.table + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.segmentation.table + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.table.pileupsummaries.table + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample3.table.pileupsummaries.table + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4.table.pileupsummaries.table + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.f1f2.tar.gz + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.learnreadorientationmodel.tar.gz + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.filtered.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.filtered.vcf.gz.tbi + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.vcf.gz.filteringStats.tsv -#- name: Run variant calling on somatic sample with msisensor-pro without intervals +- name: Run variant calling on somatic sample with msisensor-pro + command: nextflow run main.nf -profile test,tools_somatic,docker --tools msisensorpro + tags: + - mutect2 + - somatic + - variant_calling + files: + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3 + - path: 
results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3_dis + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3_germline + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3_somatic - name: Run variant calling on germline sample with strelka command: nextflow run main.nf -profile test,tools_germline,docker --tools strelka diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 2b86813074..8bc276aefc 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -623,6 +623,7 @@ workflow SAREK { intervals_bed_gz_tbi, intervals_bed_combined_gz_tbi, intervals_bed_combined_gz, + intervals_bed_combined, num_intervals, params.no_intervals, msisensorpro_scan, From 96b27fc4941eff075fb09ceb339a021e12ffbd5b Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 23 Mar 2022 23:00:54 +0100 Subject: [PATCH 47/52] linting --- .github/workflows/ci.yml | 2 +- .../gatk4/tumor_normal_somatic_variant_calling/main.nf | 10 +++++----- .../gatk4/tumor_only_somatic_variant_calling/main.nf | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ca0bbb3532..6ad394137c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -40,7 +40,7 @@ jobs: - "haplotypecaller" - "manta" - "mutect2" - #- "msisensorpro" + - "msisensorpro" # - 'save_bam_mapped' - "skip_markduplicates" - "strelka" diff --git a/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf b/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf index da1705017c..6abe74c9e0 100644 --- a/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf +++ b/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf @@ -152,11 +152,11 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING { // //Mutect2 calls filtered by filtermutectcalls using the artifactpriors, contamination and segmentation tables. 
// - ch_filtermutect = mutect2_vcf.join(mutect2_tbi) - .join(mutect2_stats) - .join(LEARNREADORIENTATIONMODEL.out.artifactprior) - .join(CALCULATECONTAMINATION.out.segmentation) - .join(CALCULATECONTAMINATION.out.contamination) + ch_filtermutect = mutect2_vcf.join(mutect2_tbi) + .join(mutect2_stats) + .join(LEARNREADORIENTATIONMODEL.out.artifactprior) + .join(CALCULATECONTAMINATION.out.segmentation) + .join(CALCULATECONTAMINATION.out.contamination) ch_filtermutect.map{ meta, vcf, tbi, stats, orientation, seg, cont -> [meta, vcf, tbi, stats, orientation, seg, cont, []] }.set{ch_filtermutect_in} diff --git a/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf b/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf index e83d287dea..4985f2891d 100644 --- a/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf +++ b/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf @@ -115,10 +115,10 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { // //Mutect2 calls filtered by filtermutectcalls using the contamination and segmentation tables. 
// - ch_filtermutect = mutect2_vcf.join(mutect2_tbi) - .join(mutect2_stats) - .join(CALCULATECONTAMINATION.out.segmentation) - .join(CALCULATECONTAMINATION.out.contamination) + ch_filtermutect = mutect2_vcf.join(mutect2_tbi) + .join(mutect2_stats) + .join(CALCULATECONTAMINATION.out.segmentation) + .join(CALCULATECONTAMINATION.out.contamination) ch_filtermutect_in = ch_filtermutect.map{ meta, vcf, tbi, stats, seg, cont -> [meta, vcf, tbi, stats, [], seg, cont, []] } FILTERMUTECTCALLS ( ch_filtermutect_in, fasta, fai, dict ) From 595d1e84a54107f870df1600884aa0abb185d80b Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 23 Mar 2022 23:15:59 +0100 Subject: [PATCH 48/52] Set back ressource values for tests --- conf/test.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/test.config b/conf/test.config index 97d7814731..ac0f738b2f 100644 --- a/conf/test.config +++ b/conf/test.config @@ -15,8 +15,8 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Limit resources so that this can run on GitHub Actions - max_cpus = 4 - max_memory = '8.GB' + max_cpus = 2 + max_memory = '6.GB' max_time = '8.h' // Input data From eb82a8debb3a50137463994ec521c20a196a020b Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 23 Mar 2022 23:16:51 +0100 Subject: [PATCH 49/52] Always publish ceepvariant gvcf they are generated anyways --- conf/modules.config | 10 +--------- tests/test_tools.yml | 27 ++------------------------- 2 files changed, 3 insertions(+), 34 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index afe4ca1721..004891d125 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -473,18 +473,10 @@ process{ enabled: "${params.no_intervals}", mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/deepvariant" }, - pattern: "*{vcf.gz,vcf.gz.tbi}" //TODO gvcf should only be outputted when --generate_gvcf is set - ] - } - withName : 'TABIX_VC_DEEPVARIANT_GVCF' { - 
publishDir = [ - enabled: "${params.generate_gvcf}", - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/${meta.id}/deepvariant" }, pattern: "*{vcf.gz,vcf.gz.tbi}" ] } - withName : 'TABIX_VC_DEEPVARIANT_VCF' { + withName : 'TABIX_VC_DEEPVARIANT.*' { publishDir = [ enabled: true, mode: params.publish_dir_mode, diff --git a/tests/test_tools.yml b/tests/test_tools.yml index 983516e48b..7633969caa 100644 --- a/tests/test_tools.yml +++ b/tests/test_tools.yml @@ -4,40 +4,17 @@ - deepvariant - germline - variant_calling - files: - - path: results/variant_calling/sample1/deepvariant/sample1.vcf.gz - - path: results/variant_calling/sample1/deepvariant/sample1.vcf.gz.tbi - -- name: Run variant calling on germline sample with deepvariant without intervals - command: nextflow run main.nf -profile test,tools_germline,docker --tools deepvariant --no_intervals - tags: - - deepvariant - - germline - - no_intervals - - variant_calling - files: - - path: results/variant_calling/sample1/deepvariant/sample1.vcf.gz - - path: results/variant_calling/sample1/deepvariant/sample1.vcf.gz.tbi - -- name: Run variant calling on germline sample with deepvariant and generate gvcf - command: nextflow run main.nf -profile test,tools_germline,docker --tools deepvariant --generate_gvcf - tags: - - deepvariant - - germline - - gvcf - - variant_calling files: - path: results/variant_calling/sample1/deepvariant/sample1.g.vcf.gz - path: results/variant_calling/sample1/deepvariant/sample1.g.vcf.gz.tbi - path: results/variant_calling/sample1/deepvariant/sample1.vcf.gz - path: results/variant_calling/sample1/deepvariant/sample1.vcf.gz.tbi -- name: Run variant calling on germline sample with deepvariant without intervals and generate gvcf - command: nextflow run main.nf -profile test,tools_germline,docker --tools deepvariant --generate_gvcf --no_intervals +- name: Run variant calling on germline sample with deepvariant without intervals + command: nextflow run main.nf -profile 
test,tools_germline,docker --tools deepvariant --no_intervals tags: - deepvariant - germline - - gvcf - no_intervals - variant_calling files: From 9e242f5b20df6ddcb6b44bf9adc9887ddd8a4b8e Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 24 Mar 2022 00:16:11 +0100 Subject: [PATCH 50/52] Correct strelka and msisensorpro output paths --- conf/modules.config | 11 ++++++++--- tests/test_tools.yml | 10 +++++----- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 004891d125..9bb1496286 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -452,9 +452,6 @@ process{ } // DEEPVARIANT - withName: 'BGZIP_VC_DEEPVARIANT_GVCF' { - ext.when = { params.generate_gvcf && !params.no_intervals } - } withName: 'CONCAT_DEEPVARIANT_.*' { publishDir = [ enabled: "${!params.no_intervals}", @@ -724,6 +721,14 @@ process{ ] } + //STRELKA + withName: 'CONCAT_STRELKA_INDELS' { + ext.prefix = {"${meta.id}.somatic_indels"} + } + withName: 'CONCAT_STRELKA_SNVS' { + ext.prefix = {"${meta.id}.somatic_snvs"} + } + } //withName: 'GENOMICSDBIMPORT' { diff --git a/tests/test_tools.yml b/tests/test_tools.yml index 7633969caa..cc2c31f54a 100644 --- a/tests/test_tools.yml +++ b/tests/test_tools.yml @@ -291,14 +291,14 @@ - name: Run variant calling on somatic sample with msisensor-pro command: nextflow run main.nf -profile test,tools_somatic,docker --tools msisensorpro tags: - - mutect2 + - msisensorpro - somatic - variant_calling files: - - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3 - - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3_dis - - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3_germline - - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3_somatic + - path: results/variant_calling/sample4_vs_sample3/msisensorpro/sample4_vs_sample3 + - path: 
results/variant_calling/sample4_vs_sample3/msisensorpro/sample4_vs_sample3_dis + - path: results/variant_calling/sample4_vs_sample3/msisensorpro/sample4_vs_sample3_germline + - path: results/variant_calling/sample4_vs_sample3/msisensorpro/sample4_vs_sample3_somatic - name: Run variant calling on germline sample with strelka command: nextflow run main.nf -profile test,tools_germline,docker --tools strelka From 0858cf2d715fb65d0e9843178b6b904393038a17 Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 24 Mar 2022 09:40:00 +0100 Subject: [PATCH 51/52] Fix num_intervals when no_intervals --- .../main.nf | 23 +++++++++---------- tests/test_tools.yml | 20 ++++++++-------- workflows/sarek.nf | 2 +- 3 files changed, 21 insertions(+), 24 deletions(-) diff --git a/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf b/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf index 4985f2891d..97ab85b85f 100644 --- a/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf +++ b/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf @@ -45,27 +45,26 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { MUTECT2.out.vcf.branch{ intervals: num_intervals > 1 no_intervals: num_intervals == 1 - }.set{ mutect2_vcf } + }.set{ mutect2_vcf_branch } MUTECT2.out.tbi.branch{ intervals: num_intervals > 1 no_intervals: num_intervals == 1 - }.set{ mutect2_tbi } + }.set{ mutect2_tbi_branch } MUTECT2.out.stats.branch{ intervals: num_intervals > 1 no_intervals: num_intervals == 1 - }.set{ mutect2_stats } + }.set{ mutect2_stats_branch } GETPILEUPSUMMARIES.out.table.branch{ intervals: num_intervals > 1 no_intervals: num_intervals == 1 - }set{ pileup_table } - + }set{ pileup_table_branch } //Only when using intervals //Merge Mutect2 VCF - BGZIP_VC_MUTECT2(mutect2_vcf.intervals) + BGZIP_VC_MUTECT2(mutect2_vcf_branch.intervals) CONCAT_MUTECT2(BGZIP_VC_MUTECT2.out.vcf.map{ meta, vcf -> new_meta = meta.clone() @@ -77,14 +76,14 @@ workflow 
GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { mutect2_vcf = Channel.empty().mix( CONCAT_MUTECT2.out.vcf, - mutect2_vcf.no_intervals) + mutect2_vcf_branch.no_intervals) mutect2_tbi = Channel.empty().mix( CONCAT_MUTECT2.out.tbi, - mutect2_tbi.no_intervals) + mutect2_tbi_branch.no_intervals) //Merge Muteect2 Stats - MERGEMUTECTSTATS(mutect2_stats.intervals.map{ meta, stats -> + MERGEMUTECTSTATS(mutect2_stats_branch.intervals.map{ meta, stats -> new_meta = meta.clone() new_meta.id = new_meta.sample [new_meta, stats] @@ -92,7 +91,7 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { mutect2_stats = Channel.empty().mix( MERGEMUTECTSTATS.out.stats, - mutect2_stats.no_intervals) + mutect2_stats_branch.no_intervals) //Merge Pileup Summaries GATHERPILEUPSUMMARIES( GETPILEUPSUMMARIES.out.table.map{ meta, table -> @@ -104,7 +103,7 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { pileup_table = Channel.empty().mix( GATHERPILEUPSUMMARIES.out.table, - pileup_table.no_intervals) + pileup_table_branch.no_intervals) // //Contamination and segmentation tables created using calculatecontamination on the pileup summary table. 
@@ -134,7 +133,7 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { emit: mutect2_vcf = mutect2_vcf // channel: [ val(meta), [ vcf ] ] - mutect2_stats = MUTECT2.out.stats // channel: [ val(meta), [ stats ] ] + mutect2_stats = mutect2_stats // channel: [ val(meta), [ stats ] ] pileup_table = pileup_table // channel: [ val(meta), [ table ] ] diff --git a/tests/test_tools.yml b/tests/test_tools.yml index cc2c31f54a..c372f93a8f 100644 --- a/tests/test_tools.yml +++ b/tests/test_tools.yml @@ -227,7 +227,7 @@ - path: results/variant_calling/sample2/mutect2/sample2.table.pileupsummaries.table - path: results/variant_calling/sample2/mutect2/sample2.filtered.vcf.gz - path: results/variant_calling/sample2/mutect2/sample2.filtered.vcf.gz.tbi - - path: results/variant_calling/sample2/mutect2/sample2.vcf.gz.filteringStats.tsv + - path: results/variant_calling/sample2/mutect2/sample2.filtered.vcf.gz.filteringStats.tsv - name: Run variant calling on tumoronly sample with mutect2 without intervals command: nextflow run main.nf -profile test,tools_tumoronly,docker --tools mutect2 --no_intervals @@ -242,10 +242,10 @@ - path: results/variant_calling/sample2/mutect2/sample2.vcf.gz.stats - path: results/variant_calling/sample2/mutect2/sample2.contamination.table - path: results/variant_calling/sample2/mutect2/sample2.segmentation.table - - path: results/variant_calling/sample2/mutect2/sample2.table.pileupsummaries.table + - path: results/variant_calling/sample2/mutect2/sample2.pileups.table - path: results/variant_calling/sample2/mutect2/sample2.filtered.vcf.gz - path: results/variant_calling/sample2/mutect2/sample2.filtered.vcf.gz.tbi - - path: results/variant_calling/sample2/mutect2/sample2.vcf.gz.filteringStats.tsv + - path: results/variant_calling/sample2/mutect2/sample2.filtered.vcf.gz.filteringStats.tsv - name: Run variant calling on somatic sample with mutect2 command: nextflow run main.nf -profile test,tools_somatic,docker --tools mutect2 @@ -259,12 +259,12 @@ - path: 
results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.vcf.gz.stats - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.contamination.table - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.segmentation.table - - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.table.pileupsummaries.table - - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.f1f2.tar.gz + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample3.table.pileupsummaries.table + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4.table.pileupsummaries.table - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.learnreadorientationmodel.tar.gz - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.filtered.vcf.gz - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.filtered.vcf.gz.tbi - - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.vcf.gz.filteringStats.tsv + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.filtered.vcf.gz.filteringStats.tsv - name: Run variant calling on somatic sample with mutect2 without intervals command: nextflow run main.nf -profile test,tools_somatic,docker --tools mutect2 --no_intervals @@ -279,14 +279,12 @@ - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.vcf.gz.stats - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.contamination.table - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.segmentation.table - - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.table.pileupsummaries.table - - path: results/variant_calling/sample4_vs_sample3/mutect2/sample3.table.pileupsummaries.table - - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4.table.pileupsummaries.table - - path: 
results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.f1f2.tar.gz + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample3.pileups.table + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4.pileups.table - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.learnreadorientationmodel.tar.gz - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.filtered.vcf.gz - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.filtered.vcf.gz.tbi - - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.vcf.gz.filteringStats.tsv + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.filtered.vcf.gz.filteringStats.tsv - name: Run variant calling on somatic sample with msisensor-pro command: nextflow run main.nf -profile test,tools_somatic,docker --tools msisensorpro diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 8bc276aefc..49447a55a8 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -252,7 +252,7 @@ workflow SAREK { intervals_bed_combined_gz = intervals_bed_combined_gz_tbi.map{ bed, tbi -> [bed]}.collect() // one file containing all intervals interval.bed.gz file intervals_for_preprocessing = (!params.wes || params.no_intervals) ? [] : PREPARE_INTERVALS.out.intervals_bed //TODO: intervals also with WGS data? Probably need a parameter if WGS for deepvariant tool, that would allow to check here too - num_intervals = params.intervals ? count_intervals(file(params.intervals)) : 1 + num_intervals = !params.no_intervals ? (params.intervals ? 
count_intervals(file(params.intervals)) : 1) : 1 // Gather used softwares versions ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) From 38bb8defe8cc40d87c4af5c6dda42b244a452772 Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 24 Mar 2022 10:59:14 +0100 Subject: [PATCH 52/52] indent from review & add meta clone in the hopes it fixes Concurrentmodification --- conf/modules.config | 7 ++----- subworkflows/local/split_fastq.nf | 11 +++++++++-- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 9bb1496286..dc243fdcb8 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -227,10 +227,6 @@ process{ process { withName: "SEQKIT_SPLIT2" { - //For unknown reasons occasionally 'java.util.ConcurrentModificationException' is thrown - //Rerunning usually works - //TODO not sure this works, since the error appears randomly - errorStrategy = {task.exitStatus == 'java.util.ConcurrentModificationException' ? 'retry' : 'ignore'} ext.args = { "--by-size ${params.split_fastq}" } ext.when = { params.split_fastq > 1 } publishDir = [ @@ -517,7 +513,7 @@ process{ ] } withName: 'HAPLOTYPECALLER' { - ext.args = { params.joint_germline ? "-ERC GVCF" : "" } + ext.args = { params.joint_germline ? 
"-ERC GVCF" : "" } ext.prefix = {"${meta.id}.g"} ext.when = { params.tools && params.tools.contains('haplotypecaller') } publishDir = [ @@ -643,6 +639,7 @@ process{ withName: 'GATHERPILEUPSUMMARIES.*' { ext.prefix = { "${meta.id}.table" } + ext.when = { "${!params.no_intervals}"} publishDir = [ enabled: "${!params.no_intervals}", mode: params.publish_dir_mode, diff --git a/subworkflows/local/split_fastq.nf b/subworkflows/local/split_fastq.nf index 765ebed99c..14ea551d6e 100644 --- a/subworkflows/local/split_fastq.nf +++ b/subworkflows/local/split_fastq.nf @@ -15,8 +15,15 @@ workflow SPLIT_FASTQ { ch_versions = Channel.empty() reads_no_split = reads_input.map{ meta, reads -> - meta.size = 1 - [meta, reads] + [[ id:meta.id, + patient:meta.patient, + sample:meta.sample, + gender:meta.gender, + status:meta.status, + numLanes:meta.numLanes, + read_group: meta.read_group, + data_type:meta.data_type, + size:1], reads] } // Only if we want to split fastq files