diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8cf26fac54..6ad394137c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,25 +25,28 @@ jobs: # Nextflow versions include: # Test pipeline minimum Nextflow version - - NXF_VER: '21.10.3' - NXF_EDGE: '' + - NXF_VER: "21.10.3" + NXF_EDGE: "" # Test latest edge release of Nextflow - - NXF_VER: '' - NXF_EDGE: '1' + - NXF_VER: "" + NXF_EDGE: "1" test: - - 'aligner' - - 'annotation' - - 'default' - - 'deepvariant' - - 'gatk4_spark' - - 'haplotypecaller' - - 'manta' + - "aligner" + - "annotation" + - "default" + - "deepvariant" + - "freebayes" + - "gatk4_spark" + - "haplotypecaller" + - "manta" + - "mutect2" + - "msisensorpro" # - 'save_bam_mapped' - - 'skip_markduplicates' - - 'strelka' - - 'split_fastq' - - 'targeted' - - 'tumor_normal_pair' + - "skip_markduplicates" + - "strelka" + - "split_fastq" + - "targeted" + - "tumor_normal_pair" steps: - name: Check out pipeline code uses: actions/checkout@v2 @@ -61,7 +64,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v2 with: - python-version: '3.x' + python-version: "3.x" - name: Install dependencies run: python -m pip install --upgrade pip pytest-workflow diff --git a/conf/modules.config b/conf/modules.config index 4f2b06b0a6..dc243fdcb8 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -448,9 +448,6 @@ process{ } // DEEPVARIANT - withName: 'BGZIP_VC_DEEPVARIANT_GVCF' { - ext.when = { params.generate_gvcf && !params.no_intervals } - } withName: 'CONCAT_DEEPVARIANT_.*' { publishDir = [ enabled: "${!params.no_intervals}", @@ -472,15 +469,7 @@ process{ pattern: "*{vcf.gz,vcf.gz.tbi}" ] } - withName : 'TABIX_VC_DEEPVARIANT_GVCF' { - publishDir = [ - enabled: "${params.generate_gvcf}", - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/${meta.id}/deepvariant" }, - pattern: "*{vcf.gz,vcf.gz.tbi}" - ] - } - withName : 'TABIX_VC_DEEPVARIANT_VCF' { + withName : 'TABIX_VC_DEEPVARIANT.*' { 
publishDir = [ enabled: true, mode: params.publish_dir_mode, @@ -524,7 +513,7 @@ process{ ] } withName: 'HAPLOTYPECALLER' { - ext.args = '-ERC GVCF' + ext.args = { params.joint_germline ? "-ERC GVCF" : "" } ext.prefix = {"${meta.id}.g"} ext.when = { params.tools && params.tools.contains('haplotypecaller') } publishDir = [ @@ -535,7 +524,7 @@ process{ ] } withName: 'GENOTYPEGVCFS' { - ext.when = { params.tools && params.tools.contains('haplotypecaller') } + ext.when = { params.tools && params.tools.contains('haplotypecaller') && params.joint_germline} publishDir = [ enabled: true, mode: params.publish_dir_mode, @@ -616,73 +605,129 @@ process{ // TUMOR_VARIANT_CALLING - withName: 'MERGEMUTECTSTATS' { - ext.prefix = { "${meta.id}.vcf.gz" } - } - withName: 'GATHERPILEUPSUMMARIES' { - ext.prefix = { "${meta.id}.table" } + //MANTA + withName: 'CONCAT_MANTA_TUMOR' { + ext.prefix = {"${meta.id}.tumor_sv"} } -// PAIR_VARIANT_CALLING + //MUTECT2 + withName: 'GATK4_CALCULATECONTAMINATION' { + publishDir = [ + enabled: true, + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" } + ] + } - withName: 'MUTECT2'{ + withName: 'CONCAT_MUTECT2.*' { publishDir = [ - enabled: "${params.no_intervals}", + enabled: "${!params.no_intervals}", mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" } + path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" }, + pattern: "*{vcf.gz,vcf.gz.tbi}" ] } - withName: 'GATK4_MUTECT2'{ + + withName: 'FILTERMUTECTCALLS.*'{ + ext.prefix = {"${meta.id}.filtered"} publishDir = [ - enabled: "${params.no_intervals}", + enabled: true, mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" } ] } - withName: 'CONCAT_MUTECT2' { + + withName: 'GATHERPILEUPSUMMARIES.*' { + ext.prefix = { "${meta.id}.table" } + ext.when = { !params.no_intervals } + publishDir = [ + enabled: "${!params.no_intervals}", + mode: params.publish_dir_mode, + path: { 
"${params.outdir}/variant_calling/${meta.id}/mutect2" } ] } - withName: 'GATK4_MERGEMUTECTSTATS' { - publishDir = [ - enabled: true, + + withName: 'GETPILEUPSUMMARIES.*' { + publishDir = [ + enabled: "${params.no_intervals}", mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" } ] } - withName: 'GATK4_FILTERMUTECTCALLS'{ - ext.prefix = {"${meta.id}.filtered."} + + withName: 'MERGEMUTECTSTATS' { + ext.prefix = { "${meta.id}.vcf.gz" } publishDir = [ enabled: true, mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" } ] } + + withName: 'MUTECT2'{ + ext.when = { params.tools && params.tools.contains('mutect2') } + ext.args = { params.ignore_soft_clipped_bases ? "--dont-use-soft-clipped-bases true" : "" } + publishDir = [ + enabled: "${params.no_intervals}", + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" }, + pattern: "*{gz,gz.tbi,stats}" + ] + } + +// PAIR_VARIANT_CALLING + + //MANTA + withName: 'CONCAT_MANTA_SOMATIC' { + ext.prefix = {"${meta.id}.somatic_sv"} + } + + //MUTECT2 + withName: 'CALCULATECONTAMINATION'{ + //ext.args = { params.ignore_soft_clipped_bases ? 
"--dont-use-soft-clipped-bases true" : "" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" }, + ] + } + + withName: 'NFCORE_SAREK:SAREK:PAIR_VARIANT_CALLING:GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING:GATHERPILEUPSUMMARIES.*' { + ext.prefix = { "${meta.id}.table" } + publishDir = [ + enabled: "${!params.no_intervals}", + mode: params.publish_dir_mode, + //use ${meta.tumor_id}_vs_${meta_normal_id} to publish in the same directory as the remainders of the + //somatic output whilst keeping the filename prefix identifieable for status type + path: { "${params.outdir}/variant_calling/${meta.tumor_id}_vs_${meta.normal_id}/mutect2" } + ] + } + + withName: 'LEARNREADORIENTATIONMODEL'{ + ext.prefix = { "${meta.id}.learnreadorientationmodel" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" }, + ] + } + + //MSISENSORPRO + withName: 'MSISENSORPRO_MSI_SOMATIC'{ + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/${meta.id}/msisensorpro" }, + ] + } + + //STRELKA + withName: 'CONCAT_STRELKA_INDELS' { + ext.prefix = {"${meta.id}.somatic_indels"} + } + withName: 'CONCAT_STRELKA_SNVS' { + ext.prefix = {"${meta.id}.somatic_snvs"} + } + } -// withName: 'GATK4_CALCULATECONTAMINATION'{ -// ext.args = '' -// publishDir = [ -// enabled: false, -// mode: params.publish_dir_mode -// ] -//} -//withName: 'GATK4_FILTERMUTECTCALLS'{ -// ext.args = '' -// publishDir = [ -// enabled: false, -// mode: params.publish_dir_mode -// ] -//} -//withName: 'GATK4_GETPILEUPSUMMARIES'{ -// ext.args = '' -// publishDir = [ -// enabled: false, -// mode: params.publish_dir_mode -// ] -//} + //withName: 'GENOMICSDBIMPORT' { // //} diff --git a/conf/test.config b/conf/test.config index 706d1daab3..ac0f738b2f 100644 --- a/conf/test.config +++ b/conf/test.config @@ -91,6 +91,30 @@ profiles { params.input = 
"${baseDir}/tests/csv/3.0/recalibrated_germline.csv" params.dbsnp = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz" params.fasta = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" + params.intervals = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" + params.step = 'variant_calling' + params.joint_germline = true + params.wes = true + params.genome = 'WBcel235' + params.vep_genome = 'WBcel235' + } + tools_tumoronly { + params.input = "${baseDir}/tests/csv/3.0/recalibrated_tumoronly.csv" + params.dbsnp = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz" + params.fasta = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" + params.germline_resource = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz" + params.intervals = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" + params.pon = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz" + params.step = 'variant_calling' + params.joint_germline = true + params.wes = true + params.genome = 'WBcel235' + params.vep_genome = 'WBcel235' + } + tools_somatic { + params.input = "${baseDir}/tests/csv/3.0/recalibrated_somatic.csv" + params.dbsnp = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz" + params.fasta = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" params.germline_resource = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz" params.intervals = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" params.pon = 
"${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz" diff --git a/modules.json b/modules.json index 4a82d6e626..3aa090862a 100644 --- a/modules.json +++ b/modules.json @@ -3,6 +3,9 @@ "homePage": "https://github.com/nf-core/sarek", "repos": { "nf-core/modules": { + "ascat": { + "git_sha": "d6244b42f596fa26d2ecba4ce862755821ed9da8" + }, "bcftools/stats": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, @@ -24,6 +27,9 @@ "cnvkit/batch": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, + "controlfreec": { + "git_sha": "c189835b1bb444e5ee87416fdbea66e2c2ba365e" + }, "custom/dumpsoftwareversions": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, @@ -108,6 +114,15 @@ "gatk4/variantrecalibrator": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, + "manta/germline": { + "git_sha": "979e57b7ac6a405a395dd7a6dbe1a275c5bc226b" + }, + "manta/somatic": { + "git_sha": "979e57b7ac6a405a395dd7a6dbe1a275c5bc226b" + }, + "manta/tumoronly": { + "git_sha": "979e57b7ac6a405a395dd7a6dbe1a275c5bc226b" + }, "msisensorpro/msi_somatic": { "git_sha": "c8ebd0de36c649a14fc92f2f73cbd9f691a8ce0a" }, diff --git a/modules/nf-core/modules/ascat/main.nf b/modules/nf-core/modules/ascat/main.nf new file mode 100644 index 0000000000..1d2bd96fe1 --- /dev/null +++ b/modules/nf-core/modules/ascat/main.nf @@ -0,0 +1,155 @@ +process ASCAT { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::ascat=3.0.0 bioconda::cancerit-allelecount-4.3.0": null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-c278c7398beb73294d78639a864352abef2931ce:dfe5aaa885de434adb2b490b68972c5840c6d761-0': + 'quay.io/biocontainers/mulled-v2-c278c7398beb73294d78639a864352abef2931ce:dfe5aaa885de434adb2b490b68972c5840c6d761-0' }" + + input: + tuple val(meta), path(input_normal), path(index_normal), path(input_tumor), path(index_tumor) + path(allele_files) + path(loci_files) + + output: + tuple val(meta), path("*png"), emit: png + tuple val(meta), path("*cnvs.txt"), emit: cnvs + tuple val(meta), path("*purityploidy.txt"), emit: purityploidy + tuple val(meta), path("*segments.txt"), emit: segments + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def gender = args.gender ? "$args.gender" : "NULL" + def genomeVersion = args.genomeVersion ? "$args.genomeVersion" : "NULL" + def purity = args.purity ? "$args.purity" : "NULL" + def ploidy = args.ploidy ? "$args.ploidy" : "NULL" + def gc_files = args.gc_files ? "$args.gc_files" : "NULL" + + def minCounts_arg = args.minCounts ? ",minCounts = $args.minCounts" : "" + def chrom_names_arg = args.chrom_names ? ",chrom_names = $args.chrom_names" : "" + def min_base_qual_arg = args.min_base_qual ? ",min_base_qual = $args.min_base_qual" : "" + def min_map_qual_arg = args.min_map_qual ? ",min_map_qual = $args.min_map_qual" : "" + def ref_fasta_arg = args.ref_fasta ? ",ref.fasta = '$args.ref_fasta'" : "" + def skip_allele_counting_tumour_arg = args.skip_allele_counting_tumour ? ",skip_allele_counting_tumour = $args.skip_allele_counting_tumour" : "" + def skip_allele_counting_normal_arg = args.skip_allele_counting_normal ? 
",skip_allele_counting_normal = $args.skip_allele_counting_normal" : "" + + + + """ + #!/usr/bin/env Rscript + library(RColorBrewer) + library(ASCAT) + options(bitmapType='cairo') + + + #prepare from BAM files + ascat.prepareHTS( + tumourseqfile = "$input_tumor", + normalseqfile = "$input_normal", + tumourname = "Tumour", + normalname = "Normal", + allelecounter_exe = "alleleCounter", + alleles.prefix = "$allele_files", + loci.prefix = "$loci_files", + gender = "$gender", + genomeVersion = "$genomeVersion", + nthreads = $task.cpus + $minCounts_arg + $chrom_names_arg + $min_base_qual_arg + $min_map_qual_arg + $ref_fasta_arg + $skip_allele_counting_tumour_arg + $skip_allele_counting_normal_arg + ) + + + #Load the data + ascat.bc = ascat.loadData( + Tumor_LogR_file = "Tumour_tumourLogR.txt", + Tumor_BAF_file = "Tumour_normalBAF.txt", + Germline_LogR_file = "Tumour_normalLogR.txt", + Germline_BAF_file = "Tumour_normalBAF.txt", + genomeVersion = "$genomeVersion", + gender = "$gender" + ) + + #optional GC wave correction + if(!is.null($gc_files)){ + ascat.bc = ascat.GCcorrect(ascat.bc, $gc_files) + } + + #Plot the raw data + ascat.plotRawData(ascat.bc) + + #Segment the data + ascat.bc = ascat.aspcf(ascat.bc) + + #Plot the segmented data + ascat.plotSegmentedData(ascat.bc) + + #Run ASCAT to fit every tumor to a model, inferring ploidy, normal cell contamination, and discrete copy numbers + #If psi and rho are manually set: + if (!is.null($purity) && !is.null($ploidy)){ + ascat.output <- ascat.runAscat(ascat.bc, gamma=1, rho_manual=$purity, psi_manual=$ploidy) + } else if(!is.null($purity) && is.null($ploidy)){ + ascat.output <- ascat.runAscat(ascat.bc, gamma=1, rho_manual=$purity) + } else if(!is.null($ploidy) && is.null($purity)){ + ascat.output <- ascat.runAscat(ascat.bc, gamma=1, psi_manual=$ploidy) + } else { + ascat.output <- ascat.runAscat(ascat.bc, gamma=1) + } + + #Write out segmented regions (including regions with one copy of each allele) + 
write.table(ascat.output[["segments"]], file=paste0("$prefix", ".segments.txt"), sep="\t", quote=F, row.names=F) + + #Write out CNVs in bed format + cnvs=ascat.output[["segments"]][2:6] + write.table(cnvs, file=paste0("$prefix",".cnvs.txt"), sep="\t", quote=F, row.names=F, col.names=T) + + #Write out purity and ploidy info + summary <- tryCatch({ + matrix(c(ascat.output[["aberrantcellfraction"]], ascat.output[["ploidy"]]), ncol=2, byrow=TRUE)}, error = function(err) { + # error handler picks up where error was generated + print(paste("Could not find optimal solution: ",err)) + return(matrix(c(0,0),nrow=1,ncol=2,byrow = TRUE)) + } + ) + colnames(summary) <- c("AberrantCellFraction","Ploidy") + write.table(summary, file=paste0("$prefix",".purityploidy.txt"), sep="\t", quote=F, row.names=F, col.names=T) + + #version export. Have to hardcode process name and software name because + #won't run inside an R-block + version_file_path="versions.yml" + f <- file(version_file_path,"w") + writeLines("ASCAT:", f) + writeLines(" ascat: 3.0.0",f) + close(f) + """ + + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.cnvs.txt + touch ${prefix}.purityploidy.txt + touch ${prefix}.segments.txt + touch Tumour.ASCATprofile.png + touch Tumour.ASPCF.png + touch Tumour.germline.png + touch Tumour.rawprofile.png + touch Tumour.sunrise.png + touch Tumour.tumour.png + + echo 'ASCAT:' > versions.yml + echo ' ascat: 3.0.0' >> versions.yml + """ + + +} diff --git a/modules/nf-core/modules/ascat/meta.yml b/modules/nf-core/modules/ascat/meta.yml new file mode 100644 index 0000000000..949afd6af0 --- /dev/null +++ b/modules/nf-core/modules/ascat/meta.yml @@ -0,0 +1,92 @@ +name: ascat +description: copy number profiles of tumour cells. +keywords: + - sort +tools: + - ascat: + description: ASCAT is a method to derive copy number profiles of tumour cells, accounting for normal cell admixture and tumour aneuploidy. 
ASCAT infers tumour purity (the fraction of tumour cells) and ploidy (the amount of DNA per tumour cell), expressed as multiples of haploid genomes from SNP array or massively parallel sequencing data, and calculates whole-genome allele-specific copy number profiles (the number of copies of both parental alleles for all SNP loci across the genome). + homepage: None + documentation: None + tool_dev_url: https://github.com/Crick-CancerGenomics/ascat + doi: "10.1093/bioinformatics/btaa538" + licence: ['GPL v3'] + +input: + - args: + type: map + description: | + Groovy Map containing tool parameters. MUST follow the structure/keywords below and be provided via modules.config. Parameters must be set between quotes. parameters can be removed from the map, if they are not set. For default values, please check the documentation above. + + ``` + { + [ + "gender": "XX", + "genomeVersion": "hg19" + "purity": , + "ploidy": , + "gc_files": , + "minCounts": , + "chrom_names": , + "min_base_qual": , + "min_map_qual": , + "ref_fasta": , + "skip_allele_counting_tumour": , + "skip_allele_counting_normal": + ] + } + ``` + + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_normal: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - index_normal: + type: file + description: index for normal_bam + pattern: "*.{bai}" + - input_tumor: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - index_tumor: + type: file + description: index for tumor_bam + pattern: "*.{bai}" + - allele_files: + type: file + description: allele files for ASCAT. Can be downloaded here https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS + - loci_files: + type: file + description: loci files for ASCAT. 
Can be downloaded here https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - png: + type: file + description: ASCAT plots + pattern: "*.{png}" + - purityploidy: + type: file + description: purity and ploidy data + pattern: "*.purityploidy.txt" + - segments: + type: file + description: segments data + pattern: "*.segments.txt" +authors: + - "@aasNGC" + - "@lassefolkersen" + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/nf-core/modules/controlfreec/main.nf b/modules/nf-core/modules/controlfreec/main.nf new file mode 100644 index 0000000000..21084f641b --- /dev/null +++ b/modules/nf-core/modules/controlfreec/main.nf @@ -0,0 +1,158 @@ +process CONTROLFREEC { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::control-freec=11.6" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/control-freec:11.6--h1b792b2_1': + 'quay.io/biocontainers/control-freec:11.6--h1b792b2_1' }" + + input: + tuple val(meta), path(mpileup_normal), path(mpileup_tumor), path(cpn_normal), path(cpn_tumor), path(minipileup_normal), path(minipileup_tumor) + path fasta + path fai + path snp_position + path known_snps + path known_snps_tbi + path chr_directory + path mappability + path target_bed + path gccontent_profile + + output: + tuple val(meta), path("*_ratio.BedGraph") , emit: bedgraph, optional: true + tuple val(meta), path("*_control.cpn") , emit: control_cpn + tuple val(meta), path("*_sample.cpn") , emit: sample_cpn + tuple val(meta), path("GC_profile.*.cpn") , emit: gcprofile_cpn, optional:true + tuple val(meta), path("*_BAF.txt") , emit: BAF + tuple val(meta), path("*_CNVs") , emit: CNV + tuple val(meta), path("*_info.txt") , emit: info + tuple val(meta), path("*_ratio.txt") , emit: ratio + tuple val(meta), path("config.txt") , emit: config + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + //"General" configurations + def bedgraphoutput = task.ext.args?["general"]?["bedgraphoutput"] ? "BedGraphOutput = ${task.ext.args["general"]["bedgraphoutput"]}" : "" + def chr_files = chr_directory ? "chrFiles =\${PWD}/${chr_directory}" : "" + def chr_length = fai ? "chrLenFile = \${PWD}/${fai}" : "" + def breakpointthreshold = task.ext.args?["general"]?["breakpointthreshold"] ? "breakPointThreshold = ${task.ext.args["general"]["breakpointthreshold"]}" : "" + def breakpointtype = task.ext.args?["general"]?["breakpointtype"] ? "breakPointType = ${task.ext.args["general"]["breakpointtype"]}" : "" + def coefficientofvariation = task.ext.args?["general"]?["coefficientofvariation"] ? "coefficientOfVariation = ${task.ext.args["general"]["coefficientofvariation"]}" : "" + def contamination = task.ext.args?["general"]?["contamination"] ? 
"contamination = ${task.ext.args["general"]["contamination"]}" : "" + def contaminationadjustment = task.ext.args?["general"]?["contaminationadjustment"] ? "contaminationAdjustment = ${task.ext.args["general"]["contaminationadjustment"]}" : "" + def degree = task.ext.args?["general"]?["degree"] ? "degree = ${task.ext.args["general"]["degree"]}" : "" + def forcegccontentnormalization = task.ext.args?["general"]?["forcegccontentnormalization"] ? "forceGCcontentNormalization = ${task.ext.args["general"]["forcegccontentnormalization"]}" : "" + def gccontentprofile = gccontent_profile ? "GCcontentProfile = ${gccontent_profile}" : "" + def mappability = mappability ? "gemMappabilityFile = \${PWD}/${mappability}" : "" + def intercept = task.ext.args?["general"]?["intercept"] ? "intercept = ${task.ext.args["general"]["intercept"]}" : "" + def mincnalength = task.ext.args?["general"]?["mincnalength"] ? "minCNAlength = ${task.ext.args["general"]["mincnalength"]}" : "" + def minmappabilityperwindow = task.ext.args?["general"]?["minmappabilityperwindow"] ? "minMappabilityPerWindow = ${task.ext.args["general"]["minmappabilityperwindow"]}" : "" + def minexpectedgc = task.ext.args?["general"]?["minexpectedgc"] ? "minExpectedGC = ${task.ext.args["general"]["minexpectedgc"]}" : "" + def maxexpectedgc = task.ext.args?["general"]?["maxexpectedgc"] ? "maxExpectedGC = ${task.ext.args["general"]["maxexpectedgc"]}" : "" + def minimalsubclonepresence = task.ext.args?["general"]?["minimalsubclonepresence"] ? "minimalSubclonePresence = ${task.ext.args["general"]["minimalsubclonepresence"]}" : "" + def noisydata = task.ext.args?["general"]?["noisydata"] ? "noisyData = ${task.ext.args["general"]["noisydata"]}" : "" + def output = task.ext.prefix ? "outputDir = \${PWD}/${task.ext.prefix}" : "" + def ploidy = task.ext.args?["general"]?["ploidy"] ? "ploidy = ${task.ext.args["general"]["ploidy"]}" : "" + def printNA = task.ext.args?["general"]?["printNA"] ? 
"printNA = ${task.ext.args["general"]["printNA"]}" : "" + def readcountthreshold = task.ext.args?["general"]?["readcountthreshold"] ? "readCountThreshold = ${task.ext.args["general"]["readcountthreshold"]}" : "" + def sex = task.ext.args?["general"]?["sex"] ? "sex = ${task.ext.args["general"]["sex"]}" : "" + def step = task.ext.args?["general"]?["step"] ? "step = ${task.ext.args["general"]["step"]}" : "" + def telocentromeric = task.ext.args?["general"]?["telocentromeric"] ? "telocentromeric = ${task.ext.args["general"]["telocentromeric"]} " : "" + def uniquematch = task.ext.args?["general"]?["uniquematch"] ? "uniqueMatch = ${task.ext.args["general"]["uniquematch"]}" : "" + def window = task.ext.args?["general"]?["window"] ? "window = ${task.ext.args["general"]["window"]}" : "" + + //"Control" configurations + def matefile_normal = mpileup_normal ? "mateFile = \${PWD}/${mpileup_normal}" : "" + def matecopynumberfile_normal = cpn_normal ? "mateCopyNumberFile = \${PWD}/${cpn_normal}" : "" + def minipileup_normal = minipileup_normal ? "miniPileup = \${PWD}/${minipileup_normal}" : "" + def inputformat_normal = task.ext.args?["control"]?["inputformat"] ? "inputFormat = ${task.ext.args["control"]["inputformat"]}" : "" + def mateorientation_normal = task.ext.args?["control"]?["mateorientation"] ? "mateOrientation = ${task.ext.args["control"]["mateorientation"]}" : "" + + //"Sample" configuration + def matefile_tumor = mpileup_tumor ? "mateFile = \${PWD}/${mpileup_tumor}" : "" + def matecopynumberfile_tumor = cpn_tumor ? "mateCopyNumberFile = \${PWD}/${cpn_tumor}" : "" + def minipileup_tumor = minipileup_tumor ? "miniPileup = \${PWD}/${minipileup_tumor}" : "" + def inputformat_tumor = task.ext.args?["sample"]?["inputformat"] ? "inputFormat = ${task.ext.args["sample"]["inputformat"]}" : "" + def mateorientation_tumor = task.ext.args?["sample"]?["mateorientation"] ? 
"mateOrientation = ${task.ext.args["sample"]["mateorientation"]}" : "" + + //"BAF" configuration + def makepileup = snp_position ? "makePileup = \${PWD}/${snp_position}" : "" + def fastafile = fasta ? "fastaFile = \${PWD}/${fasta}" : "" + def minimalcoverageperposition = task.ext.args?["BAF"]?["minimalcoverageperposition"] ? "minimalCoveragePerPosition = ${task.ext.args["BAF"]["minimalcoverageperposition"]}" : "" + def minimalqualityperposition = task.ext.args?["BAF"]?["minimalqualityperposition"] ? "minimalQualityPerPosition = ${task.ext.args["BAF"]["minimalqualityperposition"]}" : "" + def shiftinquality = task.ext.args?["BAF"]?["shiftinquality"] ? "shiftInQuality = ${task.ext.args["BAF"]["shiftinquality"]}" : "" + def snpfile = known_snps ? "SNPfile = \$PWD/${known_snps}" : "" + + //"Target" configuration + def target_bed = target_bed ? "captureRegions = ${target_bed}" : "" + """ + touch config.txt + + echo "[general]" >> config.txt + echo ${bedgraphoutput} >> config.txt + echo ${breakpointthreshold} >> config.txt + echo ${breakpointtype} >> config.txt + echo ${chr_files} >> config.txt + echo ${chr_length} >> config.txt + echo ${coefficientofvariation} >> config.txt + echo ${contamination} >> config.txt + echo ${contaminationadjustment} >> config.txt + echo ${degree} >> config.txt + echo ${forcegccontentnormalization} >> config.txt + echo ${gccontentprofile} >> config.txt + echo ${mappability} >> config.txt + echo ${intercept} >> config.txt + echo ${mincnalength} >> config.txt + echo ${minmappabilityperwindow} >> config.txt + echo ${minexpectedgc} >> config.txt + echo ${maxexpectedgc} >> config.txt + echo ${minimalsubclonepresence} >> config.txt + echo "maxThreads = ${task.cpus}" >> config.txt + echo ${noisydata} >> config.txt + echo ${output} >> config.txt + echo ${ploidy} >> config.txt + echo ${printNA} >> config.txt + echo ${readcountthreshold} >> config.txt + echo ${sex} >> config.txt + echo ${step} >> config.txt + echo ${telocentromeric} >> config.txt + 
echo ${uniquematch} >> config.txt + echo ${window} >> config.txt + + echo "[control]" >> config.txt + echo ${matefile_normal} >> config.txt + echo ${matecopynumberfile_normal} >> config.txt + echo ${minipileup_normal} >> config.txt + echo ${inputformat_normal} >> config.txt + echo ${mateorientation_normal} >> config.txt + + echo "[sample]" >> config.txt + echo ${matefile_tumor} >> config.txt + echo ${matecopynumberfile_tumor} >> config.txt + echo ${minipileup_tumor} >> config.txt + echo ${inputformat_tumor} >> config.txt + echo ${mateorientation_tumor} >> config.txt + + echo "[BAF]" >> config.txt + echo ${makepileup} >> config.txt + echo ${fastafile} >> config.txt + echo ${minimalcoverageperposition} >> config.txt + echo ${minimalqualityperposition} >> config.txt + echo ${shiftinquality} >> config.txt + echo ${snpfile} >> config.txt + + echo "[target]" >> config.txt + echo ${target_bed} >> config.txt + + freec -conf config.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + controlfreec: \$(echo \$(freec -version 2>&1) | sed 's/^.*Control-FREEC //; s/:.*\$//' | sed -e "s/Control-FREEC v//g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/controlfreec/meta.yml b/modules/nf-core/modules/controlfreec/meta.yml new file mode 100644 index 0000000000..4d1e8674bd --- /dev/null +++ b/modules/nf-core/modules/controlfreec/meta.yml @@ -0,0 +1,183 @@ +name: controlfreec +description: Copy number and genotype annotation from whole genome and whole exome sequencing data +keywords: + - cna + - cnv + - somatic + - single + - tumor-only +tools: + - controlfreec: + description: Copy number and genotype annotation from whole genome and whole exome sequencing data. 
+ homepage: http://boevalab.inf.ethz.ch/FREEC + documentation: http://boevalab.inf.ethz.ch/FREEC/tutorial.html + tool_dev_url: https://github.com/BoevaLab/FREEC/ + doi: "10.1093/bioinformatics/btq635" + licence: ['GPL >=2'] + +input: + - args: + type: map + description: | + Groovy Map containing tool parameters. MUST follow the structure/keywords below and be provided via modules.config. + parameters can be removed from the map, if they are not set. All value must be surrounded by quotes, meta map parameters can be set with, i.e. sex = meta.sex: + For default values, please check the documentation above. + + ``` + { + [ + "general" :[ + "bedgraphoutput": , + "breakpointthreshold": , + "breakpointtype": , + "coefficientofvariation": , + "contamination": , + "contaminationadjustment": , + "degree": , + "forcegccontentnormalization": , + "gccontentprofile": , + "intercept": , + "mincnalength": , + "minmappabilityperwindow": , + "minexpectedgc": , + "maxexpectedgc": , + "minimalsubclonepresence": , + "noisydata": , + "ploidy": , + "printNA": , + "readcountthreshold": , + "sex": , + "step": , + "telocentromeric": , + "uniquematch": , + "window": + ], + "control":[ + "inputformat": , + "mateorientation": , + ], + "sample":[ + "inputformat": , + "mateorientation": , + ], + "BAF":[ + "minimalcoverageperposition": , + "minimalqualityperposition": , + "shiftinquality": + ] + ] + } + ``` + + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - mateFile_normal: + type: file + description: File with mapped reads + pattern: "*.{sam,bam,pileup(.gz),bowtie(.gz),eland(.gz),arachne(.gz),psl(.gz),bed(.gz)}" + - mateFile_tumor: + type: file + description: File with mapped reads + pattern: "*.{sam,bam,pileup(.gz),bowtie(.gz),eland(.gz),arachne(.gz),psl(.gz),bed(.gz)}" + - cpn_normal: + type: file + description: Raw copy number profiles (optional) + pattern: "*.cpn" + - cpn_tumor: + type: file + description: Raw copy number profiles (optional) + pattern: "*.cpn" + - minipileup_normal: + type: file + description: miniPileup file from previous run (optional) + pattern: "*.pileup" + - minipileup_tumor: + type: file + description: miniPileup file from previous run (optional) + pattern: "*.pileup" + - fasta: + type: file + description: Reference file (optional; required if args 'makePileup' is set) + pattern: "*.{fasta,fna,fa}" + - fai: + type: file + description: Fasta index + pattern: "*.fai" + - snp_position: + type: file + description: + pattern: "*.{}" + - known_snps: + type: file + description: File with known SNPs + pattern: "*.{vcf,vcf.gz}" + - known_snps_tbi: + type: file + description: Index of known_snps + pattern: "*.tbi" + - chr_directory: + type: file + description: Path to directory with chromosome fasta files (optional, required if gccontentprofile is not provided) + pattern: "*/" + - mappability: + type: file + description: Contains information of mappable positions (optional) + pattern: "*.gem" + - target_bed: + type: file + description: Sorted bed file containing capture regions (optional) + pattern: "*.bed" + + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bedgraph: + type: file + description: Bedgraph format for the UCSC genome browser + pattern: ".bedgraph" + - control_cpn: + type: file + description: files with raw copy number profiles + pattern: "*_control.cpn" + - sample_cpn: + type: file + description: files with raw copy number profiles + pattern: "*_sample.cpn" + - gcprofile_cpn: + type: file + description: file with GC-content profile. + pattern: "GC_profile.*.cpn" + - BAF: + type: file + description: file B-allele frequencies for each possibly heterozygous SNP position + pattern: "*_BAF.txt" + - CNV: + type: file + description: file with coordinates of predicted copy number alterations. + pattern: "*_CNVs" + - info: + type: file + description: parsable file with information about FREEC run + pattern: "*_info.txt" + - ratio: + type: file + description: file with ratios and predicted copy number alterations for each window + pattern: "*_ratio.txt" + - config: + type: file + description: Config file used to run Control-FREEC + pattern: "config.txt" + +authors: + - "@FriederikeHanssen" diff --git a/modules/local/manta/germline/main.nf b/modules/nf-core/modules/manta/germline/main.nf similarity index 99% rename from modules/local/manta/germline/main.nf rename to modules/nf-core/modules/manta/germline/main.nf index 1fa6aa9614..ef6bd4a32e 100644 --- a/modules/local/manta/germline/main.nf +++ b/modules/nf-core/modules/manta/germline/main.nf @@ -12,6 +12,7 @@ process MANTA_GERMLINE { path fasta path fasta_fai + output: tuple val(meta), path("*candidate_small_indels.vcf.gz") , emit: candidate_small_indels_vcf tuple val(meta), path("*candidate_small_indels.vcf.gz.tbi"), emit: candidate_small_indels_vcf_tbi diff --git a/modules/local/manta/germline/meta.yml b/modules/nf-core/modules/manta/germline/meta.yml similarity index 100% rename from modules/local/manta/germline/meta.yml rename 
to modules/nf-core/modules/manta/germline/meta.yml index d6297eadb6..b719f0756c 100644 --- a/modules/local/manta/germline/meta.yml +++ b/modules/nf-core/modules/manta/germline/meta.yml @@ -31,14 +31,6 @@ input: type: file description: BAM/CRAM/SAM index file. For joint calling use a list of files. pattern: "*.{bai,crai,sai}" - - fasta: - type: file - description: Genome reference FASTA file - pattern: "*.{fa,fasta}" - - fasta_fai: - type: file - description: Genome reference FASTA index file - pattern: "*.{fa.fai,fasta.fai}" - target_bed: type: file description: BED file containing target regions for variant calling @@ -47,6 +39,14 @@ input: type: file description: Index for BED file containing target regions for variant calling pattern: "*.{bed.tbi}" + - fasta: + type: file + description: Genome reference FASTA file + pattern: "*.{fa,fasta}" + - fasta_fai: + type: file + description: Genome reference FASTA index file + pattern: "*.{fa.fai,fasta.fai}" output: - meta: diff --git a/modules/local/manta/somatic/main.nf b/modules/nf-core/modules/manta/somatic/main.nf similarity index 100% rename from modules/local/manta/somatic/main.nf rename to modules/nf-core/modules/manta/somatic/main.nf diff --git a/modules/local/manta/somatic/meta.yml b/modules/nf-core/modules/manta/somatic/meta.yml similarity index 100% rename from modules/local/manta/somatic/meta.yml rename to modules/nf-core/modules/manta/somatic/meta.yml index ec9cc86957..457d66a5fd 100644 --- a/modules/local/manta/somatic/meta.yml +++ b/modules/nf-core/modules/manta/somatic/meta.yml @@ -39,14 +39,6 @@ input: type: file description: BAM/CRAM/SAM index file pattern: "*.{bai,crai,sai}" - - fasta: - type: file - description: Genome reference FASTA file - pattern: "*.{fa,fasta}" - - fai: - type: file - description: Genome reference FASTA index file - pattern: "*.{fa.fai,fasta.fai}" - target_bed: type: file description: BED file containing target regions for variant calling @@ -55,6 +47,14 @@ input: type: file 
description: Index for BED file containing target regions for variant calling pattern: "*.{bed.tbi}" + - fasta: + type: file + description: Genome reference FASTA file + pattern: "*.{fa,fasta}" + - fai: + type: file + description: Genome reference FASTA index file + pattern: "*.{fa.fai,fasta.fai}" output: - meta: diff --git a/modules/local/manta/tumoronly/main.nf b/modules/nf-core/modules/manta/tumoronly/main.nf similarity index 100% rename from modules/local/manta/tumoronly/main.nf rename to modules/nf-core/modules/manta/tumoronly/main.nf diff --git a/modules/local/manta/tumoronly/meta.yml b/modules/nf-core/modules/manta/tumoronly/meta.yml similarity index 100% rename from modules/local/manta/tumoronly/meta.yml rename to modules/nf-core/modules/manta/tumoronly/meta.yml index f902bc77a3..398d684365 100644 --- a/modules/local/manta/tumoronly/meta.yml +++ b/modules/nf-core/modules/manta/tumoronly/meta.yml @@ -31,14 +31,6 @@ input: type: file description: BAM/CRAM/SAM index file pattern: "*.{bai,crai,sai}" - - fasta: - type: file - description: Genome reference FASTA file - pattern: "*.{fa,fasta}" - - fai: - type: file - description: Genome reference FASTA index file - pattern: "*.{fa.fai,fasta.fai}" - target_bed: type: file description: BED file containing target regions for variant calling @@ -47,6 +39,14 @@ input: type: file description: Index for BED file containing target regions for variant calling pattern: "*.{bed.tbi}" + - fasta: + type: file + description: Genome reference FASTA file + pattern: "*.{fa,fasta}" + - fai: + type: file + description: Genome reference FASTA index file + pattern: "*.{fa.fai,fasta.fai}" output: - meta: diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index 4cfee46c8d..9738880697 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -2,37 +2,12 @@ // GERMLINE VARIANT CALLING // -include { BGZIP as 
BGZIP_VC_DEEPVARIANT_GVCF } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_DEEPVARIANT_VCF } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_FREEBAYES } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_HAPLOTYPECALLER } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_DIPLOID } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SV } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_STRELKA } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_STRELKA_GENOME } from '../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_DEEPVARIANT_GVCF } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_DEEPVARIANT_VCF } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_FREEBAYES } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_HAPLOTYPECALLER } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_STRELKA } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_STRELKA_GENOME } from '../../modules/local/concat_vcf/main' -include { DEEPVARIANT } from '../../modules/nf-core/modules/deepvariant/main' -include { FREEBAYES } from '../../modules/nf-core/modules/freebayes/main' -include { GATK4_GENOTYPEGVCFS as GENOTYPEGVCFS } from '../../modules/nf-core/modules/gatk4/genotypegvcfs/main' -include { GATK4_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../modules/nf-core/modules/gatk4/haplotypecaller/main' -include { GATK_JOINT_GERMLINE_VARIANT_CALLING } from 
'../../subworkflows/nf-core/gatk4/joint_germline_variant_calling/main' -include { MANTA_GERMLINE } from '../../modules/local/manta/germline/main' -include { STRELKA_GERMLINE } from '../../modules/nf-core/modules/strelka/germline/main' -include { TABIX_BGZIPTABIX as TABIX_BGZIP_TIDDIT_SV } from '../../modules/nf-core/modules/tabix/bgziptabix/main' -include { TABIX_TABIX as TABIX_VC_DEEPVARIANT_GVCF } from '../../modules/nf-core/modules/tabix/tabix/main' -include { TABIX_TABIX as TABIX_VC_DEEPVARIANT_VCF } from '../../modules/nf-core/modules/tabix/tabix/main' -include { TABIX_TABIX as TABIX_VC_FREEBAYES } from '../../modules/nf-core/modules/tabix/tabix/main' -include { TABIX_TABIX as TABIX_VC_HAPLOTYPECALLER } from '../../modules/nf-core/modules/tabix/tabix/main' -include { TIDDIT_SV } from '../../modules/nf-core/modules/tiddit/sv/main' +include { RUN_DEEPVARIANT } from '../nf-core/variantcalling/deepvariant/main.nf' +include { RUN_FREEBAYES } from '../nf-core/variantcalling/freebayes/main.nf' +include { RUN_HAPLOTYPECALLER } from '../nf-core/variantcalling/haplotypecaller/main.nf' +include { RUN_MANTA_GERMLINE } from '../nf-core/variantcalling/manta/germline/main.nf' +include { RUN_STRELKA_SINGLE } from '../nf-core/variantcalling/strelka/single/main.nf' +//include { TIDDIT } from './variantcalling/tiddit.nf' workflow GERMLINE_VARIANT_CALLING { take: @@ -45,27 +20,37 @@ workflow GERMLINE_VARIANT_CALLING { intervals // channel: [mandatory] intervals/target regions intervals_bed_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed intervals_bed_combine_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed in one file - intervals_bed_combine_gz // channel: [mandatory] intervals/target regions index zipped and indexed in one file + intervals_bed_combine_gz // channel: [mandatory] intervals/target regions index zipped in one file num_intervals // val: number of intervals that are used to parallelize exection, either 
based on capture kit or GATK recommended for WGS // joint_germline // val: true/false on whether to run joint_germline calling, only works in combination with haplotypecaller at the moment main: - ch_versions = Channel.empty() + ch_versions = Channel.empty() + + //TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config + deepvariant_vcf = Channel.empty() + freebayes_vcf = Channel.empty() + haplotypecaller_gvcf = Channel.empty() + genotype_gvcf = Channel.empty() + manta_vcf = Channel.empty() + strelka_vcf = Channel.empty() // Remap channel with intervals cram_recalibrated_intervals = cram_recalibrated.combine(intervals) .map{ meta, cram, crai, intervals -> sample = meta.sample + //new_intervals = num_intervals > 1 ? intervals : [] new_intervals = intervals.baseName != "no_intervals" ? intervals : [] id = new_intervals ? sample + "_" + new_intervals.baseName : sample [[ id: id, sample: meta.sample, gender: meta.gender, status: meta.status, patient: meta.patient ], cram, crai, new_intervals] } - // Remap channel with gziped intervals + indexes + // Remap channel with gzipped intervals + indexes cram_recalibrated_intervals_gz_tbi = cram_recalibrated.combine(intervals_bed_gz_tbi) .map{ meta, cram, crai, bed, tbi -> sample = meta.sample + //new_bed = num_intervals > 1 ? bed : [] //TODO can I pass in empty lists? Then I only need to work with the id line new_bed = bed.simpleName != "no_intervals" ? bed : [] new_tbi = tbi.simpleName != "no_intervals" ? tbi : [] id = new_bed ? 
sample + "_" + new_bed.simpleName : sample @@ -74,352 +59,69 @@ workflow GERMLINE_VARIANT_CALLING { } // DEEPVARIANT + if(params.tools.contains('deepvariant')){ + RUN_DEEPVARIANT(cram_recalibrated_intervals, fasta, fasta_fai, intervals_bed_combine_gz, num_intervals) - //TODO: benchmark if it is better to provide multiple bed files & run on multiple machines + mergeing afterwards || one containing all intervals and run on one larger machine - // Deepvariant: https://github.com/google/deepvariant/issues/510 - - DEEPVARIANT( - cram_recalibrated_intervals, - fasta, - fasta_fai) - - // Only when no intervals - TABIX_VC_DEEPVARIANT_VCF(DEEPVARIANT.out.vcf) - TABIX_VC_DEEPVARIANT_GVCF(DEEPVARIANT.out.gvcf) - - // Only when using intervals - BGZIP_VC_DEEPVARIANT_VCF(DEEPVARIANT.out.vcf) - BGZIP_VC_DEEPVARIANT_GVCF(DEEPVARIANT.out.gvcf) - - CONCAT_DEEPVARIANT_VCF( - BGZIP_VC_DEEPVARIANT_VCF.out.vcf - .map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals), - fasta_fai, - intervals_bed_combine_gz) - - CONCAT_DEEPVARIANT_GVCF( - BGZIP_VC_DEEPVARIANT_GVCF.out.vcf - .map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals), - fasta_fai, - intervals_bed_combine_gz) - - deepvariant_vcf = channel.empty().mix( - CONCAT_DEEPVARIANT_GVCF.out.vcf, - CONCAT_DEEPVARIANT_VCF.out.vcf, - DEEPVARIANT.out.gvcf.join(TABIX_VC_DEEPVARIANT_GVCF.out.tbi), - DEEPVARIANT.out.vcf.join(TABIX_VC_DEEPVARIANT_VCF.out.tbi)) + deepvariant_vcf = RUN_DEEPVARIANT.out.deepvariant_vcf + ch_versions = ch_versions.mix(RUN_DEEPVARIANT.out.versions) + } // FREEBAYES - - // Remap channel for Freebayes - cram_recalibrated_intervals_freebayes = cram_recalibrated.combine(intervals) - .map{ meta, cram, crai, intervals -> - sample = meta.sample - new_intervals = intervals.baseName != "no_intervals" ? intervals : [] - id = new_intervals ? 
sample + "_" + new_intervals.baseName : sample - new_meta = [ id: id, sample: meta.sample, gender: meta.gender, status: meta.status, patient: meta.patient ] - [new_meta, cram, crai, [], [], new_intervals] - } - - FREEBAYES( - cram_recalibrated_intervals_freebayes, - fasta, - fasta_fai, - [], [], []) - - // Only when no intervals - TABIX_VC_FREEBAYES(FREEBAYES.out.vcf) - - // Only when using intervals - BGZIP_VC_FREEBAYES(FREEBAYES.out.vcf) - - CONCAT_FREEBAYES( - BGZIP_VC_FREEBAYES.out.vcf - .map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals), - fasta_fai, - intervals_bed_combine_gz) - - freebayes_vcf = Channel.empty().mix( - CONCAT_FREEBAYES.out.vcf, - FREEBAYES.out.vcf.join(TABIX_VC_FREEBAYES.out.tbi)) + if (params.tools.contains('freebayes')){ + // Remap channel for Freebayes + cram_recalibrated_intervals_freebayes = cram_recalibrated_intervals + .map{ meta, cram, crai, intervals -> + [meta, cram, crai, [], [], intervals] + } + RUN_FREEBAYES(cram_recalibrated_intervals_freebayes, fasta, fasta_fai, intervals_bed_combine_gz, num_intervals) + + freebayes_vcf = RUN_FREEBAYES.out.freebayes_vcf + ch_versions = ch_versions.mix(RUN_FREEBAYES.out.versions) + } // HAPLOTYPECALLER - - HAPLOTYPECALLER( - cram_recalibrated_intervals, - fasta, - fasta_fai, - dict, - dbsnp, - dbsnp_tbi) - - // Only when no intervals - TABIX_VC_HAPLOTYPECALLER(HAPLOTYPECALLER.out.vcf) - - // Only when using intervals - BGZIP_VC_HAPLOTYPECALLER(HAPLOTYPECALLER.out.vcf) - - CONCAT_HAPLOTYPECALLER( - BGZIP_VC_HAPLOTYPECALLER.out.vcf - .map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals), - fasta_fai, - intervals_bed_combine_gz) - - HAPLOTYPECALLER.out.vcf.groupTuple(size: num_intervals) - .branch{ - intervals: it[1].size() > 1 - no_intervals: it[1].size() == 1 - }.set{haplotypecaller_gvcf_intervals} - - HAPLOTYPECALLER.out.tbi.groupTuple(size: 
num_intervals) - .branch{ - intervals: it[1].size() > 1 - no_intervals: it[1].size() == 1 - }.set{haplotypecaller_gvcf_tbi_intervals} - - haplotypecaller_gvcf = Channel.empty().mix( - CONCAT_HAPLOTYPECALLER.out.vcf, - haplotypecaller_gvcf_intervals.no_intervals) - - haplotypecaller_gvcf_tbi = Channel.empty().mix( - CONCAT_HAPLOTYPECALLER.out.tbi, - haplotypecaller_gvcf_tbi_intervals.no_intervals) - - genotype_gvcf_to_call = haplotypecaller_gvcf.join(haplotypecaller_gvcf_tbi) - .combine(intervals_bed_combine_gz_tbi) - .map{ - meta, gvcf, gvf_tbi, intervals, intervals_tbi -> - new_intervals = intervals.simpleName != "no_intervals" ? intervals : [] - new_intervals_tbi = intervals_tbi.simpleName != "no_intervals" ? intervals_tbi : [] - [meta, gvcf, gvf_tbi, new_intervals, new_intervals_tbi] - } - - // GENOTYPEGVCFS - - GENOTYPEGVCFS( - genotype_gvcf_to_call, - fasta, - fasta_fai, - dict, - dbsnp, - dbsnp_tbi) - - genotype_gvcf = GENOTYPEGVCFS.out.vcf - - // if (joint_germline) { - // run_haplotypecaller = false - // run_vqsr = true //parameter? - // some feedback from gavin - // GATK_JOINT_GERMLINE_VARIANT_CALLING( - // haplotypecaller_vcf_gz_tbi, - // run_haplotypecaller, - // run_vqsr, - // fasta, - // fasta_fai, - // dict, - // dbsnp, - // dbsnp_tbi, - // "joined", - // allelespecific? - // resources? - // annotation? - // "BOTH", - // true, - // truthsensitivity -> parameter or module? 
- // ) - // ch_versions = ch_versions.mix(GATK_JOINT_GERMLINE_VARIANT_CALLING.out.versions) - // } + if (params.tools.contains('haplotypecaller')){ + RUN_HAPLOTYPECALLER(cram_recalibrated_intervals, + fasta, + fasta_fai, + dict, + dbsnp, + dbsnp_tbi, + intervals_bed_combine_gz, + intervals_bed_combine_gz_tbi, + num_intervals) + + haplotypecaller_gvcf = RUN_HAPLOTYPECALLER.out.haplotypecaller_gvcf + genotype_gvcf = RUN_HAPLOTYPECALLER.out.genotype_gvcf + ch_versions = ch_versions.mix(RUN_HAPLOTYPECALLER.out.versions) + } // MANTA - // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. - // Seems to be the consensus on upstream modules implementation too - - MANTA_GERMLINE( - cram_recalibrated_intervals_gz_tbi, - fasta, - fasta_fai) - - // Figure out if using intervals or no_intervals - MANTA_GERMLINE.out.candidate_small_indels_vcf.groupTuple(size: num_intervals) - .branch{ - intervals: it[1].size() > 1 - no_intervals: it[1].size() == 1 - }.set{manta_small_indels_vcf} - - MANTA_GERMLINE.out.candidate_sv_vcf.groupTuple(size: num_intervals) - .branch{ - intervals: it[1].size() > 1 - no_intervals: it[1].size() == 1 - }.set{manta_sv_vcf} - - MANTA_GERMLINE.out.diploid_sv_vcf.groupTuple(size: num_intervals) - .branch{ - intervals: it[1].size() > 1 - no_intervals: it[1].size() == 1 - }.set{manta_diploid_sv_vcf} - - // Only when using intervals - BGZIP_VC_MANTA_DIPLOID(MANTA_GERMLINE.out.diploid_sv_vcf) - - CONCAT_MANTA_DIPLOID( - BGZIP_VC_MANTA_DIPLOID.out.vcf - .map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals), - fasta_fai, - intervals_bed_combine_gz) - - BGZIP_VC_MANTA_SMALL_INDELS(MANTA_GERMLINE.out.candidate_small_indels_vcf) - - CONCAT_MANTA_SMALL_INDELS( - BGZIP_VC_MANTA_SMALL_INDELS.out.vcf - .map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals), - fasta_fai, - 
intervals_bed_combine_gz) - - BGZIP_VC_MANTA_SV(MANTA_GERMLINE.out.candidate_sv_vcf) + if (params.tools.contains('manta')){ + RUN_MANTA_GERMLINE (cram_recalibrated_intervals_gz_tbi, + fasta, + fasta_fai, + intervals_bed_combine_gz, + num_intervals) - CONCAT_MANTA_SV( - BGZIP_VC_MANTA_SV.out.vcf - .map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals), - fasta_fai, - intervals_bed_combine_gz) - - manta_vcf = Channel.empty().mix( - CONCAT_MANTA_DIPLOID.out.vcf, - CONCAT_MANTA_SMALL_INDELS.out.vcf, - CONCAT_MANTA_SV.out.vcf, - manta_diploid_sv_vcf.no_intervals, - manta_small_indels_vcf.no_intervals, - manta_sv_vcf.no_intervals) + manta_vcf = RUN_MANTA_GERMLINE.out.manta_vcf + ch_versions = ch_versions.mix(RUN_MANTA_GERMLINE.out.versions) + } // STRELKA - // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. - // Seems to be the consensus on upstream modules implementation too - - STRELKA_GERMLINE( - cram_recalibrated_intervals_gz_tbi, - fasta, - fasta_fai) - - // Figure out if using intervals or no_intervals - STRELKA_GERMLINE.out.vcf.groupTuple(size: num_intervals) - .branch{ - intervals: it[1].size() > 1 - no_intervals: it[1].size() == 1 - }.set{strelka_vcf} - - STRELKA_GERMLINE.out.genome_vcf.groupTuple(size: num_intervals) - .branch{ - intervals: it[1].size() > 1 - no_intervals: it[1].size() == 1 - }.set{strelka_genome_vcf} - - // Only when using intervals - BGZIP_VC_STRELKA(STRELKA_GERMLINE.out.vcf) - - CONCAT_STRELKA( - BGZIP_VC_STRELKA.out.vcf - .map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals), - fasta_fai, - intervals_bed_combine_gz) - - BGZIP_VC_STRELKA_GENOME(STRELKA_GERMLINE.out.genome_vcf) - - CONCAT_STRELKA_GENOME( - BGZIP_VC_STRELKA_GENOME.out.vcf - .map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: 
num_intervals), - fasta_fai, - intervals_bed_combine_gz) - - strelka_vcf = Channel.empty().mix( - CONCAT_STRELKA.out.vcf, - CONCAT_STRELKA_GENOME.out.vcf, - strelka_genome_vcf.no_intervals, - strelka_vcf.no_intervals) - - // if (tools.contains('tiddit')) { - // TODO: Update tiddit on bioconda, the current version does not support cram usage, needs newest version: - // https://github.com/SciLifeLab/TIDDIT/issues/82#issuecomment-1022103264 - // Issue opened, either this week or end of february - - // TIDDIT_SV( - // cram_recalibrated, - // fasta, - // fasta_fai - // ) - - // TABIX_BGZIP_TIDDIT_SV(TIDDIT_SV.out.vcf) - // tiddit_vcf_gz_tbi = TABIX_BGZIP_TIDDIT_SV.out.gz_tbi - // tiddit_ploidy = TIDDIT_SV.out.ploidy - // tiddit_signals = TIDDIT_SV.out.signals - // tiddit_wig = TIDDIT_SV.out.wig - // tiddit_gc_wig = TIDDIT_SV.out.gc_wig - - // ch_versions = ch_versions.mix(TABIX_BGZIP_TIDDIT_SV.out.versions) - // ch_versions = ch_versions.mix(TIDDIT_SV.out.versions) - // } - - ch_versions = ch_versions.mix(BGZIP_VC_DEEPVARIANT_GVCF.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_DEEPVARIANT_VCF.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_FREEBAYES.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_HAPLOTYPECALLER.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_DIPLOID.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_STRELKA.out.versions) - ch_versions = ch_versions.mix(CONCAT_DEEPVARIANT_GVCF.out.versions) - ch_versions = ch_versions.mix(CONCAT_DEEPVARIANT_VCF.out.versions) - ch_versions = ch_versions.mix(CONCAT_FREEBAYES.out.versions) - ch_versions = ch_versions.mix(CONCAT_HAPLOTYPECALLER.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_DIPLOID.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_SMALL_INDELS.out.versions) - ch_versions = 
ch_versions.mix(CONCAT_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(CONCAT_STRELKA.out.versions) - ch_versions = ch_versions.mix(DEEPVARIANT.out.versions) - ch_versions = ch_versions.mix(FREEBAYES.out.versions) - ch_versions = ch_versions.mix(GENOTYPEGVCFS.out.versions) - ch_versions = ch_versions.mix(HAPLOTYPECALLER.out.versions) - ch_versions = ch_versions.mix(MANTA_GERMLINE.out.versions) - ch_versions = ch_versions.mix(STRELKA_GERMLINE.out.versions) - ch_versions = ch_versions.mix(TABIX_VC_DEEPVARIANT_GVCF.out.versions) - ch_versions = ch_versions.mix(TABIX_VC_DEEPVARIANT_VCF.out.versions) - ch_versions = ch_versions.mix(TABIX_VC_FREEBAYES.out.versions) - ch_versions = ch_versions.mix(TABIX_VC_HAPLOTYPECALLER.out.versions) + if (params.tools.contains('strelka')){ + RUN_STRELKA_SINGLE(cram_recalibrated_intervals_gz_tbi, + fasta, + fasta_fai, + intervals_bed_combine_gz, + num_intervals) + + strelka_vcf = RUN_STRELKA_SINGLE.out.strelka_vcf + ch_versions = ch_versions.mix(RUN_STRELKA_SINGLE.out.versions) + } + + //TIDDIT + //TODO emit: deepvariant_vcf diff --git a/subworkflows/local/pair_variant_calling.nf b/subworkflows/local/pair_variant_calling.nf index 5c6ef808ee..e3ddcc1b0d 100644 --- a/subworkflows/local/pair_variant_calling.nf +++ b/subworkflows/local/pair_variant_calling.nf @@ -1,65 +1,43 @@ // // PAIRED VARIANT CALLING // -include { BGZIP as BGZIP_VC_MANTA_DIPLOID } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SOMATIC } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SV } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_STRELKA_INDELS } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_STRELKA_SNVS } from '../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from 
'../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SOMATIC } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_STRELKA_INDELS } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_STRELKA_SNVS } from '../../modules/local/concat_vcf/main' include { GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING } from '../../subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main' -include { MANTA_SOMATIC } from '../../modules/local/manta/somatic/main' include { MSISENSORPRO_MSI_SOMATIC } from '../../modules/nf-core/modules/msisensorpro/msi_somatic/main' -include { STRELKA_SOMATIC } from '../../modules/nf-core/modules/strelka/somatic/main' +include { RUN_MANTA_SOMATIC } from '../nf-core/variantcalling/manta/somatic/main.nf' +include { RUN_STRELKA_SOMATIC } from '../nf-core/variantcalling/strelka/somatic/main.nf' workflow PAIR_VARIANT_CALLING { take: tools - cram_pair // channel: [mandatory] cram - dbsnp // channel: [mandatory] dbsnp - dbsnp_tbi // channel: [mandatory] dbsnp_tbi - dict // channel: [mandatory] dict - fasta // channel: [mandatory] fasta - fasta_fai // channel: [mandatory] fasta_fai - intervals // channel: [mandatory] intervals/target regions - intervals_bed_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed - intervals_bed_combined_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed - intervals_bed_combine_gz // channel: [mandatory] intervals/target regions index zipped and indexed in one file - num_intervals // val: number of intervals that are used to parallelize exection, either based on capture kit or GATK recommended for WGS + cram_pair // channel: [mandatory] cram + dbsnp // channel: [mandatory] dbsnp + dbsnp_tbi // channel: [mandatory] dbsnp_tbi + dict // channel: [mandatory] dict + fasta // channel: [mandatory] fasta + fasta_fai // 
channel: [mandatory] fasta_fai + intervals // channel: [mandatory] intervals/target regions + intervals_bed_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed + intervals_bed_combined_gz_tbi // channel: [mandatory] intervals/target regions all in one file zipped and indexed + intervals_bed_combine_gz // channel: [mandatory] intervals/target regions zipped in one file + intervals_bed_combined // channel: [mandatory] intervals/target regions in one file unzipped + num_intervals // val: number of intervals that are used to parallelize exection, either based on capture kit or GATK recommended for WGS no_intervals - msisensorpro_scan // channel: [optional] msisensorpro_scan - germline_resource // channel: [optional] germline_resource - germline_resource_tbi // channel: [optional] germline_resource_tbi - panel_of_normals // channel: [optional] panel_of_normals - panel_of_normals_tbi // channel: [optional] panel_of_normals_tbi + msisensorpro_scan // channel: [optional] msisensorpro_scan + germline_resource // channel: [optional] germline_resource + germline_resource_tbi // channel: [optional] germline_resource_tbi + panel_of_normals // channel: [optional] panel_of_normals + panel_of_normals_tbi // channel: [optional] panel_of_normals_tbi main: - if (!tools) tools = "" - ch_versions = Channel.empty() + + //TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config manta_vcf = Channel.empty() strelka_vcf = Channel.empty() msisensorpro_output = Channel.empty() mutect2_vcf = Channel.empty() - - cram_pair_intervals = cram_pair.combine(intervals) - .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, intervals -> - normal_id = meta.normal_id - tumor_id = meta.tumor_id - new_intervals = intervals.baseName != "no_intervals" ? intervals : [] - id = new_intervals ? 
tumor_id + "_vs_" + normal_id + "_" + new_intervals.baseName : tumor_id + "_vs_" + normal_id - new_meta = [ id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ] - [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, intervals] - } - cram_pair_intervals_gz_tbi = cram_pair.combine(intervals_bed_gz_tbi) .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, bed, tbi -> normal_id = meta.normal_id @@ -72,150 +50,56 @@ workflow PAIR_VARIANT_CALLING { [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, new_bed, new_tbi] } - if (tools.contains('manta')) { - MANTA_SOMATIC( - cram_pair_intervals_gz_tbi, - fasta, - fasta_fai) - - ch_versions = ch_versions.mix(MANTA_SOMATIC.out.versions) - - if (no_intervals) { - manta_candidate_small_indels_vcf = MANTA_SOMATIC.out.candidate_small_indels_vcf - manta_candidate_sv_vcf = MANTA_SOMATIC.out.candidate_sv_vcf - manta_diploid_sv_vcf = MANTA_SOMATIC.out.diploid_sv_vcf - manta_somatic_sv_vcf = MANTA_SOMATIC.out.somatic_sv_vcf - } else { - BGZIP_VC_MANTA_SV(MANTA_SOMATIC.out.candidate_small_indels_vcf) - BGZIP_VC_MANTA_SMALL_INDELS(MANTA_SOMATIC.out.candidate_sv_vcf) - BGZIP_VC_MANTA_DIPLOID(MANTA_SOMATIC.out.diploid_sv_vcf) - BGZIP_VC_MANTA_SOMATIC(MANTA_SOMATIC.out.somatic_sv_vcf) - - manta_sv_vcf_to_concat = BGZIP_VC_MANTA_SV.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id - [new_meta, vcf] - }.groupTuple(size: num_intervals) - - manta_small_indels_vcf_to_concat = BGZIP_VC_MANTA_SMALL_INDELS.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id - [new_meta, vcf] - }.groupTuple(size: num_intervals) - - manta_diploid_vcf_to_concat = BGZIP_VC_MANTA_DIPLOID.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id - [new_meta, vcf] - }.groupTuple(size: num_intervals) - - 
manta_somatic_sv_vcf_to_concat = BGZIP_VC_MANTA_SOMATIC.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id - [new_meta, vcf] - }.groupTuple(size: num_intervals) - - CONCAT_MANTA_SV(manta_sv_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) - CONCAT_MANTA_SMALL_INDELS(manta_small_indels_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - CONCAT_MANTA_DIPLOID(manta_diploid_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) - CONCAT_MANTA_SOMATIC(manta_somatic_sv_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) - - manta_candidate_small_indels_vcf = CONCAT_MANTA_SV.out.vcf - manta_candidate_sv_vcf = CONCAT_MANTA_SMALL_INDELS.out.vcf - manta_diploid_sv_vcf = CONCAT_MANTA_DIPLOID.out.vcf - manta_somatic_sv_vcf = CONCAT_MANTA_SOMATIC.out.vcf - - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_DIPLOID.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SOMATIC.out.versions) - - ch_versions = ch_versions.mix(CONCAT_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_DIPLOID.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_SOMATIC.out.versions) - + cram_pair_intervals = cram_pair.combine(intervals) + .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, intervals -> + normal_id = meta.normal_id + tumor_id = meta.tumor_id + new_intervals = intervals.baseName != "no_intervals" ? intervals : [] + id = new_intervals ? 
tumor_id + "_vs_" + normal_id + "_" + new_intervals.baseName : tumor_id + "_vs_" + normal_id + new_meta = [ id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ] + [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, intervals] } - manta_vcf = manta_vcf.mix(manta_candidate_small_indels_vcf,manta_candidate_sv_vcf,manta_diploid_sv_vcf,manta_somatic_sv_vcf) - } - - cram_pair_strelka = Channel.empty() - if (tools.contains('strelka') && tools.contains('manta')) { - cram_pair_strelka = cram_pair.join(manta_somatic_sv_vcf).combine(intervals_bed_gz_tbi) - .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi, bed, tbi -> - normal_id = meta.normal_id - tumor_id = meta.tumor_id - - new_bed = bed.simpleName != "no_intervals" ? bed : [] - new_tbi = tbi.simpleName != "no_intervals" ? tbi : [] - id = bed.simpleName != "no_intervals" ? tumor_id + "_vs_" + normal_id + "_" + bed.simpleName : tumor_id + "_vs_" + normal_id - new_meta = [ id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient] - [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi, new_bed, new_tbi] - } - } else if (tools.contains('strelka') && !tools.contains('manta')) { - cram_pair_strelka = cram_pair.combine(intervals_bed_gz_tbi) - .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, bed, tbi -> - normal_id = meta.normal_id - tumor_id = meta.tumor_id - - new_bed = bed.simpleName != "no_intervals" ? bed : [] - new_tbi = tbi.simpleName != "no_intervals" ? tbi : [] - id = bed.simpleName != "no_intervals" ? 
tumor_id + "_vs_" + normal_id + "_" + bed.simpleName : tumor_id + "_vs_" + normal_id - new_meta = [ id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient] - - [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, [], [], new_bed, new_tbi] - } + if (tools.contains('manta')) { + RUN_MANTA_SOMATIC( cram_pair_intervals_gz_tbi, + fasta, + fasta_fai, + intervals_bed_combine_gz, + num_intervals) + + manta_vcf = RUN_MANTA_SOMATIC.out.manta_vcf + ch_versions = ch_versions.mix(RUN_MANTA_SOMATIC.out.versions) } if (tools.contains('strelka')) { - STRELKA_SOMATIC( - cram_pair_strelka, - fasta, - fasta_fai - ) - - if (no_intervals) { - strelka_snvs_vcf_gz = STRELKA_SOMATIC.out.vcf_snvs - strelka_indels_vcf_gz = STRELKA_SOMATIC.out.vcf_indels + if (tools.contains('manta')) { + cram_pair_strelka = intervals_bed_gz_tbi.join(manta_somatic_sv_vcf).map{ + meta, normal_cram, normal_crai, tumor_cram, tumor_crai, bed, tbi, manta_vcf, manta_tbi -> + [meta, normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi, bed, tbi] + } } else { - BGZIP_VC_STRELKA_SNVS(STRELKA_SOMATIC.out.vcf_snvs) - BGZIP_VC_STRELKA_INDELS(STRELKA_SOMATIC.out.vcf_indels) - - strelka_snvs_vcf_to_concat = BGZIP_VC_STRELKA_SNVS.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id - [new_meta, vcf] - }.groupTuple(size: num_intervals) - - strelka_indels_vcf_to_concat = BGZIP_VC_STRELKA_INDELS.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id - [new_meta, vcf] - }.groupTuple(size: num_intervals) - - CONCAT_STRELKA_SNVS(strelka_snvs_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - CONCAT_STRELKA_INDELS(strelka_indels_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - - strelka_snvs_vcf_gz = CONCAT_STRELKA_SNVS.out.vcf - strelka_indels_vcf_gz = CONCAT_STRELKA_INDELS.out.vcf - - ch_versions = 
ch_versions.mix(BGZIP_VC_STRELKA_SNVS.out.versions) - ch_versions = ch_versions.mix(CONCAT_STRELKA_SNVS.out.versions) + cram_pair_strelka = cram_pair_intervals_gz_tbi.map{ + meta, normal_cram, normal_crai, tumor_cram, tumor_crai, bed, tbi -> + [meta, normal_cram, normal_crai, tumor_cram, tumor_crai, [], [], bed, tbi] + } } - strelka_vcf = strelka_vcf.mix(strelka_snvs_vcf_gz,strelka_indels_vcf_gz) + RUN_STRELKA_SOMATIC(cram_pair_strelka, + fasta, + fasta_fai, + intervals_bed_combine_gz, + num_intervals) + + strelka_vcf = RUN_STRELKA_SOMATIC.out.strelka_vcf + ch_versions = ch_versions.mix(RUN_STRELKA_SOMATIC.out.versions) } if (tools.contains('msisensorpro')) { - MSISENSORPRO_MSI_SOMATIC( - cram_pair_intervals, - fasta, - msisensorpro_scan) + cram_pair_msisensor = cram_pair.combine(intervals_bed_combined) + MSISENSORPRO_MSI_SOMATIC(cram_pair_msisensor, fasta, msisensorpro_scan) ch_versions = ch_versions.mix(MSISENSORPRO_MSI_SOMATIC.out.versions) - msisensorpro_output = msisensorpro_output.mix(MSISENSORPRO_MSI_SOMATIC.out.output_report) } @@ -233,10 +117,11 @@ workflow PAIR_VARIANT_CALLING { germline_resource_tbi, panel_of_normals, panel_of_normals_tbi, - no_intervals, - num_intervals, - intervals_bed_combine_gz + intervals_bed_combine_gz, + num_intervals ) + + mutect2_vcf = GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING.out.mutect2_vcf ch_versions = ch_versions.mix(GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING.out.versions) } diff --git a/subworkflows/local/prepare_intervals.nf b/subworkflows/local/prepare_intervals.nf index 2af55711fe..1158b14ca5 100644 --- a/subworkflows/local/prepare_intervals.nf +++ b/subworkflows/local/prepare_intervals.nf @@ -20,10 +20,10 @@ workflow PREPARE_INTERVALS { ch_versions = Channel.empty() + // TODO maybe instead [] ch_intervals = Channel.empty() ch_intervals_bed_gz_tbi = Channel.empty() ch_intervals_combined_bed_gz_tbi = Channel.empty() // Create bed.gz and bed.gz.tbi for input/or created interval file. Contains ALL regions. 
- tabix_in_combined = Channel.empty() if (params.no_intervals) { @@ -77,10 +77,10 @@ workflow PREPARE_INTERVALS { ch_versions = ch_versions.mix(TABIX_BGZIPTABIX_INTERVAL_SPLIT.out.versions) } + emit: intervals_bed = ch_intervals // path: intervals.bed [intervals split for parallel execution] intervals_bed_gz_tbi = ch_intervals_bed_gz_tbi // path: target.bed.gz, target.bed.gz.tbi [intervals split for parallel execution] intervals_combined_bed_gz_tbi = ch_intervals_combined_bed_gz_tbi // path: interval.bed.gz, interval.bed.gz.tbi [all intervals in one file] - versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/split_fastq.nf b/subworkflows/local/split_fastq.nf index 765ebed99c..14ea551d6e 100644 --- a/subworkflows/local/split_fastq.nf +++ b/subworkflows/local/split_fastq.nf @@ -15,8 +15,15 @@ workflow SPLIT_FASTQ { ch_versions = Channel.empty() reads_no_split = reads_input.map{ meta, reads -> - meta.size = 1 - [meta, reads] + [[ id:meta.id, + patient:meta.patient, + sample:meta.sample, + gender:meta.gender, + status:meta.status, + numLanes:meta.numLanes, + read_group: meta.read_group, + data_type:meta.data_type, + size:1], reads] } // Only if we want to split fastq files diff --git a/subworkflows/local/tumor_variant_calling.nf b/subworkflows/local/tumor_variant_calling.nf index 36ee463392..59ee2d5cea 100644 --- a/subworkflows/local/tumor_variant_calling.nf +++ b/subworkflows/local/tumor_variant_calling.nf @@ -3,51 +3,36 @@ // Should be only run on patients without normal sample // - -include { BGZIP as BGZIP_VC_FREEBAYES } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_SV } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_MANTA_TUMOR } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_STRELKA } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_VC_STRELKA_GENOME } from '../../modules/local/bgzip' 
-include { CONCAT_VCF as CONCAT_FREEBAYES } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_TUMOR } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_STRELKA } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_STRELKA_GENOME } from '../../modules/local/concat_vcf/main' -include { FREEBAYES } from '../../modules/nf-core/modules/freebayes/main' +include { RUN_FREEBAYES } from '../nf-core/variantcalling/freebayes/main.nf' include { GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING } from '../../subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main' -include { MANTA_TUMORONLY } from '../../modules/local/manta/tumoronly/main' -include { STRELKA_GERMLINE as STRELKA_TUMORONLY } from '../../modules/nf-core/modules/strelka/germline/main' -include { TABIX_TABIX as TABIX_VC_FREEBAYES } from '../../modules/nf-core/modules/tabix/tabix/main' +include { RUN_MANTA_TUMORONLY } from '../nf-core/variantcalling/manta/tumoronly/main.nf' +include { RUN_STRELKA_SINGLE } from '../nf-core/variantcalling/strelka/single/main.nf' workflow TUMOR_ONLY_VARIANT_CALLING { take: - tools // Mandatory, list of tools to apply - cram_recalibrated // channel: [mandatory] cram - dbsnp // channel: [mandatory] dbsnp - dbsnp_tbi // channel: [mandatory] dbsnp_tbi - dict // channel: [mandatory] dict - fasta // channel: [mandatory] fasta - fasta_fai // channel: [mandatory] fasta_fai - intervals // channel: [mandatory] intervals/target regions - intervals_bed_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed - intervals_bed_combine_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed - intervals_bed_combine_gz // channel: [mandatory] intervals/target regions index zipped and indexed in one 
file - num_intervals // val: number of intervals that are used to parallelize exection, either based on capture kit or GATK recommended for WGS + tools // Mandatory, list of tools to apply + cram_recalibrated // channel: [mandatory] cram + dbsnp // channel: [mandatory] dbsnp + dbsnp_tbi // channel: [mandatory] dbsnp_tbi + dict // channel: [mandatory] dict + fasta // channel: [mandatory] fasta + fasta_fai // channel: [mandatory] fasta_fai + intervals // channel: [mandatory] intervals/target regions + intervals_bed_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed + intervals_bed_combine_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed + intervals_bed_combine_gz // channel: [mandatory] intervals/target regions index zipped and indexed in one file + num_intervals // val: number of intervals that are used to parallelize exection, either based on capture kit or GATK recommended for WGS no_intervals - germline_resource - germline_resource_tbi // channel - panel_of_normals - panel_of_normals_tbi - + germline_resource // channel: [optional] germline_resource + germline_resource_tbi // channel: [optional] germline_resource_tbi + panel_of_normals // channel: [optional] panel_of_normals + panel_of_normals_tbi // channel: [optional] panel_of_normals_tbi main: - if(!tools) tools = "" - ch_versions = Channel.empty() + + //TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config freebayes_vcf = Channel.empty() manta_vcf = Channel.empty() mutect2_vcf = Channel.empty() @@ -73,179 +58,60 @@ workflow TUMOR_ONLY_VARIANT_CALLING { if (tools.contains('freebayes')){ + // Remap channel for Freebayes + cram_recalibrated_intervals_freebayes = cram_recalibrated_intervals + .map{ meta, cram, crai, intervals -> + [meta, cram, crai, [], [], intervals] + } - cram_recalibrated.combine(intervals).map{ meta, cram, crai, intervals -> - new_meta = meta.clone() - new_meta.id = 
meta.sample + "_" + intervals.simpleName - new_meta.id = intervals.baseName != "no_intervals" ? meta.sample + "_" + intervals.baseName : meta.sample - intervals = intervals.baseName != "no_intervals" ? intervals : [] - [new_meta, cram, crai, [], [], intervals] - }.set{cram_recalibrated_intervals_freebayes} - - FREEBAYES( - cram_recalibrated_intervals_freebayes, - fasta, - fasta_fai, - [], - [], - [] - ) - ch_versions = ch_versions.mix(FREEBAYES.out.versions) - - if(no_intervals){ - TABIX_VC_FREEBAYES(FREEBAYES.out.vcf) - freebayes_vcf_gz = FREEBAYES.out.vcf - ch_versions = ch_versions.mix(TABIX_VC_FREEBAYES.out.versions) - }else{ - BGZIP_VC_FREEBAYES(FREEBAYES.out.vcf) - BGZIP_VC_FREEBAYES.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{freebayes_vcf_to_concat} - - CONCAT_FREEBAYES(freebayes_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - freebayes_vcf_gz = CONCAT_FREEBAYES.out.vcf - - ch_versions = ch_versions.mix(BGZIP_VC_FREEBAYES.out.versions) - ch_versions = ch_versions.mix(CONCAT_FREEBAYES.out.versions) - } - - freebayes_vcf = freebayes_vcf.mix(freebayes_vcf_gz) + RUN_FREEBAYES(cram_recalibrated_intervals_freebayes, fasta, fasta_fai, intervals_bed_combine_gz, num_intervals) + freebayes_vcf = RUN_FREEBAYES.out.freebayes_vcf + ch_versions = ch_versions.mix(RUN_FREEBAYES.out.versions) } if (tools.contains('mutect2')) { which_norm = [] cram_recalibrated_intervals.map{ meta, cram, crai, intervals -> [meta, cram, crai, intervals, which_norm]}.set{cram_recalibrated_mutect2} - GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING( - cram_recalibrated_mutect2, - fasta, - fasta_fai, - dict, - germline_resource, - germline_resource_tbi, - panel_of_normals, - panel_of_normals_tbi, - num_intervals, - no_intervals, - intervals_bed_combine_gz - ) - + GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING(cram_recalibrated_mutect2, + fasta, + fasta_fai, + dict, + germline_resource, + 
germline_resource_tbi, + panel_of_normals, + panel_of_normals_tbi, + intervals_bed_combine_gz, + num_intervals) + + mutect2_vcf = GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING.out.mutect2_vcf ch_versions = ch_versions.mix(GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING.out.versions) - //mutect2_vcf_tbi = mutect2_vcf_tbi.mix(GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING.out.mutect2_vcf_gz_tbi) } if (tools.contains('manta')){ //TODO: Research if splitting by intervals is ok, we pretend for now it is fine. Seems to be the consensus on upstream modules implementaiton too - MANTA_TUMORONLY( - cram_recalibrated_intervals_gz_tbi, - fasta, - fasta_fai - ) - - ch_versions = ch_versions.mix(MANTA_TUMORONLY.out.versions) - - if(no_intervals){ - manta_candidate_small_indels_vcf = MANTA_TUMORONLY.out.candidate_small_indels_vcf - manta_candidate_sv_vcf = MANTA_TUMORONLY.out.candidate_sv_vcf - manta_tumor_sv_vcf = MANTA_TUMORONLY.out.tumor_sv_vcf - }else{ - - BGZIP_VC_MANTA_SV(MANTA_TUMORONLY.out.candidate_small_indels_vcf) - BGZIP_VC_MANTA_SMALL_INDELS(MANTA_TUMORONLY.out.candidate_sv_vcf) - BGZIP_VC_MANTA_TUMOR(MANTA_TUMORONLY.out.tumor_sv_vcf) - - BGZIP_VC_MANTA_SV.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{manta_sv_vcf_to_concat} - - BGZIP_VC_MANTA_SMALL_INDELS.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{manta_small_indels_vcf_to_concat} - - BGZIP_VC_MANTA_TUMOR.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{manta_tumor_sv_vcf_to_concat} + RUN_MANTA_TUMORONLY(cram_recalibrated_intervals_gz_tbi, + fasta, + fasta_fai, + intervals_bed_combine_gz, + num_intervals) - CONCAT_MANTA_SV(manta_sv_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) - 
CONCAT_MANTA_SMALL_INDELS(manta_small_indels_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - CONCAT_MANTA_TUMOR(manta_tumor_sv_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) - - manta_candidate_small_indels_vcf = CONCAT_MANTA_SV.out.vcf - manta_candidate_sv_vcf = CONCAT_MANTA_SMALL_INDELS.out.vcf - manta_tumor_sv_vcf = CONCAT_MANTA_TUMOR.out.vcf - - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_TUMOR.out.versions) - - ch_versions = ch_versions.mix(CONCAT_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_TUMOR.out.versions) - - } - - manta_vcf = manta_vcf.mix(manta_candidate_small_indels_vcf, manta_candidate_sv_vcf, manta_tumor_sv_vcf) + manta_vcf = RUN_MANTA_TUMORONLY.out.manta_vcf + ch_versions = ch_versions.mix(RUN_MANTA_TUMORONLY.out.versions) } if (tools.contains('strelka')) { - //TODO: research if multiple targets can be provided: waiting for reply - - STRELKA_TUMORONLY( - cram_recalibrated_intervals_gz_tbi, - fasta, - fasta_fai - ) - - ch_versions = ch_versions.mix(STRELKA_TUMORONLY.out.versions) - - if(no_intervals){ - strelka_vcf_gz = STRELKA_TUMORONLY.out.vcf - strelka_genome_vcf_gz = STRELKA_TUMORONLY.out.genome_vcf - - }else{ - BGZIP_VC_STRELKA(STRELKA_TUMORONLY.out.vcf) - BGZIP_VC_STRELKA_GENOME(STRELKA_TUMORONLY.out.genome_vcf) - - BGZIP_VC_STRELKA.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{strelka_vcf_to_concat} - - BGZIP_VC_STRELKA_GENOME.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{strelka_genome_vcf_to_concat} - - CONCAT_STRELKA(strelka_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - 
CONCAT_STRELKA_GENOME(strelka_genome_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - - strelka_vcf_gz = CONCAT_STRELKA.out.vcf - strelka_genome_vcf_gz = CONCAT_STRELKA_GENOME.out.vcf - - ch_versions = ch_versions.mix(BGZIP_VC_STRELKA.out.versions) - ch_versions = ch_versions.mix(CONCAT_STRELKA.out.versions) - } - - strelka_vcf = strelka_vcf.mix(strelka_vcf_gz,strelka_genome_vcf_gz ) + RUN_STRELKA_SINGLE( cram_recalibrated_intervals_gz_tbi, + fasta, + fasta_fai, + intervals_bed_combine_gz, + num_intervals) + + strelka_vcf = RUN_STRELKA_SINGLE.out.strelka_vcf + ch_versions = ch_versions.mix(RUN_STRELKA_SINGLE.out.versions) } @@ -259,4 +125,5 @@ workflow TUMOR_ONLY_VARIANT_CALLING { manta_vcf mutect2_vcf strelka_vcf + } diff --git a/subworkflows/nf-core/gatk4/joint_germline_variant_calling/main.nf b/subworkflows/nf-core/gatk4/joint_germline_variant_calling/main.nf index 3e2240423d..9e1d1d95bf 100644 --- a/subworkflows/nf-core/gatk4/joint_germline_variant_calling/main.nf +++ b/subworkflows/nf-core/gatk4/joint_germline_variant_calling/main.nf @@ -29,32 +29,12 @@ workflow GATK_JOINT_GERMLINE_VARIANT_CALLING { main: ch_versions = Channel.empty() - // haplotypecaller can be skipped if input samples are already in gvcf format, essentially making the subworkflow joint genotyping. - if (run_haplotc) { - haplotc_input = channel.from(input) - // - //Perform variant calling using haplotypecaller module. Additional argument "-ERC GVCF" used to run in gvcf mode. 
- // - HAPLOTYPECALLER ( haplotc_input, fasta, fai, dict, sites, sites_index ) - - ch_versions = ch_versions.mix(HAPLOTYPECALLER.out.versions.first()) - ch_vcf = HAPLOTYPECALLER.out.vcf.collect{it[1]}.toList() - ch_index = HAPLOTYPECALLER.out.tbi.collect{it[1]}.toList() - - } else { - // if haplotypecaller is skipped, this channels the input to genomicsdbimport instead of the output vcfs and tbis that normally come from haplotypecaller - direct_input = channel.from(input) - ch_vcf = direct_input.collect{it[1]}.toList() - ch_index = direct_input.collect{it[2]}.toList() - } - // //Convert all sample vcfs into a genomicsdb workspace using genomicsdbimport. // gendb_input = Channel.of([[ id:joint_id ]]).combine(ch_vcf).combine(ch_index).combine([interval_file]).combine(['']).combine([dict]) GENOMICSDBIMPORT ( gendb_input, false, false, false ) - ch_versions = ch_versions.mix(GENOMICSDBIMPORT.out.versions) // @@ -65,7 +45,6 @@ workflow GATK_JOINT_GERMLINE_VARIANT_CALLING { ch_genotype_in.add([]) GENOTYPEGVCFS ( ch_genotype_in, fasta, fai, dict, sites, sites_index ) - ch_versions = ch_versions.mix(GENOTYPEGVCFS.out.versions) // setting run_vqsr to false skips the VQSR process, for if user does not wish to perform VQSR, diff --git a/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf b/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf index 754d65dc5b..6abe74c9e0 100644 --- a/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf +++ b/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf @@ -24,9 +24,8 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING { germline_resource_tbi // channel: /path/to/germline/index panel_of_normals // channel: /path/to/panel/of/normals panel_of_normals_tbi // channel: /path/to/panel/of/normals/index - no_intervals - num_intervals intervals_bed_combine_gz + num_intervals main: ch_versions = Channel.empty() @@ -40,108 +39,124 @@ workflow 
GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING { // //Generate pileup summary tables using getepileupsummaries. tumor sample should always be passed in as the first input and input list entries of ch_mutect2_in, //to ensure correct file order for calculatecontamination. + input.multiMap{ meta, input_list, input_index_list, intervals, which_norm -> + tumor: [ meta, input_list[1], input_index_list[1], intervals ] + normal: [ meta, input_list[0], input_index_list[0], intervals ] + }.set{pileup} - pileup_tumor_input = input.map { - meta, input_list, input_index_list, intervals, which_norm -> - tumor_id = meta.tumor_id - id = intervals ? tumor_id + "_" + intervals.baseName : tumor_id - [[id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], input_list[1], input_index_list[1], intervals] - } - - pileup_normal_input = input.map { - meta, input_list, input_index_list, intervals, which_norm -> - normal_id = meta.normal_id - id = intervals ? normal_id + "_" + intervals.baseName : normal_id - [[id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], input_list[0], input_index_list[0], intervals] - } - GETPILEUPSUMMARIES_TUMOR ( pileup_tumor_input, fasta, fai, dict, germline_resource, germline_resource_tbi ) - GETPILEUPSUMMARIES_NORMAL ( pileup_normal_input, fasta, fai, dict, germline_resource, germline_resource_tbi ) + GETPILEUPSUMMARIES_TUMOR ( pileup.tumor, fasta, fai, dict, germline_resource, germline_resource_tbi ) + GETPILEUPSUMMARIES_NORMAL ( pileup.normal, fasta, fai, dict, germline_resource, germline_resource_tbi ) ch_versions = ch_versions.mix(GETPILEUPSUMMARIES_NORMAL.out.versions) - if(no_intervals){ - mutect2_vcf_gz_tbi = MUTECT2.out.vcf.join(MUTECT2.out.tbi) - mutect2_stats = MUTECT2.out.stats - pileup_table_tumor= GETPILEUPSUMMARIES_TUMOR.out.table - pileup_table_normal= GETPILEUPSUMMARIES_NORMAL.out.table + // Figure out if using intervals or no_intervals + 
MUTECT2.out.vcf.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{ mutect2_vcf } - }else{ + MUTECT2.out.tbi.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{ mutect2_tbi } - //Merge Mutect2 VCF - BGZIP_MUTECT2(MUTECT2.out.vcf) - - BGZIP_MUTECT2.out.vcf.map{ meta, vcf -> - id = meta.tumor_id + "_vs_" + meta.normal_id - [[id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], vcf] - }.set{bgzip_mutect2} + MUTECT2.out.stats.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{ mutect2_stats } - mutect2_vcf_to_concat = bgzip_mutect2.groupTuple(size: num_intervals) + GETPILEUPSUMMARIES_NORMAL.out.table.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{ pileup_table_normal } - CONCAT_VCF_MUTECT2(mutect2_vcf_to_concat, fai, intervals_bed_combine_gz) - mutect2_vcf_gz_tbi = CONCAT_VCF_MUTECT2.out.vcf.join(CONCAT_VCF_MUTECT2.out.tbi) + GETPILEUPSUMMARIES_TUMOR.out.table.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{ pileup_table_tumor } - ch_versions = ch_versions.mix(BGZIP_MUTECT2.out.versions) - ch_versions = ch_versions.mix(CONCAT_VCF_MUTECT2.out.versions) - //Merge Muteect2 Stats - MUTECT2.out.stats.map{ meta, stats -> - id = meta.tumor_id + "_vs_" + meta.normal_id - [[id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], stats] - }.groupTuple(size: num_intervals).set{mutect2_stats_to_merge} + //Only when using intervals - MERGEMUTECTSTATS(mutect2_stats_to_merge) - mutect2_stats = MERGEMUTECTSTATS.out.stats - ch_versions = ch_versions.mix(MERGEMUTECTSTATS.out.versions) + //Merge Mutect2 VCF + BGZIP_MUTECT2(MUTECT2.out.vcf) - //Merge Pileup Summaries - pileup_tumor_tables_to_gather = GETPILEUPSUMMARIES_TUMOR.out.table.map{ meta, table -> - [[id: meta.tumor_id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, 
gender: meta.gender, patient: meta.patient ], table] - }.groupTuple(size: num_intervals) - - GATHERPILEUPSUMMARIES_TUMOR(pileup_tumor_tables_to_gather, dict) - GATHERPILEUPSUMMARIES_TUMOR.out.table.map{ meta, table -> - id = meta.tumor_id + "_vs_" + meta.normal_id - [[id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], table] - }.set{pileup_table_tumor} - - pileup_normal_tables_to_gather = GETPILEUPSUMMARIES_NORMAL.out.table.map{ meta, table -> + CONCAT_MUTECT2( + BGZIP_MUTECT2.out.vcf.map{ meta, vcf -> + [[id: meta.tumor_id + "_vs_" + meta.normal_id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], vcf] + }.groupTuple(size: num_intervals), + fai, + intervals_bed_combine_gz) + + mutect2_vcf = Channel.empty().mix( + CONCAT_MUTECT2.out.vcf, + mutect2_vcf.no_intervals) + + mutect2_tbi = Channel.empty().mix( + CONCAT_MUTECT2.out.tbi, + mutect2_tbi.no_intervals) + + ch_versions = ch_versions.mix(BGZIP_MUTECT2.out.versions) + ch_versions = ch_versions.mix(CONCAT_MUTECT2.out.versions) + + //Merge Muteect2 Stats + MERGEMUTECTSTATS(mutect2_stats.intervals.map{ meta, stats -> + [[id: meta.tumor_id + "_vs_" + meta.normal_id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], stats] + }.groupTuple(size: num_intervals)) + + mutect2_stats = Channel.empty().mix( + MERGEMUTECTSTATS.out.stats, + mutect2_stats.no_intervals) + + //Merge Pileup Summaries + GATHERPILEUPSUMMARIES_NORMAL( + GETPILEUPSUMMARIES_NORMAL.out.table.map{ meta, table -> [[id: meta.normal_id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], table] - }.groupTuple(size: num_intervals) + }.groupTuple(size: num_intervals), + dict) - GATHERPILEUPSUMMARIES_NORMAL(pileup_normal_tables_to_gather, dict) + gather_table_normal = Channel.empty().mix( GATHERPILEUPSUMMARIES_NORMAL.out.table.map{ meta, table -> - id = meta.tumor_id 
+ "_vs_" + meta.normal_id - [[id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], table] - }.set{pileup_table_normal} + [[id: meta.tumor_id + "_vs_" + meta.normal_id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], table] + }, + pileup_table_normal.no_intervals) + + GATHERPILEUPSUMMARIES_TUMOR( GETPILEUPSUMMARIES_TUMOR.out.table.map{ meta, table -> + [[id: meta.tumor_id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], table] + }.groupTuple(size: num_intervals), + dict) - } + gather_table_tumor = Channel.empty().mix( + GATHERPILEUPSUMMARIES_TUMOR.out.table.map{ meta, table -> + [[id: meta.tumor_id + "_vs_" + meta.normal_id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], table] + }, + pileup_table_tumor.no_intervals) // //Generate artifactpriors using learnreadorientationmodel on the f1r2 output of mutect2. // MUTECT2.out.f1r2.map{ meta, f1f2 -> - id = meta.tumor_id + "_vs_" + meta.normal_id - [[id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], f1f2] + [[id: meta.tumor_id + "_vs_" + meta.normal_id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], f1f2] }.groupTuple(size: num_intervals) .set{ch_learnread_in} LEARNREADORIENTATIONMODEL (ch_learnread_in) - ch_versions = ch_versions.mix(LEARNREADORIENTATIONMODEL.out.versions) // //Contamination and segmentation tables created using calculatecontamination on the pileup summary table. 
// - ch_calccon_in = pileup_table_tumor.join(pileup_table_normal) + ch_calccon_in = gather_table_tumor.join(gather_table_normal) CALCULATECONTAMINATION ( ch_calccon_in, true ) ch_versions = ch_versions.mix(CALCULATECONTAMINATION.out.versions) // //Mutect2 calls filtered by filtermutectcalls using the artifactpriors, contamination and segmentation tables. // - ch_filtermutect = mutect2_vcf_gz_tbi.join(mutect2_stats) - .join(LEARNREADORIENTATIONMODEL.out.artifactprior) - .join(CALCULATECONTAMINATION.out.segmentation) - .join(CALCULATECONTAMINATION.out.contamination) + ch_filtermutect = mutect2_vcf.join(mutect2_tbi) + .join(mutect2_stats) + .join(LEARNREADORIENTATIONMODEL.out.artifactprior) + .join(CALCULATECONTAMINATION.out.segmentation) + .join(CALCULATECONTAMINATION.out.contamination) ch_filtermutect.map{ meta, vcf, tbi, stats, orientation, seg, cont -> [meta, vcf, tbi, stats, orientation, seg, cont, []] }.set{ch_filtermutect_in} @@ -150,21 +165,21 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING { ch_versions = ch_versions.mix(FILTERMUTECTCALLS.out.versions) emit: - mutect2_vcf_gz_tbi = mutect2_vcf_gz_tbi // channel: [ val(meta), [ vcf ] ] - mutect2_stats = mutect2_stats // channel: [ val(meta), [ stats ] ] - //mutect2_f1r2 = MUTECT2.out.f1r2 // channel: [ val(meta), [ f1r2 ] ] + mutect2_vcf = mutect2_vcf // channel: [ val(meta), [ vcf ] ] + mutect2_stats = mutect2_stats // channel: [ val(meta), [ stats ] ] + mutect2_f1r2 = MUTECT2.out.f1r2 // channel: [ val(meta), [ f1r2 ] ] - artifact_priors = LEARNREADORIENTATIONMODEL.out.artifactprior // channel: [ val(meta), [ artifactprior ] ] + artifact_priors = LEARNREADORIENTATIONMODEL.out.artifactprior // channel: [ val(meta), [ artifactprior ] ] - pileup_table_tumor = pileup_table_tumor // channel: [ val(meta), [ table_tumor ] ] - pileup_table_normal = pileup_table_normal // channel: [ val(meta), [ table_normal ] ] + pileup_table_tumor = gather_table_tumor // channel: [ val(meta), [ table_tumor ] ] + 
pileup_table_normal = gather_table_normal // channel: [ val(meta), [ table_normal ] ] - contamination_table = CALCULATECONTAMINATION.out.contamination // channel: [ val(meta), [ contamination ] ] - segmentation_table = CALCULATECONTAMINATION.out.segmentation // channel: [ val(meta), [ segmentation ] ] + contamination_table = CALCULATECONTAMINATION.out.contamination // channel: [ val(meta), [ contamination ] ] + segmentation_table = CALCULATECONTAMINATION.out.segmentation // channel: [ val(meta), [ segmentation ] ] - filtered_vcf = FILTERMUTECTCALLS.out.vcf // channel: [ val(meta), [ vcf ] ] - filtered_tbi = FILTERMUTECTCALLS.out.tbi // channel: [ val(meta), [ tbi ] ] - filtered_stats = FILTERMUTECTCALLS.out.stats // channel: [ val(meta), [ stats ] ] + filtered_vcf = FILTERMUTECTCALLS.out.vcf // channel: [ val(meta), [ vcf ] ] + filtered_tbi = FILTERMUTECTCALLS.out.tbi // channel: [ val(meta), [ tbi ] ] + filtered_stats = FILTERMUTECTCALLS.out.stats // channel: [ val(meta), [ stats ] ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf b/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf index d8b44a46e8..97ab85b85f 100644 --- a/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf +++ b/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf @@ -2,14 +2,14 @@ // Run GATK mutect2 in tumor only mode, getepileupsummaries, calculatecontamination and filtermutectcalls // -include { BGZIP as BGZIP_MUTECT2 } from '../../../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_MUTECT2 } from '../../../../modules/local/concat_vcf/main' -include { GATK4_MUTECT2 as MUTECT2 } from '../../../../modules/nf-core/modules/gatk4/mutect2/main' -include { GATK4_MERGEMUTECTSTATS as MERGEMUTECTSTATS } from '../../../../modules/nf-core/modules/gatk4/mergemutectstats/main' -include { 
GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES } from '../../../../modules/nf-core/modules/gatk4/getpileupsummaries/main' -include { GATK4_GATHERPILEUPSUMMARIES as GATHERPILEUPSUMMARIES } from '../../../../modules/nf-core/modules/gatk4/gatherpileupsummaries/main' -include { GATK4_CALCULATECONTAMINATION as CALCULATECONTAMINATION } from '../../../../modules/nf-core/modules/gatk4/calculatecontamination/main' -include { GATK4_FILTERMUTECTCALLS as FILTERMUTECTCALLS } from '../../../../modules/nf-core/modules/gatk4/filtermutectcalls/main' +include { BGZIP as BGZIP_VC_MUTECT2 } from '../../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_MUTECT2 } from '../../../../modules/local/concat_vcf/main' +include { GATK4_MUTECT2 as MUTECT2 } from '../../../../modules/nf-core/modules/gatk4/mutect2/main' +include { GATK4_MERGEMUTECTSTATS as MERGEMUTECTSTATS } from '../../../../modules/nf-core/modules/gatk4/mergemutectstats/main' +include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES } from '../../../../modules/nf-core/modules/gatk4/getpileupsummaries/main' +include { GATK4_GATHERPILEUPSUMMARIES as GATHERPILEUPSUMMARIES } from '../../../../modules/nf-core/modules/gatk4/gatherpileupsummaries/main' +include { GATK4_CALCULATECONTAMINATION as CALCULATECONTAMINATION } from '../../../../modules/nf-core/modules/gatk4/calculatecontamination/main' +include { GATK4_FILTERMUTECTCALLS as FILTERMUTECTCALLS } from '../../../../modules/nf-core/modules/gatk4/filtermutectcalls/main' workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { take: @@ -21,10 +21,8 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { germline_resource_tbi // channel: /path/to/germline/index panel_of_normals // channel: /path/to/panel/of/normals panel_of_normals_tbi // channel: /path/to/panel/of/normals/index - num_intervals - no_intervals intervals_bed_combine_gz - + num_intervals main: ch_versions = Channel.empty() @@ -32,9 +30,7 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { // //Perform variant calling 
using mutect2 module in tumor single mode. // - mutect2_vcf_gz_tbi = Channel.empty() MUTECT2 ( input , true , false , false , fasta , fai , dict , germline_resource , germline_resource_tbi , panel_of_normals , panel_of_normals_tbi ) - ch_versions = ch_versions.mix(MUTECT2.out.versions) // //Generate pileup summary table using getepileupsummaries. @@ -44,83 +40,109 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { [meta, input_file, input_index, intervals] } GETPILEUPSUMMARIES ( pileup_input , fasta, fai, dict, germline_resource , germline_resource_tbi ) - ch_versions = ch_versions.mix(GETPILEUPSUMMARIES.out.versions) - if(no_intervals){ - mutect2_vcf_gz_tbi = MUTECT2.out.vcf.join(MUTECT2.out.tbi) - mutect2_stats = MUTECT2.out.stats - pileup_table = GETPILEUPSUMMARIES.out.table - }else{ + // Figure out if using intervals or no_intervals + MUTECT2.out.vcf.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{ mutect2_vcf_branch } - //Merge Mutect2 VCF - BGZIP_MUTECT2(MUTECT2.out.vcf) - BGZIP_MUTECT2.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.set{bgzip_mutect2} + MUTECT2.out.tbi.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{ mutect2_tbi_branch } - mutect2_vcf_to_concat = bgzip_mutect2.groupTuple(size: num_intervals) + MUTECT2.out.stats.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{ mutect2_stats_branch } - CONCAT_VCF_MUTECT2(mutect2_vcf_to_concat, fai, intervals_bed_combine_gz) - mutect2_vcf_gz_tbi = CONCAT_VCF_MUTECT2.out.vcf.join(CONCAT_VCF_MUTECT2.out.tbi) + GETPILEUPSUMMARIES.out.table.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }set{ pileup_table_branch } - ch_versions = ch_versions.mix(BGZIP_MUTECT2.out.versions) - ch_versions = ch_versions.mix(CONCAT_VCF_MUTECT2.out.versions) + //Only when using intervals + //Merge Mutect2 VCF + 
BGZIP_VC_MUTECT2(mutect2_vcf_branch.intervals) - //Merge Muteect2 Stats - MUTECT2.out.stats.map{ meta, stats -> + CONCAT_MUTECT2(BGZIP_VC_MUTECT2.out.vcf.map{ meta, vcf -> new_meta = meta.clone() new_meta.id = new_meta.sample - [new_meta, stats] - }.groupTuple(size: num_intervals).set{mutect2_stats_to_merge} - - MERGEMUTECTSTATS(mutect2_stats_to_merge) - mutect2_stats = MERGEMUTECTSTATS.out.stats - ch_versions = ch_versions.mix(MERGEMUTECTSTATS.out.versions) - - //Merge Pileup Summaries - pileup_tables_to_gather = GETPILEUPSUMMARIES.out.table.map{ meta, table -> + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fai, + intervals_bed_combine_gz) + + mutect2_vcf = Channel.empty().mix( + CONCAT_MUTECT2.out.vcf, + mutect2_vcf_branch.no_intervals) + + mutect2_tbi = Channel.empty().mix( + CONCAT_MUTECT2.out.tbi, + mutect2_tbi_branch.no_intervals) + + //Merge Muteect2 Stats + MERGEMUTECTSTATS(mutect2_stats_branch.intervals.map{ meta, stats -> + new_meta = meta.clone() + new_meta.id = new_meta.sample + [new_meta, stats] + }.groupTuple(size: num_intervals)) + + mutect2_stats = Channel.empty().mix( + MERGEMUTECTSTATS.out.stats, + mutect2_stats_branch.no_intervals) + + //Merge Pileup Summaries + GATHERPILEUPSUMMARIES( GETPILEUPSUMMARIES.out.table.map{ meta, table -> new_meta = meta.clone() new_meta.id = new_meta.sample [new_meta, table] - }.groupTuple(size: num_intervals) - - GATHERPILEUPSUMMARIES(pileup_tables_to_gather, dict) - pileup_table = GATHERPILEUPSUMMARIES.out.table + }.groupTuple(size: num_intervals), + dict) - } + pileup_table = Channel.empty().mix( + GATHERPILEUPSUMMARIES.out.table, + pileup_table_branch.no_intervals) // //Contamination and segmentation tables created using calculatecontamination on the pileup summary table. 
// pileup_table.map{meta, table -> [meta, table, []]}.set{table_contamination} CALCULATECONTAMINATION ( table_contamination, true ) - ch_versions = ch_versions.mix(CALCULATECONTAMINATION.out.versions) - // //Mutect2 calls filtered by filtermutectcalls using the contamination and segmentation tables. // - ch_filtermutect = mutect2_vcf_gz_tbi.join(mutect2_stats) - .join(CALCULATECONTAMINATION.out.segmentation) - .join(CALCULATECONTAMINATION.out.contamination) + ch_filtermutect = mutect2_vcf.join(mutect2_tbi) + .join(mutect2_stats) + .join(CALCULATECONTAMINATION.out.segmentation) + .join(CALCULATECONTAMINATION.out.contamination) ch_filtermutect_in = ch_filtermutect.map{ meta, vcf, tbi, stats, seg, cont -> [meta, vcf, tbi, stats, [], seg, cont, []] } + FILTERMUTECTCALLS ( ch_filtermutect_in, fasta, fai, dict ) + + ch_versions = ch_versions.mix(BGZIP_VC_MUTECT2.out.versions) + ch_versions = ch_versions.mix(CALCULATECONTAMINATION.out.versions) + ch_versions = ch_versions.mix(CONCAT_MUTECT2.out.versions) ch_versions = ch_versions.mix(FILTERMUTECTCALLS.out.versions) + ch_versions = ch_versions.mix(GATHERPILEUPSUMMARIES.out.versions) + ch_versions = ch_versions.mix(GETPILEUPSUMMARIES.out.versions) + ch_versions = ch_versions.mix(MERGEMUTECTSTATS.out.versions) + ch_versions = ch_versions.mix(MUTECT2.out.versions) emit: - mutect2_vcf_gz_tbi = mutect2_vcf_gz_tbi // channel: [ val(meta), [ vcf ] ] - mutect2_stats = MUTECT2.out.stats // channel: [ val(meta), [ stats ] ] + mutect2_vcf = mutect2_vcf // channel: [ val(meta), [ vcf ] ] + mutect2_stats = mutect2_stats // channel: [ val(meta), [ stats ] ] - pileup_table = pileup_table // channel: [ val(meta), [ table ] ] + pileup_table = pileup_table // channel: [ val(meta), [ table ] ] - contamination_table = CALCULATECONTAMINATION.out.contamination // channel: [ val(meta), [ contamination ] ] - segmentation_table = CALCULATECONTAMINATION.out.segmentation // channel: [ val(meta), [ segmentation ] ] + contamination_table = 
CALCULATECONTAMINATION.out.contamination // channel: [ val(meta), [ contamination ] ] + segmentation_table = CALCULATECONTAMINATION.out.segmentation // channel: [ val(meta), [ segmentation ] ] - filtered_vcf = FILTERMUTECTCALLS.out.vcf // channel: [ val(meta), [ vcf ] ] - filtered_index = FILTERMUTECTCALLS.out.tbi // channel: [ val(meta), [ tbi ] ] - filtered_stats = FILTERMUTECTCALLS.out.stats // channel: [ val(meta), [ stats ] ] + filtered_vcf = FILTERMUTECTCALLS.out.vcf // channel: [ val(meta), [ vcf ] ] + filtered_index = FILTERMUTECTCALLS.out.tbi // channel: [ val(meta), [ tbi ] ] + filtered_stats = FILTERMUTECTCALLS.out.stats // channel: [ val(meta), [ stats ] ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/nf-core/variantcalling/deepvariant/main.nf b/subworkflows/nf-core/variantcalling/deepvariant/main.nf new file mode 100644 index 0000000000..e5069f2bab --- /dev/null +++ b/subworkflows/nf-core/variantcalling/deepvariant/main.nf @@ -0,0 +1,71 @@ +include { BGZIP as BGZIP_VC_DEEPVARIANT_GVCF } from '../../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_DEEPVARIANT_VCF } from '../../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_DEEPVARIANT_GVCF } from '../../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_DEEPVARIANT_VCF } from '../../../../modules/local/concat_vcf/main' +include { DEEPVARIANT } from '../../../../modules/nf-core/modules/deepvariant/main' +include { TABIX_TABIX as TABIX_VC_DEEPVARIANT_GVCF } from '../../../../modules/nf-core/modules/tabix/tabix/main' +include { TABIX_TABIX as TABIX_VC_DEEPVARIANT_VCF } from '../../../../modules/nf-core/modules/tabix/tabix/main' + +//TODO: benchmark if it is better to provide multiple bed files & run on multiple machines + mergeing afterwards || one containing all intervals and run on one larger machine +// Deepvariant: https://github.com/google/deepvariant/issues/510 +workflow 
RUN_DEEPVARIANT { + take: + cram // channel: [mandatory] [meta, cram, crai, interval] + fasta // channel: [mandatory] + fasta_fai // channel: [mandatory] + intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. + num_intervals // val: [optional] Number of used intervals, mandatory when intervals are provided. + + main: + + ch_versions = Channel.empty() + + DEEPVARIANT(cram, fasta, fasta_fai) + + // Only when no intervals + TABIX_VC_DEEPVARIANT_VCF(DEEPVARIANT.out.vcf) + TABIX_VC_DEEPVARIANT_GVCF(DEEPVARIANT.out.gvcf) + + // Only when using intervals + BGZIP_VC_DEEPVARIANT_VCF(DEEPVARIANT.out.vcf) + BGZIP_VC_DEEPVARIANT_GVCF(DEEPVARIANT.out.gvcf) + + CONCAT_DEEPVARIANT_VCF( + BGZIP_VC_DEEPVARIANT_VCF.out.vcf + .map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.sample + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_gz) + + CONCAT_DEEPVARIANT_GVCF( + BGZIP_VC_DEEPVARIANT_GVCF.out.vcf + .map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.sample + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_gz) + + // Mix output channels for "no intervals" and "with intervals" results + deepvariant_vcf = Channel.empty().mix( + CONCAT_DEEPVARIANT_GVCF.out.vcf, + CONCAT_DEEPVARIANT_VCF.out.vcf, + DEEPVARIANT.out.gvcf, + DEEPVARIANT.out.vcf) + + ch_versions = ch_versions.mix(BGZIP_VC_DEEPVARIANT_GVCF.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_DEEPVARIANT_VCF.out.versions) + ch_versions = ch_versions.mix(CONCAT_DEEPVARIANT_GVCF.out.versions) + ch_versions = ch_versions.mix(CONCAT_DEEPVARIANT_VCF.out.versions) + ch_versions = ch_versions.mix(DEEPVARIANT.out.versions) + ch_versions = ch_versions.mix(TABIX_VC_DEEPVARIANT_GVCF.out.versions) + ch_versions = ch_versions.mix(TABIX_VC_DEEPVARIANT_VCF.out.versions) + + emit: + deepvariant_vcf + versions = ch_versions +} diff 
--git a/subworkflows/nf-core/variantcalling/freebayes/main.nf b/subworkflows/nf-core/variantcalling/freebayes/main.nf new file mode 100644 index 0000000000..3e909a5e4e --- /dev/null +++ b/subworkflows/nf-core/variantcalling/freebayes/main.nf @@ -0,0 +1,53 @@ +include { BGZIP as BGZIP_VC_FREEBAYES } from '../../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_FREEBAYES } from '../../../../modules/local/concat_vcf/main' +include { FREEBAYES } from '../../../../modules/nf-core/modules/freebayes/main' +include { TABIX_TABIX as TABIX_VC_FREEBAYES } from '../../../../modules/nf-core/modules/tabix/tabix/main' + +workflow RUN_FREEBAYES { + take: + cram // channel: [mandatory] [meta, cram, crai, [], [], interval] + fasta // channel: [mandatory] + fasta_fai // channel: [mandatory] + intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. + num_intervals // val: [optional] Number of used intervals, mandatory when intervals are provided. 
+ + main: + + ch_versions = Channel.empty() + + FREEBAYES( + cram, + fasta, + fasta_fai, + [], [], []) + + // Only when no intervals + TABIX_VC_FREEBAYES(FREEBAYES.out.vcf) + + // Only when using intervals + BGZIP_VC_FREEBAYES(FREEBAYES.out.vcf) + + CONCAT_FREEBAYES( + BGZIP_VC_FREEBAYES.out.vcf + .map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.sample + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_gz) + + // Mix output channels for "no intervals" and "with intervals" results + freebayes_vcf = Channel.empty().mix( + CONCAT_FREEBAYES.out.vcf, + FREEBAYES.out.vcf) + + ch_versions = ch_versions.mix(BGZIP_VC_FREEBAYES.out.versions) + ch_versions = ch_versions.mix(CONCAT_FREEBAYES.out.versions) + ch_versions = ch_versions.mix(FREEBAYES.out.versions) + ch_versions = ch_versions.mix(TABIX_VC_FREEBAYES.out.versions) + + emit: + freebayes_vcf + versions = ch_versions +} diff --git a/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf b/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf new file mode 100644 index 0000000000..0a8a6958f5 --- /dev/null +++ b/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf @@ -0,0 +1,119 @@ +include { BGZIP as BGZIP_VC_HAPLOTYPECALLER } from '../../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_HAPLOTYPECALLER } from '../../../../modules/local/concat_vcf/main' +include { GATK4_GENOTYPEGVCFS as GENOTYPEGVCFS } from '../../../../modules/nf-core/modules/gatk4/genotypegvcfs/main' +include { GATK4_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../../../modules/nf-core/modules/gatk4/haplotypecaller/main' +include { GATK_JOINT_GERMLINE_VARIANT_CALLING } from '../../../../subworkflows/nf-core/gatk4/joint_germline_variant_calling/main' +include { TABIX_TABIX as TABIX_VC_HAPLOTYPECALLER } from '../../../../modules/nf-core/modules/tabix/tabix/main' + +workflow RUN_HAPLOTYPECALLER { + take: + cram // channel: [mandatory] [meta, cram, crai, interval.bed.gz, 
interval.bed.gz.tbi] + fasta // channel: [mandatory] + fasta_fai // channel: [mandatory] + dict // channel: [mandatory] + dbsnp // channel: [mandatory] + dbsnp_tbi // channel: [mandatory] + intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. + intervals_bed_combine_gz_tbi // channel: [optional] Contains a [bed.gz, bed.gz.tbi ]file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. + num_intervals // val: [optional] Number of used intervals, mandatory when intervals are provided. + + main: + + ch_versions = Channel.empty() + + HAPLOTYPECALLER( + cram, + fasta, + fasta_fai, + dict, + dbsnp, + dbsnp_tbi) + + // Figure out if using intervals or no_intervals + HAPLOTYPECALLER.out.vcf.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{haplotypecaller_vcf_branch} + + // Only when no intervals + TABIX_VC_HAPLOTYPECALLER(haplotypecaller_vcf_branch.no_intervals) + + // Only when using intervals + BGZIP_VC_HAPLOTYPECALLER(haplotypecaller_vcf_branch.intervals) + + CONCAT_HAPLOTYPECALLER( + BGZIP_VC_HAPLOTYPECALLER.out.vcf + .map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.sample + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_gz) + + haplotypecaller_vcf = Channel.empty().mix( + CONCAT_HAPLOTYPECALLER.out.vcf, + haplotypecaller_vcf_branch.no_intervals) + + haplotypecaller_vcf_tbi = Channel.empty().mix( + CONCAT_HAPLOTYPECALLER.out.tbi, + haplotypecaller_vcf_branch.no_intervals) + + // genotype_gvcf_to_call = haplotypecaller_gvcf.join(haplotypecaller_gvcf_tbi) + // .combine(intervals_bed_combine_gz_tbi) + // .map{ + // meta, gvcf, gvf_tbi, intervals, intervals_tbi -> + // new_intervals = intervals.simpleName != "no_intervals" ? intervals : [] + // new_intervals_tbi = intervals_tbi.simpleName != "no_intervals" ? 
intervals_tbi : [] + // [meta, gvcf, gvf_tbi, new_intervals, new_intervals_tbi] + // } + + // GENOTYPEGVCFS + + // GENOTYPEGVCFS( + // genotype_gvcf_to_call, + // fasta, + // fasta_fai, + // dict, + // dbsnp, + // dbsnp_tbi) + //workflow haplotypecaller (default mode)-> CNNScoreVariants + //workflow haplotypecaller (ERC mode) -> GenomicsDBimport -> GenotypeGVCFs -> VQSR + + //genotype_gvcf = GENOTYPEGVCFS.out.vcf + + // if (joint_germline) { + // run_haplotypecaller = false + // run_vqsr = true //parameter? + // some feedback from gavin + // GATK_JOINT_GERMLINE_VARIANT_CALLING( + // haplotypecaller_vcf_gz_tbi, + // run_haplotypecaller, + // run_vqsr, + // fasta, + // fasta_fai, + // dict, + // dbsnp, + // dbsnp_tbi, + // "joined", + // allelespecific? + // resources? + // annotation? + // "BOTH", + // true, + // truthsensitivity -> parameter or module? + // ) + // ch_versions = ch_versions.mix(GATK_JOINT_GERMLINE_VARIANT_CALLING.out.versions) + // } + ch_versions = ch_versions.mix(BGZIP_VC_HAPLOTYPECALLER.out.versions) + ch_versions = ch_versions.mix(CONCAT_HAPLOTYPECALLER.out.versions) + ch_versions = ch_versions.mix(GENOTYPEGVCFS.out.versions) + //ch_versions = ch_versions.mix(GATK_JOINT_GERMLINE_VARIANT_CALLING.out.versions) + ch_versions = ch_versions.mix(HAPLOTYPECALLER.out.versions) + ch_versions = ch_versions.mix(TABIX_VC_HAPLOTYPECALLER.out.versions) + + emit: + versions = ch_versions + genotype_gvcf + haplotypecaller_gvcf +} diff --git a/subworkflows/nf-core/variantcalling/manta/germline/main.nf b/subworkflows/nf-core/variantcalling/manta/germline/main.nf new file mode 100644 index 0000000000..dec574f959 --- /dev/null +++ b/subworkflows/nf-core/variantcalling/manta/germline/main.nf @@ -0,0 +1,98 @@ +include { BGZIP as BGZIP_VC_MANTA_DIPLOID } from '../../../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_SV } from '../../../../../modules/local/bgzip' 
+include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../../../modules/local/concat_vcf/main' +include { MANTA_GERMLINE } from '../../../../../modules/nf-core/modules/manta/germline/main' + +// TODO: Research if splitting by intervals is ok, we pretend for now it is fine. +// Seems to be the consensus on upstream modules implementation too +workflow RUN_MANTA_GERMLINE { + take: + cram // channel: [mandatory] [meta, cram, crai, interval.bed.gz, interval.bed.gz.tbi] + fasta // channel: [mandatory] + fasta_fai // channel: [mandatory] + intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. + num_intervals // val: [optional] Number of used intervals, mandatory when intervals are provided. 
+ + main: + + ch_versions = Channel.empty() + + MANTA_GERMLINE(cram, fasta, fasta_fai) + + // Figure out if using intervals or no_intervals + MANTA_GERMLINE.out.candidate_small_indels_vcf.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{manta_small_indels_vcf} + + MANTA_GERMLINE.out.candidate_sv_vcf.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{manta_sv_vcf} + + MANTA_GERMLINE.out.diploid_sv_vcf.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{manta_diploid_sv_vcf} + + // Only when using intervals + BGZIP_VC_MANTA_SMALL_INDELS(manta_small_indels_vcf.intervals) + + CONCAT_MANTA_SMALL_INDELS( + BGZIP_VC_MANTA_SMALL_INDELS.out.vcf + .map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.sample + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_gz) + + BGZIP_VC_MANTA_SV(manta_sv_vcf.intervals) + + CONCAT_MANTA_SV( + BGZIP_VC_MANTA_SV.out.vcf + .map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.sample + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_gz) + + BGZIP_VC_MANTA_DIPLOID(manta_diploid_sv_vcf.intervals) + + CONCAT_MANTA_DIPLOID( + BGZIP_VC_MANTA_DIPLOID.out.vcf + .map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.sample + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_gz) + + // Mix output channels for "no intervals" and "with intervals" results + manta_vcf = Channel.empty().mix( + CONCAT_MANTA_DIPLOID.out.vcf, + CONCAT_MANTA_SMALL_INDELS.out.vcf, + CONCAT_MANTA_SV.out.vcf, + manta_diploid_sv_vcf.no_intervals, + manta_small_indels_vcf.no_intervals, + manta_sv_vcf.no_intervals) + + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_DIPLOID.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SMALL_INDELS.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) + ch_versions = 
ch_versions.mix(CONCAT_MANTA_DIPLOID.out.versions) + ch_versions = ch_versions.mix(CONCAT_MANTA_SMALL_INDELS.out.versions) + ch_versions = ch_versions.mix(CONCAT_MANTA_SV.out.versions) + ch_versions = ch_versions.mix(MANTA_GERMLINE.out.versions) + + emit: + manta_vcf + versions = ch_versions +} diff --git a/subworkflows/nf-core/variantcalling/manta/somatic/main.nf b/subworkflows/nf-core/variantcalling/manta/somatic/main.nf new file mode 100644 index 0000000000..34e2f5eb9c --- /dev/null +++ b/subworkflows/nf-core/variantcalling/manta/somatic/main.nf @@ -0,0 +1,117 @@ +include { BGZIP as BGZIP_VC_MANTA_DIPLOID } from '../../../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_SOMATIC } from '../../../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_SV } from '../../../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SOMATIC } from '../../../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../../../modules/local/concat_vcf/main' +include { MANTA_SOMATIC } from '../../../../../modules/nf-core/modules/manta/somatic/main' + +workflow RUN_MANTA_SOMATIC { + take: + cram // channel: [mandatory] [meta, normal_cram, normal_crai, tumor_cram, tumor_crai, interval.bed.gz, interval.bed.gz.tbi] + fasta // channel: [mandatory] + fasta_fai // channel: [mandatory] + intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. + num_intervals // val: [optional] Number of used intervals, mandatory when intervals are provided. 
+ + main: + + ch_versions = Channel.empty() + + MANTA_SOMATIC(cram, fasta, fasta_fai) + + // Figure out if using intervals or no_intervals + MANTA_SOMATIC.out.candidate_small_indels_vcf.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{manta_candidate_small_indels_vcf} + + MANTA_SOMATIC.out.candidate_sv_vcf.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{manta_candidate_sv_vcf} + + MANTA_SOMATIC.out.diploid_sv_vcf.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{manta_diploid_sv_vcf} + + MANTA_SOMATIC.out.somatic_sv_vcf.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{manta_somatic_sv_vcf} + + //Only when using intervals + BGZIP_VC_MANTA_SV(manta_candidate_small_indels_vcf.intervals) + + CONCAT_MANTA_SV( + BGZIP_VC_MANTA_SV.out.vcf.map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_gz) + + BGZIP_VC_MANTA_SMALL_INDELS(manta_candidate_sv_vcf.intervals) + + CONCAT_MANTA_SMALL_INDELS( + BGZIP_VC_MANTA_SMALL_INDELS.out.vcf.map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_gz) + + BGZIP_VC_MANTA_DIPLOID(manta_diploid_sv_vcf.intervals) + + CONCAT_MANTA_DIPLOID( + BGZIP_VC_MANTA_DIPLOID.out.vcf.map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_gz) + + BGZIP_VC_MANTA_SOMATIC(manta_somatic_sv_vcf.intervals) + + CONCAT_MANTA_SOMATIC( + BGZIP_VC_MANTA_SOMATIC.out.vcf.map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id + [new_meta, vcf] + }.groupTuple(size: num_intervals), + 
fasta_fai, + intervals_bed_gz) + + // Mix output channels for "no intervals" and "with intervals" results + manta_vcf = Channel.empty().mix( + CONCAT_MANTA_SV.out.vcf, + CONCAT_MANTA_SMALL_INDELS.out.vcf, + CONCAT_MANTA_DIPLOID.out.vcf, + CONCAT_MANTA_SOMATIC.out.vcf, + manta_candidate_small_indels_vcf.no_intervals, + manta_candidate_sv_vcf.no_intervals, + manta_diploid_sv_vcf.no_intervals, + manta_somatic_sv_vcf.no_intervals + ) + + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SMALL_INDELS.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_DIPLOID.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SOMATIC.out.versions) + ch_versions = ch_versions.mix(CONCAT_MANTA_SV.out.versions) + ch_versions = ch_versions.mix(CONCAT_MANTA_SMALL_INDELS.out.versions) + ch_versions = ch_versions.mix(CONCAT_MANTA_DIPLOID.out.versions) + ch_versions = ch_versions.mix(CONCAT_MANTA_SOMATIC.out.versions) + ch_versions = ch_versions.mix(MANTA_SOMATIC.out.versions) + + emit: + manta_vcf + versions = ch_versions + +} diff --git a/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf b/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf new file mode 100644 index 0000000000..13696c0db7 --- /dev/null +++ b/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf @@ -0,0 +1,96 @@ +include { BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_SV } from '../../../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_MANTA_TUMOR } from '../../../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_MANTA_TUMOR } from '../../../../../modules/local/concat_vcf/main' +include { MANTA_TUMORONLY } from 
'../../../../../modules/nf-core/modules/manta/tumoronly/main' + +// TODO: Research if splitting by intervals is ok, we pretend for now it is fine. +// Seems to be the consensus on upstream modules implementation too +workflow RUN_MANTA_TUMORONLY { + take: + cram // channel: [mandatory] [meta, cram, crai, interval.bed.gz, interval.bed.gz.tbi] + fasta // channel: [mandatory] + fasta_fai // channel: [mandatory] + intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. + num_intervals // val: [optional] Number of used intervals, mandatory when intervals are provided. + + main: + + ch_versions = Channel.empty() + + MANTA_TUMORONLY(cram, fasta, fasta_fai) + + // Figure out if using intervals or no_intervals + MANTA_TUMORONLY.out.candidate_small_indels_vcf.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{manta_small_indels_vcf} + + MANTA_TUMORONLY.out.candidate_sv_vcf.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{manta_candidate_sv_vcf} + + MANTA_TUMORONLY.out.tumor_sv_vcf.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{manta_tumor_sv_vcf} + + //Only when using intervals + BGZIP_VC_MANTA_SMALL_INDELS(manta_small_indels_vcf.intervals) + + CONCAT_MANTA_SMALL_INDELS( + BGZIP_VC_MANTA_SMALL_INDELS.out.vcf.map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.sample + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_gz) + + BGZIP_VC_MANTA_SV(manta_candidate_sv_vcf.intervals) + + CONCAT_MANTA_SV( + BGZIP_VC_MANTA_SV.out.vcf.map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.sample + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_gz) + + BGZIP_VC_MANTA_TUMOR(manta_tumor_sv_vcf.intervals) + + CONCAT_MANTA_TUMOR( + BGZIP_VC_MANTA_TUMOR.out.vcf.map{ meta, vcf -> + new_meta = 
meta.clone() + new_meta.id = new_meta.sample + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_gz) + + // Mix output channels for "no intervals" and "with intervals" results + manta_vcf = Channel.empty().mix( + CONCAT_MANTA_SMALL_INDELS.out.vcf, + CONCAT_MANTA_SV.out.vcf, + CONCAT_MANTA_TUMOR.out.vcf, + manta_small_indels_vcf.no_intervals, + manta_candidate_sv_vcf.no_intervals, + manta_tumor_sv_vcf.no_intervals + ) + + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SMALL_INDELS.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_MANTA_TUMOR.out.versions) + ch_versions = ch_versions.mix(CONCAT_MANTA_SV.out.versions) + ch_versions = ch_versions.mix(CONCAT_MANTA_SMALL_INDELS.out.versions) + ch_versions = ch_versions.mix(CONCAT_MANTA_TUMOR.out.versions) + ch_versions = ch_versions.mix(MANTA_TUMORONLY.out.versions) + + emit: + manta_vcf + versions = ch_versions +} diff --git a/subworkflows/nf-core/variantcalling/strelka/single/main.nf b/subworkflows/nf-core/variantcalling/strelka/single/main.nf new file mode 100644 index 0000000000..e90ec8118a --- /dev/null +++ b/subworkflows/nf-core/variantcalling/strelka/single/main.nf @@ -0,0 +1,75 @@ +include { BGZIP as BGZIP_VC_STRELKA } from '../../../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_STRELKA_GENOME } from '../../../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_STRELKA } from '../../../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_STRELKA_GENOME } from '../../../../../modules/local/concat_vcf/main' +include { STRELKA_GERMLINE } from '../../../../../modules/nf-core/modules/strelka/germline/main' + +// TODO: Research if splitting by intervals is ok, we pretend for now it is fine. 
+// Seems to be the consensus on upstream modules implementation too +workflow RUN_STRELKA_SINGLE { + take: + cram // channel: [mandatory] [meta, cram, crai, interval.bed.gz, interval.bed.gz.tbi] + fasta // channel: [mandatory] + fasta_fai // channel: [mandatory] + intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. + num_intervals // val: [optional] Number of used intervals, mandatory when intervals are provided. + + main: + + ch_versions = Channel.empty() + + STRELKA_GERMLINE(cram, fasta, fasta_fai) + + // Figure out if using intervals or no_intervals + STRELKA_GERMLINE.out.vcf.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{strelka_vcf} + + STRELKA_GERMLINE.out.genome_vcf.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{strelka_genome_vcf} + + // Only when using intervals + BGZIP_VC_STRELKA(strelka_vcf.intervals) + + CONCAT_STRELKA( + BGZIP_VC_STRELKA.out.vcf + .map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.sample + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_gz) + + BGZIP_VC_STRELKA_GENOME(strelka_genome_vcf.intervals) + + CONCAT_STRELKA_GENOME( + BGZIP_VC_STRELKA_GENOME.out.vcf + .map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.sample + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_gz) + + // Mix output channels for "no intervals" and "with intervals" results + strelka_vcf = Channel.empty().mix( + CONCAT_STRELKA.out.vcf, + CONCAT_STRELKA_GENOME.out.vcf, + strelka_genome_vcf.no_intervals, + strelka_vcf.no_intervals) + + ch_versions = ch_versions.mix(BGZIP_VC_STRELKA.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_STRELKA_GENOME.out.versions) + ch_versions = ch_versions.mix(CONCAT_STRELKA.out.versions) + ch_versions = 
ch_versions.mix(CONCAT_STRELKA_GENOME.out.versions) + ch_versions = ch_versions.mix(STRELKA_GERMLINE.out.versions) + + emit: + strelka_vcf + versions = ch_versions +} diff --git a/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf b/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf new file mode 100644 index 0000000000..b0bc7509d8 --- /dev/null +++ b/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf @@ -0,0 +1,71 @@ +include { BGZIP as BGZIP_VC_STRELKA_INDELS } from '../../../../../modules/local/bgzip' +include { BGZIP as BGZIP_VC_STRELKA_SNVS } from '../../../../../modules/local/bgzip' +include { CONCAT_VCF as CONCAT_STRELKA_INDELS } from '../../../../../modules/local/concat_vcf/main' +include { CONCAT_VCF as CONCAT_STRELKA_SNVS } from '../../../../../modules/local/concat_vcf/main' +include { STRELKA_SOMATIC } from '../../../../../modules/nf-core/modules/strelka/somatic/main' + +// TODO: Research if splitting by intervals is ok, we pretend for now it is fine. +// Seems to be the consensus on upstream modules implementation too +workflow RUN_STRELKA_SOMATIC { + take: + cram // channel: [mandatory] [meta, normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi, interval.bed.gz, interval.bed.gz.tbi] manta* are optional + fasta // channel: [mandatory] + fasta_fai // channel: [mandatory] + intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. + num_intervals // val: [optional] Number of used intervals, mandatory when intervals are provided. 
+ + main: + + ch_versions = Channel.empty() + + STRELKA_SOMATIC(cram, fasta, fasta_fai ) + + // Figure out if using intervals or no_intervals + STRELKA_SOMATIC.out.vcf_snvs.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{strelka_vcf_snvs} + + STRELKA_SOMATIC.out.vcf_indels.branch{ + intervals: num_intervals > 1 + no_intervals: num_intervals == 1 + }.set{strelka_vcf_indels} + + // Only when using intervals + BGZIP_VC_STRELKA_SNVS(strelka_vcf_snvs.intervals) + + CONCAT_STRELKA_SNVS(BGZIP_VC_STRELKA_SNVS.out.vcf.map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_gz) + + BGZIP_VC_STRELKA_INDELS(strelka_vcf_indels.intervals) + + CONCAT_STRELKA_INDELS(BGZIP_VC_STRELKA_INDELS.out.vcf.map{ meta, vcf -> + new_meta = meta.clone() + new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id + [new_meta, vcf] + }.groupTuple(size: num_intervals), + fasta_fai, + intervals_bed_gz) + + // Mix output channels for "no intervals" and "with intervals" results + strelka_vcf = Channel.empty().mix( + CONCAT_STRELKA_SNVS.out.vcf, + CONCAT_STRELKA_INDELS.out.vcf, + strelka_vcf_snvs.no_intervals, + strelka_vcf_indels.no_intervals) + + ch_versions = ch_versions.mix(BGZIP_VC_STRELKA_SNVS.out.versions) + ch_versions = ch_versions.mix(BGZIP_VC_STRELKA_INDELS.out.versions) + ch_versions = ch_versions.mix(CONCAT_STRELKA_SNVS.out.versions) + ch_versions = ch_versions.mix(CONCAT_STRELKA_INDELS.out.versions) + ch_versions = ch_versions.mix(STRELKA_SOMATIC.out.versions) + + emit: + strelka_vcf + versions = ch_versions +} diff --git a/subworkflows/nf-core/variantcalling/tiddit/main.nf b/subworkflows/nf-core/variantcalling/tiddit/main.nf new file mode 100644 index 0000000000..c21e2e845a --- /dev/null +++ b/subworkflows/nf-core/variantcalling/tiddit/main.nf @@ -0,0 +1,35 @@ +include { TABIX_BGZIPTABIX as TABIX_BGZIP_TIDDIT_SV } 
from '../../../../modules/nf-core/modules/tabix/bgziptabix/main' +include { TIDDIT_SV } from '../../../../modules/nf-core/modules/tiddit/sv/main' + +//TODO: UNDER CONSTRUCTION +workflow RUN_TIDDIT { + take: + + + main: + + ch_versions = Channel.empty() + // if (tools.contains('tiddit')) { + // TODO: Update tiddit on bioconda, the current version does not support cram usage, needs newest version: + // https://github.com/SciLifeLab/TIDDIT/issues/82#issuecomment-1022103264 + // Issue opened, either this week or end of February + + // TIDDIT_SV( + // cram_recalibrated, + // fasta, + // fasta_fai + // ) + + // TABIX_BGZIP_TIDDIT_SV(TIDDIT_SV.out.vcf) + // tiddit_vcf_gz_tbi = TABIX_BGZIP_TIDDIT_SV.out.gz_tbi + // tiddit_ploidy = TIDDIT_SV.out.ploidy + // tiddit_signals = TIDDIT_SV.out.signals + // tiddit_wig = TIDDIT_SV.out.wig + // tiddit_gc_wig = TIDDIT_SV.out.gc_wig + + // ch_versions = ch_versions.mix(TABIX_BGZIP_TIDDIT_SV.out.versions) + // ch_versions = ch_versions.mix(TIDDIT_SV.out.versions) + // } + emit: + versions = ch_versions +} diff --git a/tests/csv/3.0/recalibrated_somatic.csv b/tests/csv/3.0/recalibrated_somatic.csv new file mode 100644 index 0000000000..d7364586ef --- /dev/null +++ b/tests/csv/3.0/recalibrated_somatic.csv @@ -0,0 +1,3 @@ +patient,gender,status,sample,cram,crai +test3,XX,0,sample3,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai +test3,XX,1,sample4,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai diff --git a/tests/csv/3.0/recalibrated_tumoronly.csv 
b/tests/csv/3.0/recalibrated_tumoronly.csv new file mode 100644 index 0000000000..1b453883dc --- /dev/null +++ b/tests/csv/3.0/recalibrated_tumoronly.csv @@ -0,0 +1,2 @@ +patient,gender,status,sample,cram,crai +test1,XX,1,sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai diff --git a/tests/test_tools.yml b/tests/test_tools.yml index a308ebb444..c372f93a8f 100644 --- a/tests/test_tools.yml +++ b/tests/test_tools.yml @@ -5,6 +5,8 @@ - germline - variant_calling files: + - path: results/variant_calling/sample1/deepvariant/sample1.g.vcf.gz + - path: results/variant_calling/sample1/deepvariant/sample1.g.vcf.gz.tbi - path: results/variant_calling/sample1/deepvariant/sample1.vcf.gz - path: results/variant_calling/sample1/deepvariant/sample1.vcf.gz.tbi @@ -16,35 +18,52 @@ - no_intervals - variant_calling files: + - path: results/variant_calling/sample1/deepvariant/sample1.g.vcf.gz + - path: results/variant_calling/sample1/deepvariant/sample1.g.vcf.gz.tbi - path: results/variant_calling/sample1/deepvariant/sample1.vcf.gz - path: results/variant_calling/sample1/deepvariant/sample1.vcf.gz.tbi -- name: Run variant calling on germline sample with deepvariant and generate gvcf - command: nextflow run main.nf -profile test,tools_germline,docker --tools deepvariant --generate_gvcf +- name: Run variant calling on germline sample with freebayes + command: nextflow run main.nf -profile test,tools_germline,docker --tools freebayes tags: - - deepvariant + - freebayes - germline - - gvcf - variant_calling files: - - path: results/variant_calling/sample1/deepvariant/sample1.g.vcf.gz - - path: results/variant_calling/sample1/deepvariant/sample1.g.vcf.gz.tbi - - path: results/variant_calling/sample1/deepvariant/sample1.vcf.gz - - path: 
results/variant_calling/sample1/deepvariant/sample1.vcf.gz.tbi + - path: results/variant_calling/sample1/freebayes/sample1.vcf.gz + - path: results/variant_calling/sample1/freebayes/sample1.vcf.gz.tbi -- name: Run variant calling on germline sample with deepvariant without intervals and generate gvcf - command: nextflow run main.nf -profile test,tools_germline,docker --tools deepvariant --generate_gvcf --no_intervals +- name: Run variant calling on germline sample with freebayes without intervals + command: nextflow run main.nf -profile test,tools_germline,docker --tools freebayes --no_intervals tags: - - deepvariant + - freebayes - germline - - gvcf - no_intervals - variant_calling files: - - path: results/variant_calling/sample1/deepvariant/sample1.g.vcf.gz - - path: results/variant_calling/sample1/deepvariant/sample1.g.vcf.gz.tbi - - path: results/variant_calling/sample1/deepvariant/sample1.vcf.gz - - path: results/variant_calling/sample1/deepvariant/sample1.vcf.gz.tbi + - path: results/variant_calling/sample1/freebayes/sample1.vcf.gz + - path: results/variant_calling/sample1/freebayes/sample1.vcf.gz.tbi + +- name: Run variant calling on tumor_only sample with freebayes + command: nextflow run main.nf -profile test,tools_tumoronly,docker --tools freebayes + tags: + - freebayes + - tumor_only + - variant_calling + files: + - path: results/variant_calling/sample2/freebayes/sample2.vcf.gz + - path: results/variant_calling/sample2/freebayes/sample2.vcf.gz.tbi + +- name: Run variant calling on tumor_only sample with freebayes without intervals + command: nextflow run main.nf -profile test,tools_tumoronly,docker --tools freebayes --no_intervals + tags: + - freebayes + - no_intervals + - tumor_only + - variant_calling + files: + - path: results/variant_calling/sample2/freebayes/sample2.vcf.gz + - path: results/variant_calling/sample2/freebayes/sample2.vcf.gz.tbi - name: Run variant calling on germline sample with haplotypecaller command: nextflow run main.nf -profile 
test,tools_germline,docker --tools haplotypecaller @@ -123,6 +142,162 @@ - path: results/variant_calling/sample1/manta/sample1.diploid_sv.vcf.gz - path: results/variant_calling/sample1/manta/sample1.diploid_sv.vcf.gz.tbi +- name: Run variant calling on tumor_only sample with manta + command: nextflow run main.nf -profile test,tools_tumoronly,docker --tools manta + tags: + - manta + - tumor_only + - variant_calling + files: + - path: results/variant_calling/sample2/manta/sample2.candidate_small_indels.vcf.gz + - path: results/variant_calling/sample2/manta/sample2.candidate_small_indels.vcf.gz.tbi + - path: results/variant_calling/sample2/manta/sample2.candidate_sv.vcf.gz + - path: results/variant_calling/sample2/manta/sample2.candidate_sv.vcf.gz.tbi + - path: results/variant_calling/sample2/manta/sample2.tumor_sv.vcf.gz + - path: results/variant_calling/sample2/manta/sample2.tumor_sv.vcf.gz.tbi + +- name: Run variant calling on tumor_only sample with manta without intervals + command: nextflow run main.nf -profile test,tools_tumoronly,docker --tools manta --no_intervals + tags: + - manta + - no_intervals + - tumor_only + - variant_calling + files: + - path: results/variant_calling/sample2/manta/sample2.candidate_small_indels.vcf.gz + - path: results/variant_calling/sample2/manta/sample2.candidate_small_indels.vcf.gz.tbi + - path: results/variant_calling/sample2/manta/sample2.candidate_sv.vcf.gz + - path: results/variant_calling/sample2/manta/sample2.candidate_sv.vcf.gz.tbi + - path: results/variant_calling/sample2/manta/sample2.tumor_sv.vcf.gz + - path: results/variant_calling/sample2/manta/sample2.tumor_sv.vcf.gz.tbi + +- name: Run variant calling on somatic sample with manta + command: nextflow run main.nf -profile test,tools_somatic,docker --tools manta + tags: + - manta + - somatic + - variant_calling + files: + - path: results/variant_calling/sample3/manta/sample3.candidate_small_indels.vcf.gz + - path: 
results/variant_calling/sample3/manta/sample3.candidate_small_indels.vcf.gz.tbi + - path: results/variant_calling/sample3/manta/sample3.candidate_sv.vcf.gz + - path: results/variant_calling/sample3/manta/sample3.candidate_sv.vcf.gz.tbi + - path: results/variant_calling/sample3/manta/sample3.diploid_sv.vcf.gz + - path: results/variant_calling/sample3/manta/sample3.diploid_sv.vcf.gz.tbi + - path: results/variant_calling/sample4_vs_sample3/manta/sample4_vs_sample3.candidate_small_indels.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/manta/sample4_vs_sample3.candidate_small_indels.vcf.gz.tbi + - path: results/variant_calling/sample4_vs_sample3/manta/sample4_vs_sample3.candidate_sv.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/manta/sample4_vs_sample3.candidate_sv.vcf.gz.tbi + - path: results/variant_calling/sample4_vs_sample3/manta/sample4_vs_sample3.diploid_sv.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/manta/sample4_vs_sample3.diploid_sv.vcf.gz.tbi + +- name: Run variant calling on somatic sample with manta without intervals + command: nextflow run main.nf -profile test,tools_somatic,docker --tools manta --no_intervals + tags: + - manta + - no_intervals + - somatic + - variant_calling + files: + - path: results/variant_calling/sample3/manta/sample3.candidate_small_indels.vcf.gz + - path: results/variant_calling/sample3/manta/sample3.candidate_small_indels.vcf.gz.tbi + - path: results/variant_calling/sample3/manta/sample3.candidate_sv.vcf.gz + - path: results/variant_calling/sample3/manta/sample3.candidate_sv.vcf.gz.tbi + - path: results/variant_calling/sample3/manta/sample3.diploid_sv.vcf.gz + - path: results/variant_calling/sample3/manta/sample3.diploid_sv.vcf.gz.tbi + - path: results/variant_calling/sample4_vs_sample3/manta/sample4_vs_sample3.candidate_small_indels.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/manta/sample4_vs_sample3.candidate_small_indels.vcf.gz.tbi + - path: 
results/variant_calling/sample4_vs_sample3/manta/sample4_vs_sample3.candidate_sv.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/manta/sample4_vs_sample3.candidate_sv.vcf.gz.tbi + - path: results/variant_calling/sample4_vs_sample3/manta/sample4_vs_sample3.diploid_sv.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/manta/sample4_vs_sample3.diploid_sv.vcf.gz.tbi + +- name: Run variant calling on tumoronly sample with mutect2 + command: nextflow run main.nf -profile test,tools_tumoronly,docker --tools mutect2 + tags: + - mutect2 + - tumor_only + - variant_calling + files: + - path: results/variant_calling/sample2/mutect2/sample2.vcf.gz + - path: results/variant_calling/sample2/mutect2/sample2.vcf.gz.tbi + - path: results/variant_calling/sample2/mutect2/sample2.vcf.gz.stats + - path: results/variant_calling/sample2/mutect2/sample2.contamination.table + - path: results/variant_calling/sample2/mutect2/sample2.segmentation.table + - path: results/variant_calling/sample2/mutect2/sample2.table.pileupsummaries.table + - path: results/variant_calling/sample2/mutect2/sample2.filtered.vcf.gz + - path: results/variant_calling/sample2/mutect2/sample2.filtered.vcf.gz.tbi + - path: results/variant_calling/sample2/mutect2/sample2.filtered.vcf.gz.filteringStats.tsv + +- name: Run variant calling on tumoronly sample with mutect2 without intervals + command: nextflow run main.nf -profile test,tools_tumoronly,docker --tools mutect2 --no_intervals + tags: + - mutect2 + - no_intervals + - tumor_only + - variant_calling + files: + - path: results/variant_calling/sample2/mutect2/sample2.vcf.gz + - path: results/variant_calling/sample2/mutect2/sample2.vcf.gz.tbi + - path: results/variant_calling/sample2/mutect2/sample2.vcf.gz.stats + - path: results/variant_calling/sample2/mutect2/sample2.contamination.table + - path: results/variant_calling/sample2/mutect2/sample2.segmentation.table + - path: results/variant_calling/sample2/mutect2/sample2.pileups.table + - path: 
results/variant_calling/sample2/mutect2/sample2.filtered.vcf.gz + - path: results/variant_calling/sample2/mutect2/sample2.filtered.vcf.gz.tbi + - path: results/variant_calling/sample2/mutect2/sample2.filtered.vcf.gz.filteringStats.tsv + +- name: Run variant calling on somatic sample with mutect2 + command: nextflow run main.nf -profile test,tools_somatic,docker --tools mutect2 + tags: + - mutect2 + - somatic + - variant_calling + files: + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.vcf.gz.tbi + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.vcf.gz.stats + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.contamination.table + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.segmentation.table + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample3.table.pileupsummaries.table + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4.table.pileupsummaries.table + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.learnreadorientationmodel.tar.gz + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.filtered.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.filtered.vcf.gz.tbi + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.filtered.vcf.gz.filteringStats.tsv + +- name: Run variant calling on somatic sample with mutect2 without intervals + command: nextflow run main.nf -profile test,tools_somatic,docker --tools mutect2 --no_intervals + tags: + - mutect2 + - no_intervals + - somatic + - variant_calling + files: + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.vcf.gz.tbi + - path: 
results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.vcf.gz.stats + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.contamination.table + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.segmentation.table + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample3.pileups.table + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4.pileups.table + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.learnreadorientationmodel.tar.gz + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.filtered.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.filtered.vcf.gz.tbi + - path: results/variant_calling/sample4_vs_sample3/mutect2/sample4_vs_sample3.filtered.vcf.gz.filteringStats.tsv + +- name: Run variant calling on somatic sample with msisensor-pro + command: nextflow run main.nf -profile test,tools_somatic,docker --tools msisensorpro + tags: + - msisensorpro + - somatic + - variant_calling + files: + - path: results/variant_calling/sample4_vs_sample3/msisensorpro/sample4_vs_sample3 + - path: results/variant_calling/sample4_vs_sample3/msisensorpro/sample4_vs_sample3_dis + - path: results/variant_calling/sample4_vs_sample3/msisensorpro/sample4_vs_sample3_germline + - path: results/variant_calling/sample4_vs_sample3/msisensorpro/sample4_vs_sample3_somatic + - name: Run variant calling on germline sample with strelka command: nextflow run main.nf -profile test,tools_germline,docker --tools strelka tags: @@ -147,3 +322,62 @@ - path: results/variant_calling/sample1/strelka/sample1.variants.vcf.gz.tbi - path: results/variant_calling/sample1/strelka/sample1.genome.vcf.gz - path: results/variant_calling/sample1/strelka/sample1.genome.vcf.gz.tbi + +- name: Run variant calling on tumoronly sample with strelka + command: nextflow run main.nf -profile test,tools_tumoronly,docker --tools strelka + 
tags: + - strelka + - tumor_only + - variant_calling + files: + - path: results/variant_calling/sample2/strelka/sample2.variants.vcf.gz + - path: results/variant_calling/sample2/strelka/sample2.variants.vcf.gz.tbi + - path: results/variant_calling/sample2/strelka/sample2.genome.vcf.gz + - path: results/variant_calling/sample2/strelka/sample2.genome.vcf.gz.tbi + +- name: Run variant calling on tumoronly sample with strelka without intervals + command: nextflow run main.nf -profile test,tools_tumoronly,docker --tools strelka --no_intervals + tags: + - no_intervals + - strelka + - tumor_only + - variant_calling + files: + - path: results/variant_calling/sample2/strelka/sample2.variants.vcf.gz + - path: results/variant_calling/sample2/strelka/sample2.variants.vcf.gz.tbi + - path: results/variant_calling/sample2/strelka/sample2.genome.vcf.gz + - path: results/variant_calling/sample2/strelka/sample2.genome.vcf.gz.tbi + +- name: Run variant calling on somatic sample with strelka + command: nextflow run main.nf -profile test,tools_somatic,docker --tools strelka + tags: + - somatic + - strelka + - variant_calling + files: + - path: results/variant_calling/sample3/strelka/sample3.variants.vcf.gz + - path: results/variant_calling/sample3/strelka/sample3.variants.vcf.gz.tbi + - path: results/variant_calling/sample3/strelka/sample3.genome.vcf.gz + - path: results/variant_calling/sample3/strelka/sample3.genome.vcf.gz.tbi + - path: results/variant_calling/sample4_vs_sample3/strelka/sample4_vs_sample3.somatic_indels.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/strelka/sample4_vs_sample3.somatic_indels.vcf.gz.tbi + - path: results/variant_calling/sample4_vs_sample3/strelka/sample4_vs_sample3.somatic_snvs.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/strelka/sample4_vs_sample3.somatic_snvs.vcf.gz.tbi + +- name: Run variant calling on somatic sample with strelka without intervals + command: nextflow run main.nf -profile test,tools_somatic,docker --tools 
strelka --no_intervals + tags: + - no_intervals + - somatic + - strelka + - variant_calling + files: + - path: results/variant_calling/sample3/strelka/sample3.variants.vcf.gz + - path: results/variant_calling/sample3/strelka/sample3.variants.vcf.gz.tbi + - path: results/variant_calling/sample3/strelka/sample3.genome.vcf.gz + - path: results/variant_calling/sample3/strelka/sample3.genome.vcf.gz.tbi + - path: results/variant_calling/sample4_vs_sample3/strelka/sample4_vs_sample3.somatic_indels.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/strelka/sample4_vs_sample3.somatic_indels.vcf.gz.tbi + - path: results/variant_calling/sample4_vs_sample3/strelka/sample4_vs_sample3.somatic_snvs.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/strelka/sample4_vs_sample3.somatic_snvs.vcf.gz.tbi +#TODO: Test for strelka + manta diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 2b86813074..49447a55a8 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -252,7 +252,7 @@ workflow SAREK { intervals_bed_combined_gz = intervals_bed_combined_gz_tbi.map{ bed, tbi -> [bed]}.collect() // one file containing all intervals interval.bed.gz file intervals_for_preprocessing = (!params.wes || params.no_intervals) ? [] : PREPARE_INTERVALS.out.intervals_bed //TODO: intervals also with WGS data? Probably need a parameter if WGS for deepvariant tool, that would allow to check here too - num_intervals = params.intervals ? count_intervals(file(params.intervals)) : 1 + num_intervals = !params.no_intervals ? (params.intervals ? count_intervals(file(params.intervals)) : 1) : 1 // Gather used softwares versions ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) @@ -623,6 +623,7 @@ workflow SAREK { intervals_bed_gz_tbi, intervals_bed_combined_gz_tbi, intervals_bed_combined_gz, + intervals_bed_combined, num_intervals, params.no_intervals, msisensorpro_scan,