diff --git a/CHANGELOG.md b/CHANGELOG.md index 9fe9be0ecd..f62b303a71 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -53,6 +53,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#544](https://github.com/nf-core/sarek/pull/544) - `Mutect2` is no longer compatible with `--no_intervals` - [#551](https://github.com/nf-core/sarek/pull/551) - Sync `TEMPLATE` with `tools` `2.4` - [#563](https://github.com/nf-core/sarek/pull/563) - Updated subway map +- [#571](https://github.com/nf-core/sarek/pull/571) - Including and using GATK4's mergeVcfs. Removing the local module `concat_vcf`. - [#572](https://github.com/nf-core/sarek/pull/572) - Adjusted subway map svg for firefox compatibility ### Fixed diff --git a/README.md b/README.md index be6f34a6bf..d93929f39d 100644 --- a/README.md +++ b/README.md @@ -91,6 +91,7 @@ We thank the following people for their extensive assistance in the development - [Abhinav Sharma](https://github.com/abhi18av) - [Adrian Lärkeryd](https://github.com/adrlar) - [Alexander Peltzer](https://github.com/apeltzer) +- [Anders Sune Pedersen](https://github.com/asp8200) - [Chela James](https://github.com/chelauk) - [David Mas-Ponte](https://github.com/davidmasp) - [Francesco L](https://github.com/nibscles) diff --git a/bin/concatenateVCFs.sh b/bin/concatenateVCFs.sh deleted file mode 100755 index d4a9bff1d1..0000000000 --- a/bin/concatenateVCFs.sh +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# This script concatenates all VCF files that are in the local directory, -# that were created from different intervals to make a single final VCF - -usage() { echo "Usage: $0 [-i genome_index_file] [-o output.file.no.gz.extension] <-t target.bed> <-c cpus> <-n>" 1>&2; exit 1; } - -while [[ $# -gt 0 ]] -do - key=$1 - case $key in - -i) - genomeIndex=$2 - shift # past argument - shift # past value - ;; - -c) - cpus=$2 - shift # past argument - shift # past value - ;; - -o) - outputFile=$2 - shift # past argument - shift # past value - ;; - -t) - targetBED=$2 - shift # past argument - shift # past value - ;; - -n) - noInt=1 - shift # past argument - ;; - *) - usage - shift # past argument - ;; - esac -done - -if [ -z ${genomeIndex} ]; then echo "Missing index file "; usage; fi -if [ -z ${cpus} ]; then echo "No CPUs defined: setting to 1"; cpus=1; fi -if [ -z ${outputFile} ]; then echo "Missing output file name"; usage; fi - -if [ -z ${noInt+x} ] -then - # First make a header from one of the VCF - # Remove interval information from the GATK command-line, but leave the rest - FIRSTVCF=$(set +o pipefail; ls *.vcf | head -n 1) - sed -n '/^[^#]/q;p' $FIRSTVCF | \ - awk '!/GATKCommandLine/{print}/GATKCommandLine/{for(i=1;i<=NF;i++){if($i!~/intervals=/ && $i !~ /out=/){printf("%s ",$i)}}printf("\n")}' \ - > header - - # Get list of contigs from the FASTA index (.fai) - # ##contig header in the VCF cannot be used as it is optional (FreeBayes does not save it, for example) - - CONTIGS=($(cut -f1 ${genomeIndex})) - - #Concatenate VCFs in the correct order - ( - cat header - - for chr in "${CONTIGS[@]}"; do - # Skip if globbing would not match any file to avoid errors such as - # "ls: cannot access chr3_*.vcf: No such file or directory" when chr3 - # was not processed. - pattern="*_${chr}_*.vcf" - if ! compgen -G "${pattern}" > /dev/null ; then continue; fi - - # ls -v sorts by numeric value ("version"), which means that chr1_100_ - # is sorted *after* chr1_99_. - for vcf in $(ls -v ${pattern}); do - # Determine length of header. 
- # The 'q' command makes sed exit when it sees the first non-header - # line, which avoids reading in the entire file. - L=$(sed -n '/^[^#]/q;p' ${vcf} | wc -l) - - # Then print all non-header lines. Since tail is very fast (nearly as - # fast as cat), this is way more efficient than using a single sed, - # awk or grep command. - tail -n +$((L+1)) ${vcf} - done - done - ) | bgzip -@${cpus} > rawcalls.unsorted.vcf.gz -else - VCF=$(ls no_intervals*.vcf) - cp $VCF rawcalls.unsorted.vcf - bgzip -@${cpus} rawcalls.unsorted.vcf -fi - -bcftools sort -T . rawcalls.unsorted.vcf.gz | bgzip > rawcalls.vcf.gz -tabix -p vcf rawcalls.vcf.gz - -set +u - -# Now we have the concatenated VCF file, check for WES/panel targets, and generate a subset if there is a BED provided -if [ ! -z ${targetBED+x} ]; then - echo "Target is $targetBED - Selecting subset..." - bcftools isec --targets-file ${targetBED} rawcalls.vcf.gz | bgzip -@${cpus} > ${outputFile}.gz - tabix ${outputFile}.gz -else - # Rename the raw calls as WGS results - for f in rawcalls.vcf*; do mv -v $f ${outputFile}${f#rawcalls.vcf}; done -fi diff --git a/conf/modules.config b/conf/modules.config index 4cfdfea1fa..6bc7fe157d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -470,28 +470,15 @@ process { // VARIANT CALLING process{ - // ALL - withName: 'CONCAT_.*' { - // For unknown reasons, CONCAT_VCF sometimes fails with SIGPIPE - // (exit code 141). Rerunning the process will usually work. - errorStrategy = {task.exitStatus == 141 ? 'retry' : 'terminate'} - ext.args = { params.no_intervals ? "-n" : "" } //Why ConcatVCF is never run when no_intervals is set.. - } - withName : 'BGZIP_VC_.*' { - publishDir = [ - enabled: false - ] - } - // DEEPVARIANT - withName: 'CONCAT_DEEPVARIANT_.*' { + withName: 'MERGE_DEEPVARIANT_.*' { publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/deepvariant" }, pattern: "*{vcf.gz,vcf.gz.tbi}" ] } - withName: 'CONCAT_DEEPVARIANT_GVCF' { + withName: 'MERGE_DEEPVARIANT_GVCF' { ext.prefix = {"${meta.id}.g"} } withName: 'DEEPVARIANT' { @@ -513,7 +500,7 @@ process{ } // FREEBAYES - withName: 'CONCAT_FREEBAYES' { + withName: 'MERGE_FREEBAYES' { publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/freebayes" }, @@ -547,7 +534,7 @@ process{ } // HAPLOTYPECALLER - withName: 'CONCAT_HAPLOTYPECALLER' { + withName: 'MERGE_HAPLOTYPECALLER' { ext.prefix = {"${meta.id}.g"} publishDir = [ enabled: !params.no_intervals, @@ -578,20 +565,20 @@ process{ } // MANTA - withName: 'CONCAT_MANTA.*' { + withName: 'MERGE_MANTA.*' { publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/manta" }, pattern: "*{vcf.gz,vcf.gz.tbi}" ] } - withName: 'CONCAT_MANTA_DIPLOID' { + withName: 'MERGE_MANTA_DIPLOID' { ext.prefix = {"${meta.id}.diploid_sv"} } - withName: 'CONCAT_MANTA_SMALL_INDELS' { + withName: 'MERGE_MANTA_SMALL_INDELS' { ext.prefix = {"${meta.id}.candidate_small_indels"} } - withName: 'CONCAT_MANTA_SV' { + withName: 'MERGE_MANTA_SV' { ext.prefix = {"${meta.id}.candidate_sv"} } withName: 'MANTA.*' { @@ -606,17 +593,17 @@ process{ } // STRELKA - withName: 'CONCAT_STRELKA.*' { + withName: 'MERGE_STRELKA.*' { publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/strelka" }, pattern: "*{vcf.gz,vcf.gz.tbi}" ] } - withName: 'CONCAT_STRELKA' { + withName: 'MERGE_STRELKA' { ext.prefix = {"${meta.id}.variants"} } - withName: 'CONCAT_STRELKA_GENOME' { + withName: 
'MERGE_STRELKA_GENOME' { ext.prefix = {"${meta.id}.genome"} } withName: 'STRELKA_.*' { @@ -766,7 +753,7 @@ process{ } //MANTA - withName: 'CONCAT_MANTA_TUMOR' { + withName: 'MERGE_MANTA_TUMOR' { ext.prefix = {"${meta.id}.tumor_sv"} } @@ -780,7 +767,7 @@ process{ ] } - withName: 'CONCAT_MUTECT2.*' { + withName: 'MERGE_MUTECT2.*' { publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" }, @@ -957,7 +944,7 @@ process{ } //MANTA - withName: 'CONCAT_MANTA_SOMATIC' { + withName: 'MERGE_MANTA_SOMATIC' { ext.prefix = {"${meta.id}.somatic_sv"} } @@ -979,10 +966,10 @@ process{ } //STRELKA - withName: 'CONCAT_STRELKA_INDELS' { + withName: 'MERGE_STRELKA_INDELS' { ext.prefix = {"${meta.id}.somatic_indels"} } - withName: 'CONCAT_STRELKA_SNVS' { + withName: 'MERGE_STRELKA_SNVS' { ext.prefix = {"${meta.id}.somatic_snvs"} } diff --git a/modules.json b/modules.json index cfb9facba0..9c660fffdd 100644 --- a/modules.json +++ b/modules.json @@ -147,6 +147,9 @@ "gatk4/mergemutectstats": { "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" }, + "gatk4/mergevcfs": { + "git_sha": "4199a05aeb0ec277d40cb112949bb85893310873" + }, "gatk4/mutect2": { "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" }, @@ -219,9 +222,6 @@ "strelka/somatic": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, - "tabix/bgzip": { - "git_sha": "37bf3936f3665483d070a5e0e0b314311032af7c" - }, "tabix/bgziptabix": { "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" }, diff --git a/modules/local/concat_vcf/main.nf b/modules/local/concat_vcf/main.nf deleted file mode 100644 index 97db0f2a7b..0000000000 --- a/modules/local/concat_vcf/main.nf +++ /dev/null @@ -1,35 +0,0 @@ -process CONCAT_VCF { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::bcftools=1.14" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.14--hde04aa1_1' : - 'quay.io/biocontainers/bcftools:1.14--hde04aa1_1' }" - - input: - tuple val(meta), path(vcf) - path fasta_fai - path target_bed - - output: - tuple val(meta), path("${prefix}.vcf.gz") , emit: vcf - tuple val(meta), path("${prefix}.vcf.gz.tbi"), emit: tbi - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - def target_options = target_bed ? "-t ${target_bed}" : "" - """ - concatenateVCFs.sh -i ${fasta_fai} -c ${task.cpus} -o ${prefix}.vcf ${target_options} $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/gatk4/mergevcfs/main.nf b/modules/nf-core/modules/gatk4/mergevcfs/main.nf new file mode 100644 index 0000000000..35930a6e51 --- /dev/null +++ b/modules/nf-core/modules/gatk4/mergevcfs/main.nf @@ -0,0 +1,47 @@ +process GATK4_MERGEVCFS { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': + 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + + input: + tuple val(meta), path(vcf) + path dict + + output: + tuple val(meta), path('*.vcf.gz'), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input_list = vcf.collect{ "--INPUT $it"}.join(' ') + def reference_command = dict ? "--SEQUENCE_DICTIONARY $dict" : "" + + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK MergeVcfs] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + gatk --java-options "-Xmx${avail_mem}g" MergeVcfs \\ + $input_list \\ + --OUTPUT ${prefix}.vcf.gz \\ + $reference_command \\ + --TMP_DIR . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/gatk4/mergevcfs/meta.yml b/modules/nf-core/modules/gatk4/mergevcfs/meta.yml new file mode 100644 index 0000000000..3ebce0b9e1 --- /dev/null +++ b/modules/nf-core/modules/gatk4/mergevcfs/meta.yml @@ -0,0 +1,48 @@ +name: gatk4_mergevcfs +description: Merges several vcf files +keywords: + - vcf + - merge +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - vcf: + type: list + description: Two or more VCF files + pattern: "*.{vcf,vcf.gz}" + - ref_dict: + type: file + description: Optional Sequence Dictionary as input + pattern: "*.dict" + - use_ref_dict: + type: boolean + description: Specify whether or not to use a given reference dictionary +output: + - vcf: + type: file + description: merged vcf file + pattern: "*.vcf.gz" + - tbi: + type: file + description: index files for the merged vcf files + pattern: "*.tbi" + + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kevinmenden" diff --git a/modules/nf-core/modules/tabix/bgzip/main.nf b/modules/nf-core/modules/tabix/bgzip/main.nf deleted file mode 100644 index 18e83c84d8..0000000000 --- a/modules/nf-core/modules/tabix/bgzip/main.nf +++ /dev/null @@ -1,34 +0,0 @@ -process TABIX_BGZIP { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? 'bioconda::tabix=1.11' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/tabix:1.11--hdfd78af_0' : - 'quay.io/biocontainers/tabix:1.11--hdfd78af_0' }" - - input: - tuple val(meta), path(input) - - output: - tuple val(meta), path("${prefix}*"), emit: output - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - in_bgzip = input.toString().endsWith(".gz") - command1 = in_bgzip ? '-d' : '-c' - command2 = in_bgzip ? '' : " > ${prefix}.${input.getExtension()}.gz" - """ - bgzip $command1 $args -@${task.cpus} $input $command2 - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/tabix/bgzip/meta.yml b/modules/nf-core/modules/tabix/bgzip/meta.yml deleted file mode 100644 index 5007017510..0000000000 --- a/modules/nf-core/modules/tabix/bgzip/meta.yml +++ /dev/null @@ -1,42 +0,0 @@ -name: tabix_bgzip -description: Compresses/decompresses files -keywords: - - compress - - decompress - - bgzip - - tabix -tools: - - bgzip: - description: | - Bgzip compresses or decompresses files in a similar manner to, and compatible with, gzip. - homepage: https://www.htslib.org/doc/tabix.html - documentation: http://www.htslib.org/doc/bgzip.html - doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input: - type: file - description: file to compress or to decompress -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - output: - type: file - description: Output compressed/decompressed file - pattern: "*." 
- - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@joseespinosa" - - "@drpatelh" - - "@maxulysse" diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index 9b1859ae45..0314705a04 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -64,7 +64,7 @@ workflow GERMLINE_VARIANT_CALLING { // DEEPVARIANT if(params.tools.contains('deepvariant')){ - RUN_DEEPVARIANT(cram_recalibrated_intervals, fasta, fasta_fai, intervals_bed_combine_gz) + RUN_DEEPVARIANT(cram_recalibrated_intervals, dict, fasta, fasta_fai, intervals_bed_combine_gz) deepvariant_vcf = RUN_DEEPVARIANT.out.deepvariant_vcf ch_versions = ch_versions.mix(RUN_DEEPVARIANT.out.versions) @@ -77,7 +77,7 @@ workflow GERMLINE_VARIANT_CALLING { .map{ meta, cram, crai, intervals -> [meta, cram, crai, [], [], intervals] } - RUN_FREEBAYES(cram_recalibrated_intervals_freebayes, fasta, fasta_fai, intervals_bed_combine_gz) + RUN_FREEBAYES(cram_recalibrated_intervals_freebayes, dict, fasta, fasta_fai, intervals_bed_combine_gz) freebayes_vcf = RUN_FREEBAYES.out.freebayes_vcf ch_versions = ch_versions.mix(RUN_FREEBAYES.out.versions) @@ -102,6 +102,7 @@ workflow GERMLINE_VARIANT_CALLING { // MANTA if (params.tools.contains('manta')){ RUN_MANTA_GERMLINE (cram_recalibrated_intervals_gz_tbi, + dict, fasta, fasta_fai, intervals_bed_combine_gz) @@ -113,6 +114,7 @@ workflow GERMLINE_VARIANT_CALLING { // STRELKA if (params.tools.contains('strelka')){ RUN_STRELKA_SINGLE(cram_recalibrated_intervals_gz_tbi, + dict, fasta, fasta_fai, intervals_bed_combine_gz) diff --git a/subworkflows/local/pair_variant_calling.nf b/subworkflows/local/pair_variant_calling.nf index 225c2444cf..b10a70bcac 100644 --- a/subworkflows/local/pair_variant_calling.nf +++ b/subworkflows/local/pair_variant_calling.nf @@ -104,7 +104,7 @@ workflow PAIR_VARIANT_CALLING { } if (tools.contains('freebayes')){ - RUN_FREEBAYES_SOMATIC(cram_pair_intervals, fasta, fasta_fai, intervals_bed_combine_gz) + RUN_FREEBAYES_SOMATIC(cram_pair_intervals, dict, fasta, fasta_fai, intervals_bed_combine_gz) freebayes_vcf = RUN_FREEBAYES_SOMATIC.out.freebayes_vcf ch_versions = ch_versions.mix(RUN_FREEBAYES_SOMATIC.out.versions) @@ -112,6 +112,7 @@ workflow PAIR_VARIANT_CALLING { if (tools.contains('manta')) { RUN_MANTA_SOMATIC( cram_pair_intervals_gz_tbi, + dict, fasta, fasta_fai, intervals_bed_combine_gz) @@ -148,6 +149,7 @@ workflow PAIR_VARIANT_CALLING { } RUN_STRELKA_SOMATIC(cram_pair_strelka, + dict, fasta, fasta_fai, intervals_bed_combine_gz) diff --git a/subworkflows/local/tumor_variant_calling.nf b/subworkflows/local/tumor_variant_calling.nf index 338a2a32c5..7e49b905bd 100644 --- a/subworkflows/local/tumor_variant_calling.nf +++ b/subworkflows/local/tumor_variant_calling.nf @@ -105,7 +105,7 @@ workflow TUMOR_ONLY_VARIANT_CALLING { [meta, cram, crai, [], [], intervals] } - RUN_FREEBAYES(cram_recalibrated_intervals_freebayes, fasta, fasta_fai, intervals_bed_combine_gz) + RUN_FREEBAYES(cram_recalibrated_intervals_freebayes, dict, fasta, fasta_fai, intervals_bed_combine_gz) freebayes_vcf = RUN_FREEBAYES.out.freebayes_vcf ch_versions = ch_versions.mix(RUN_FREEBAYES.out.versions) @@ -128,6 +128,7 @@ workflow TUMOR_ONLY_VARIANT_CALLING { if (tools.contains('manta')){ RUN_MANTA_TUMORONLY(cram_recalibrated_intervals_gz_tbi, + dict, fasta, fasta_fai, intervals_bed_combine_gz) @@ -137,10 +138,11 @@ workflow TUMOR_ONLY_VARIANT_CALLING { } 
if (tools.contains('strelka')) { - RUN_STRELKA_SINGLE( cram_recalibrated_intervals_gz_tbi, - fasta, - fasta_fai, - intervals_bed_combine_gz) + RUN_STRELKA_SINGLE(cram_recalibrated_intervals_gz_tbi, + dict, + fasta, + fasta_fai, + intervals_bed_combine_gz) strelka_vcf = RUN_STRELKA_SINGLE.out.strelka_vcf ch_versions = ch_versions.mix(RUN_STRELKA_SINGLE.out.versions) diff --git a/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf b/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf index d95488dfc4..b0f30883bd 100644 --- a/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf +++ b/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf @@ -2,8 +2,7 @@ // Run GATK mutect2 in tumor normal mode, getepileupsummaries, calculatecontamination, learnreadorientationmodel and filtermutectcalls // -include { TABIX_BGZIP as BGZIP_VC_MUTECT2 } from '../../../../modules/nf-core/modules/tabix/bgzip/main' -include { CONCAT_VCF as CONCAT_MUTECT2 } from '../../../../modules/local/concat_vcf/main' +include { GATK4_MERGEVCFS as MERGE_MUTECT2 } from '../../../../modules/nf-core/modules/gatk4/mergevcfs/main' include { GATK4_CALCULATECONTAMINATION as CALCULATECONTAMINATION } from '../../../../modules/nf-core/modules/gatk4/calculatecontamination/main' include { GATK4_FILTERMUTECTCALLS as FILTERMUTECTCALLS } from '../../../../modules/nf-core/modules/gatk4/filtermutectcalls/main' include { GATK4_GATHERPILEUPSUMMARIES as GATHERPILEUPSUMMARIES_NORMAL} from '../../../../modules/nf-core/modules/gatk4/gatherpileupsummaries/main' @@ -63,26 +62,23 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING { }.set{ mutect2_f1r2_branch } //Only when using intervals - //Merge Mutect2 VCF - BGZIP_VC_MUTECT2(mutect2_vcf_branch.intervals) - - CONCAT_MUTECT2( - BGZIP_VC_MUTECT2.out.output + MERGE_MUTECT2( + mutect2_vcf_branch.intervals .map{ meta, vcf -> new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals] [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fai, - intervals_bed_combine_gz) + dict + ) mutect2_vcf = Channel.empty().mix( - CONCAT_MUTECT2.out.vcf, + MERGE_MUTECT2.out.vcf, mutect2_vcf_branch.no_intervals) mutect2_tbi = Channel.empty().mix( - CONCAT_MUTECT2.out.tbi, + MERGE_MUTECT2.out.tbi, mutect2_tbi_branch.no_intervals) //Merge Muteect2 Stats @@ -202,8 +198,7 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING { FILTERMUTECTCALLS ( ch_filtermutect_in, fasta, fai, dict ) - ch_versions = ch_versions.mix(BGZIP_VC_MUTECT2.out.versions) - ch_versions = ch_versions.mix(CONCAT_MUTECT2.out.versions) + ch_versions = ch_versions.mix(MERGE_MUTECT2.out.versions) ch_versions = ch_versions.mix(CALCULATECONTAMINATION.out.versions) ch_versions = ch_versions.mix(FILTERMUTECTCALLS.out.versions) ch_versions = ch_versions.mix(GETPILEUPSUMMARIES_NORMAL.out.versions) diff --git a/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf b/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf index 5f07d2e147..be311a4094 100644 --- a/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf +++ b/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf @@ -2,8 +2,7 @@ // Run GATK mutect2 in tumor only mode, getepileupsummaries, calculatecontamination and filtermutectcalls // -include { TABIX_BGZIP as BGZIP_VC_MUTECT2 } from 
'../../../../modules/nf-core/modules/tabix/bgzip/main' -include { CONCAT_VCF as CONCAT_MUTECT2 } from '../../../../modules/local/concat_vcf/main' +include { GATK4_MERGEVCFS as MERGE_MUTECT2 } from '../../../../modules/nf-core/modules/gatk4/mergevcfs/main' include { GATK4_CALCULATECONTAMINATION as CALCULATECONTAMINATION } from '../../../../modules/nf-core/modules/gatk4/calculatecontamination/main' include { GATK4_FILTERMUTECTCALLS as FILTERMUTECTCALLS } from '../../../../modules/nf-core/modules/gatk4/filtermutectcalls/main' include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES } from '../../../../modules/nf-core/modules/gatk4/getpileupsummaries/main' @@ -62,24 +61,22 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { //Only when using intervals //Merge Mutect2 VCF - BGZIP_VC_MUTECT2(mutect2_vcf_branch.intervals) - CONCAT_MUTECT2( - BGZIP_VC_MUTECT2.out.output + MERGE_MUTECT2( + mutect2_vcf_branch.intervals .map{ meta, vcf -> new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fai, - intervals_bed_combine_gz) + dict) mutect2_vcf = Channel.empty().mix( - CONCAT_MUTECT2.out.vcf, + MERGE_MUTECT2.out.vcf, mutect2_vcf_branch.no_intervals) mutect2_tbi = Channel.empty().mix( - CONCAT_MUTECT2.out.tbi, + MERGE_MUTECT2.out.tbi, mutect2_tbi_branch.no_intervals) //Merge Mutect2 Stats @@ -150,8 +147,7 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { FILTERMUTECTCALLS ( ch_filtermutect_in, fasta, fai, dict ) - ch_versions = ch_versions.mix(BGZIP_VC_MUTECT2.out.versions) - ch_versions = ch_versions.mix(CONCAT_MUTECT2.out.versions) + ch_versions = ch_versions.mix(MERGE_MUTECT2.out.versions) ch_versions = ch_versions.mix(CALCULATECONTAMINATION.out.versions) ch_versions = ch_versions.mix(FILTERMUTECTCALLS.out.versions) ch_versions = ch_versions.mix(GETPILEUPSUMMARIES.out.versions) diff --git a/subworkflows/nf-core/variantcalling/deepvariant/main.nf b/subworkflows/nf-core/variantcalling/deepvariant/main.nf index 0929ea7eca..9e799c2a40 100644 --- a/subworkflows/nf-core/variantcalling/deepvariant/main.nf +++ b/subworkflows/nf-core/variantcalling/deepvariant/main.nf @@ -1,16 +1,15 @@ -include { TABIX_BGZIP as BGZIP_VC_DEEPVARIANT_GVCF } from '../../../../modules/nf-core/modules/tabix/bgzip/main' -include { TABIX_BGZIP as BGZIP_VC_DEEPVARIANT_VCF } from '../../../../modules/nf-core/modules/tabix/bgzip/main' -include { CONCAT_VCF as CONCAT_DEEPVARIANT_GVCF } from '../../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_DEEPVARIANT_VCF } from '../../../../modules/local/concat_vcf/main' -include { DEEPVARIANT } from '../../../../modules/nf-core/modules/deepvariant/main' -include { TABIX_TABIX as TABIX_VC_DEEPVARIANT_GVCF } from '../../../../modules/nf-core/modules/tabix/tabix/main' -include { TABIX_TABIX as TABIX_VC_DEEPVARIANT_VCF } from '../../../../modules/nf-core/modules/tabix/tabix/main' +include { GATK4_MERGEVCFS as MERGE_DEEPVARIANT_GVCF } from '../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_DEEPVARIANT_VCF } from '../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { DEEPVARIANT } from '../../../../modules/nf-core/modules/deepvariant/main' +include { TABIX_TABIX as TABIX_VC_DEEPVARIANT_GVCF } from '../../../../modules/nf-core/modules/tabix/tabix/main' +include { TABIX_TABIX as TABIX_VC_DEEPVARIANT_VCF } from '../../../../modules/nf-core/modules/tabix/tabix/main' 
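For orientation, the gather step used by the MERGE_DEEPVARIANT_* calls further down (and by every other MERGE_* alias in this PR) leans on Nextflow's groupKey(): the key carries the expected group size (meta.num_intervals), so groupTuple() can emit a sample's per-interval VCF list as soon as the last interval arrives instead of waiting for the channel to close. A minimal sketch of that pattern, with caller_vcf_out and MERGE_CALLER as placeholder names:

    MERGE_CALLER(
        caller_vcf_out.intervals
            .map{ meta, vcf ->
                // groupKey() records how many items (num_intervals) to expect
                // for this key, so groupTuple() knows when the group is complete.
                [ groupKey(meta, meta.num_intervals), vcf ]
            }
            .groupTuple(),   // -> [ meta, [ vcf_interval_1, vcf_interval_2, ... ] ]
        dict)                // sequence dictionary handed to GATK4 MergeVcfs

In the actual subworkflows the map also rebuilds a reduced meta map (dropping per-interval fields) before keying, as shown in the calls below.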
//TODO: benchmark if it is better to provide multiple bed files & run on multiple machines + mergeing afterwards || one containing all intervals and run on one larger machine // Deepvariant: https://github.com/google/deepvariant/issues/510 workflow RUN_DEEPVARIANT { take: cram // channel: [mandatory] [meta, cram, crai, interval] + dict // channel: [optional] fasta // channel: [mandatory] fasta_fai // channel: [mandatory] intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. @@ -36,45 +35,39 @@ workflow RUN_DEEPVARIANT { TABIX_VC_DEEPVARIANT_GVCF(deepvariant_gvcf_out.no_intervals) // Only when using intervals - BGZIP_VC_DEEPVARIANT_VCF(deepvariant_vcf_out.intervals) - BGZIP_VC_DEEPVARIANT_GVCF(deepvariant_gvcf_out.intervals) - CONCAT_DEEPVARIANT_VCF( - BGZIP_VC_DEEPVARIANT_VCF.out.output + MERGE_DEEPVARIANT_VCF( + deepvariant_vcf_out.intervals .map{ meta, vcf -> new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) + dict) - CONCAT_DEEPVARIANT_GVCF( - BGZIP_VC_DEEPVARIANT_GVCF.out.output + MERGE_DEEPVARIANT_GVCF( + deepvariant_gvcf_out.intervals .map{ meta, vcf -> new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) + dict) // Mix output channels for "no intervals" and "with intervals" results deepvariant_vcf = Channel.empty().mix( - CONCAT_DEEPVARIANT_GVCF.out.vcf, - CONCAT_DEEPVARIANT_VCF.out.vcf, + MERGE_DEEPVARIANT_GVCF.out.vcf, + MERGE_DEEPVARIANT_VCF.out.vcf, deepvariant_gvcf_out.no_intervals, deepvariant_vcf_out.no_intervals) .map{ meta, vcf -> [[patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals, variantcaller:"Deepvariant"], vcf] } - ch_versions = ch_versions.mix(BGZIP_VC_DEEPVARIANT_GVCF.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_DEEPVARIANT_VCF.out.versions) - ch_versions = ch_versions.mix(CONCAT_DEEPVARIANT_GVCF.out.versions) - ch_versions = ch_versions.mix(CONCAT_DEEPVARIANT_VCF.out.versions) + ch_versions = ch_versions.mix(MERGE_DEEPVARIANT_GVCF.out.versions) + ch_versions = ch_versions.mix(MERGE_DEEPVARIANT_VCF.out.versions) ch_versions = ch_versions.mix(DEEPVARIANT.out.versions) ch_versions = ch_versions.mix(TABIX_VC_DEEPVARIANT_GVCF.out.versions) ch_versions = ch_versions.mix(TABIX_VC_DEEPVARIANT_VCF.out.versions) diff --git a/subworkflows/nf-core/variantcalling/freebayes/main.nf b/subworkflows/nf-core/variantcalling/freebayes/main.nf index 2e2eb1f96c..d4d36da41f 100644 --- a/subworkflows/nf-core/variantcalling/freebayes/main.nf +++ b/subworkflows/nf-core/variantcalling/freebayes/main.nf @@ -1,12 +1,12 @@ -include { BCFTOOLS_SORT } from '../../../../modules/nf-core/modules/bcftools/sort/main' -include { TABIX_BGZIP as BGZIP_VC_FREEBAYES } from '../../../../modules/nf-core/modules/tabix/bgzip/main' -include { CONCAT_VCF as CONCAT_FREEBAYES } from '../../../../modules/local/concat_vcf/main' -include { FREEBAYES } from '../../../../modules/nf-core/modules/freebayes/main' -include { TABIX_TABIX as TABIX_VC_FREEBAYES } from '../../../../modules/nf-core/modules/tabix/tabix/main' +include { BCFTOOLS_SORT } from 
'../../../../modules/nf-core/modules/bcftools/sort/main' +include { GATK4_MERGEVCFS as MERGE_FREEBAYES } from '../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { FREEBAYES } from '../../../../modules/nf-core/modules/freebayes/main' +include { TABIX_TABIX as TABIX_VC_FREEBAYES } from '../../../../modules/nf-core/modules/tabix/tabix/main' workflow RUN_FREEBAYES { take: cram // channel: [mandatory] [meta, cram, crai, [], [], interval] + dict fasta // channel: [mandatory] fasta_fai // channel: [mandatory] intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. @@ -21,20 +21,18 @@ workflow RUN_FREEBAYES { fasta_fai, [], [], []) - FREEBAYES.out.vcf.branch{ + BCFTOOLS_SORT(FREEBAYES.out.vcf) + BCFTOOLS_SORT.out.vcf.branch{ intervals: it[0].num_intervals > 1 no_intervals: it[0].num_intervals <= 1 - }.set{freebayes_vcf_out} + }.set{bcftools_vcf_out} // Only when no intervals - BCFTOOLS_SORT(freebayes_vcf_out.no_intervals) - TABIX_VC_FREEBAYES(BCFTOOLS_SORT.out.vcf) + TABIX_VC_FREEBAYES(bcftools_vcf_out.no_intervals) // Only when using intervals - BGZIP_VC_FREEBAYES(freebayes_vcf_out.intervals) - - CONCAT_FREEBAYES( - BGZIP_VC_FREEBAYES.out.output + MERGE_FREEBAYES( + bcftools_vcf_out.intervals .map{ meta, vcf -> new_id = meta.tumor_id ? meta.tumor_id + "_vs_" + meta.normal_id : meta.sample @@ -43,13 +41,13 @@ workflow RUN_FREEBAYES { : [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:new_id, num_intervals:meta.num_intervals] [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) + dict + ) // Mix output channels for "no intervals" and "with intervals" results freebayes_vcf = Channel.empty().mix( - CONCAT_FREEBAYES.out.vcf, - freebayes_vcf_out.no_intervals) + MERGE_FREEBAYES.out.vcf, + bcftools_vcf_out.no_intervals) .map{ meta, vcf -> new_id = meta.tumor_id ? 
meta.tumor_id + "_vs_" + meta.normal_id : meta.sample @@ -60,8 +58,7 @@ workflow RUN_FREEBAYES { } ch_versions = ch_versions.mix(BCFTOOLS_SORT.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_FREEBAYES.out.versions) - ch_versions = ch_versions.mix(CONCAT_FREEBAYES.out.versions) + ch_versions = ch_versions.mix(MERGE_FREEBAYES.out.versions) ch_versions = ch_versions.mix(FREEBAYES.out.versions) ch_versions = ch_versions.mix(TABIX_VC_FREEBAYES.out.versions) diff --git a/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf b/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf index fe7e24fbbf..69fcbdb3a3 100644 --- a/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf +++ b/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf @@ -1,5 +1,4 @@ -include { TABIX_BGZIP as BGZIP_VC_HAPLOTYPECALLER } from '../../../../modules/nf-core/modules/tabix/bgzip/main' -include { CONCAT_VCF as CONCAT_HAPLOTYPECALLER } from '../../../../modules/local/concat_vcf/main' +include { GATK4_MERGEVCFS as MERGE_HAPLOTYPECALLER } from '../../../../modules/nf-core/modules/gatk4/mergevcfs/main' include { GATK4_GENOTYPEGVCFS as GENOTYPEGVCFS } from '../../../../modules/nf-core/modules/gatk4/genotypegvcfs/main' include { GATK4_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../../../modules/nf-core/modules/gatk4/haplotypecaller/main' include { GATK_JOINT_GERMLINE_VARIANT_CALLING } from '../../../../subworkflows/nf-core/gatk4/joint_germline_variant_calling/main' @@ -39,25 +38,22 @@ workflow RUN_HAPLOTYPECALLER { }.set{haplotypecaller_tbi_branch} // Only when using intervals - BGZIP_VC_HAPLOTYPECALLER(haplotypecaller_vcf_branch.intervals) - - CONCAT_HAPLOTYPECALLER( - BGZIP_VC_HAPLOTYPECALLER.out.output + MERGE_HAPLOTYPECALLER( + haplotypecaller_vcf_branch.intervals .map{ meta, vcf -> new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] [groupKey(new_meta, new_meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) + dict) haplotypecaller_vcf = Channel.empty().mix( - CONCAT_HAPLOTYPECALLER.out.vcf, + MERGE_HAPLOTYPECALLER.out.vcf, haplotypecaller_vcf_branch.no_intervals) haplotypecaller_tbi = Channel.empty().mix( - CONCAT_HAPLOTYPECALLER.out.tbi, + MERGE_HAPLOTYPECALLER.out.tbi, haplotypecaller_vcf_branch.no_intervals) // genotype_gvcf_to_call = haplotypecaller_gvcf.join(haplotypecaller_gvcf_tbi) @@ -112,8 +108,7 @@ workflow RUN_HAPLOTYPECALLER { } - ch_versions = ch_versions.mix(BGZIP_VC_HAPLOTYPECALLER.out.versions) - ch_versions = ch_versions.mix(CONCAT_HAPLOTYPECALLER.out.versions) + ch_versions = ch_versions.mix(MERGE_HAPLOTYPECALLER.out.versions) //ch_versions = ch_versions.mix(GENOTYPEGVCFS.out.versions) //ch_versions = ch_versions.mix(GATK_JOINT_GERMLINE_VARIANT_CALLING.out.versions) ch_versions = ch_versions.mix(HAPLOTYPECALLER.out.versions) diff --git a/subworkflows/nf-core/variantcalling/manta/germline/main.nf b/subworkflows/nf-core/variantcalling/manta/germline/main.nf index 4d71dc1250..0c8bd8962b 100644 --- a/subworkflows/nf-core/variantcalling/manta/germline/main.nf +++ b/subworkflows/nf-core/variantcalling/manta/germline/main.nf @@ -1,16 +1,14 @@ -include { TABIX_BGZIP as BGZIP_VC_MANTA_DIPLOID } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' -include { TABIX_BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' -include { TABIX_BGZIP as BGZIP_VC_MANTA_SV } from 
'../../../../../modules/nf-core/modules/tabix/bgzip/main' -include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../../../modules/local/concat_vcf/main' -include { MANTA_GERMLINE } from '../../../../../modules/nf-core/modules/manta/germline/main' +include { GATK4_MERGEVCFS as MERGE_MANTA_DIPLOID } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_MANTA_SMALL_INDELS } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_MANTA_SV } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { MANTA_GERMLINE } from '../../../../../modules/nf-core/modules/manta/germline/main' // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. // Seems to be the consensus on upstream modules implementation too workflow RUN_MANTA_GERMLINE { take: cram // channel: [mandatory] [meta, cram, crai, interval.bed.gz, interval.bed.gz.tbi] + dict // channel: [optional] fasta // channel: [mandatory] fasta_fai // channel: [mandatory] intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. @@ -38,50 +36,41 @@ workflow RUN_MANTA_GERMLINE { }.set{manta_diploid_sv_vcf} // Only when using intervals - BGZIP_VC_MANTA_SMALL_INDELS(manta_small_indels_vcf.intervals) - - CONCAT_MANTA_SMALL_INDELS( - BGZIP_VC_MANTA_SMALL_INDELS.out.output + MERGE_MANTA_SMALL_INDELS( + manta_small_indels_vcf.intervals .map{ meta, vcf -> new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) - - BGZIP_VC_MANTA_SV(manta_sv_vcf.intervals) + dict) - CONCAT_MANTA_SV( - BGZIP_VC_MANTA_SV.out.output + MERGE_MANTA_SV( + manta_sv_vcf.intervals .map{ meta, vcf -> new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] [ groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) - - BGZIP_VC_MANTA_DIPLOID(manta_diploid_sv_vcf.intervals) + dict) - CONCAT_MANTA_DIPLOID( - BGZIP_VC_MANTA_DIPLOID.out.output + MERGE_MANTA_DIPLOID( + manta_diploid_sv_vcf.intervals .map{ meta, vcf -> new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) + dict) // Mix output channels for "no intervals" and "with intervals" results manta_vcf = Channel.empty().mix( - CONCAT_MANTA_DIPLOID.out.vcf, - //CONCAT_MANTA_SMALL_INDELS.out.vcf, - CONCAT_MANTA_SV.out.vcf, + MERGE_MANTA_DIPLOID.out.vcf, + //MERGE_MANTA_SMALL_INDELS.out.vcf, + MERGE_MANTA_SV.out.vcf, manta_diploid_sv_vcf.no_intervals, //manta_small_indels_vcf.no_intervals, manta_sv_vcf.no_intervals) @@ -89,12 +78,9 @@ workflow RUN_MANTA_GERMLINE { [ [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals, variantcaller:"Manta"], vcf] } - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_DIPLOID.out.versions) - ch_versions = 
ch_versions.mix(BGZIP_VC_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_DIPLOID.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_SV.out.versions) + ch_versions = ch_versions.mix(MERGE_MANTA_DIPLOID.out.versions) + ch_versions = ch_versions.mix(MERGE_MANTA_SMALL_INDELS.out.versions) + ch_versions = ch_versions.mix(MERGE_MANTA_SV.out.versions) ch_versions = ch_versions.mix(MANTA_GERMLINE.out.versions) emit: diff --git a/subworkflows/nf-core/variantcalling/manta/somatic/main.nf b/subworkflows/nf-core/variantcalling/manta/somatic/main.nf index c3d108fc7a..036cab3d48 100644 --- a/subworkflows/nf-core/variantcalling/manta/somatic/main.nf +++ b/subworkflows/nf-core/variantcalling/manta/somatic/main.nf @@ -1,16 +1,13 @@ -include { TABIX_BGZIP as BGZIP_VC_MANTA_DIPLOID } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' -include { TABIX_BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' -include { TABIX_BGZIP as BGZIP_VC_MANTA_SOMATIC } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' -include { TABIX_BGZIP as BGZIP_VC_MANTA_SV } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' -include { CONCAT_VCF as CONCAT_MANTA_DIPLOID } from '../../../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SOMATIC } from '../../../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../../../modules/local/concat_vcf/main' -include { MANTA_SOMATIC } from '../../../../../modules/nf-core/modules/manta/somatic/main' +include { GATK4_MERGEVCFS as MERGE_MANTA_DIPLOID } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_MANTA_SMALL_INDELS } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_MANTA_SOMATIC } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_MANTA_SV } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { MANTA_SOMATIC } from '../../../../../modules/nf-core/modules/manta/somatic/main' workflow RUN_MANTA_SOMATIC { take: cram // channel: [mandatory] [meta, normal_cram, normal_crai, tumor_cram, tumor_crai, interval.bed.gz, interval.bed.gz.tbi] + dict // channel: [optional] fasta // channel: [mandatory] fasta_fai // channel: [mandatory] intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. 
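The hunk that follows keeps the same interval/no-interval split used by the other callers in this PR: caller output is branched on meta.num_intervals, the per-interval files go through the MERGE_MANTA_* aliases of GATK4_MERGEVCFS, and the merged VCFs are mixed back with the no_intervals results. Condensed, with CALLER standing in for any of the callers touched here:

    CALLER.out.vcf.branch{
        intervals:    it[0].num_intervals > 1   // scattered run: needs merging
        no_intervals: it[0].num_intervals <= 1  // single run: nothing to merge
    }.set{ caller_vcf_out }

    MERGE_CALLER(
        caller_vcf_out.intervals
            .map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ] }
            .groupTuple(),
        dict)

    caller_vcf = Channel.empty().mix(
        MERGE_CALLER.out.vcf,          // merged per-interval results
        caller_vcf_out.no_intervals)   // untouched single-run results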
@@ -48,57 +45,45 @@ workflow RUN_MANTA_SOMATIC { }.set{manta_somatic_sv_vcf} //Only when using intervals - BGZIP_VC_MANTA_SV(manta_candidate_small_indels_vcf.intervals) - - CONCAT_MANTA_SV( - BGZIP_VC_MANTA_SV.out.output.map{ meta, vcf -> + MERGE_MANTA_SV( + manta_candidate_small_indels_vcf.intervals.map{ meta, vcf -> new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals] [ groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) - - BGZIP_VC_MANTA_SMALL_INDELS(manta_candidate_sv_vcf.intervals) + dict) - CONCAT_MANTA_SMALL_INDELS( - BGZIP_VC_MANTA_SMALL_INDELS.out.output.map{ meta, vcf -> + MERGE_MANTA_SMALL_INDELS( + manta_candidate_sv_vcf.intervals.map{ meta, vcf -> new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals] [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) + dict) - BGZIP_VC_MANTA_DIPLOID(manta_diploid_sv_vcf.intervals) - - CONCAT_MANTA_DIPLOID( - BGZIP_VC_MANTA_DIPLOID.out.output.map{ meta, vcf -> + MERGE_MANTA_DIPLOID( + manta_diploid_sv_vcf.intervals.map{ meta, vcf -> new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals] [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) - - BGZIP_VC_MANTA_SOMATIC(manta_somatic_sv_vcf.intervals) + dict) - CONCAT_MANTA_SOMATIC( - BGZIP_VC_MANTA_SOMATIC.out.output.map{ meta, vcf -> + MERGE_MANTA_SOMATIC( + manta_somatic_sv_vcf.intervals.map{ meta, vcf -> new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals] [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) + dict) // Mix output channels for "no intervals" and "with intervals" results manta_vcf = Channel.empty().mix( - //CONCAT_MANTA_SV.out.vcf, - //CONCAT_MANTA_SMALL_INDELS.out.vcf, - CONCAT_MANTA_DIPLOID.out.vcf, - CONCAT_MANTA_SOMATIC.out.vcf, + //MERGE_MANTA_SV.out.vcf, + //MERGE_MANTA_SMALL_INDELS.out.vcf, + MERGE_MANTA_DIPLOID.out.vcf, + MERGE_MANTA_SOMATIC.out.vcf, //manta_candidate_sv_vcf.no_intervals, //manta_candidate_small_indels_vcf.no_intervals, manta_diploid_sv_vcf.no_intervals, @@ -109,7 +94,7 @@ workflow RUN_MANTA_SOMATIC { } manta_candidate_small_indels_vcf = Channel.empty().mix( - CONCAT_MANTA_SMALL_INDELS.out.vcf, + MERGE_MANTA_SMALL_INDELS.out.vcf, manta_candidate_small_indels_vcf.no_intervals ).map{ meta, vcf -> [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals, variantcaller:"Manta"], @@ -117,21 +102,17 @@ workflow RUN_MANTA_SOMATIC { } manta_candidate_small_indels_vcf_tbi = Channel.empty().mix( - CONCAT_MANTA_SMALL_INDELS.out.tbi, + MERGE_MANTA_SMALL_INDELS.out.tbi, manta_candidate_small_indels_vcf_tbi.no_intervals ).map{ meta, vcf -> [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals, variantcaller:"Manta"], vcf] } - ch_versions = 
ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_DIPLOID.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SOMATIC.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_DIPLOID.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_SOMATIC.out.versions) + ch_versions = ch_versions.mix(MERGE_MANTA_SV.out.versions) + ch_versions = ch_versions.mix(MERGE_MANTA_SMALL_INDELS.out.versions) + ch_versions = ch_versions.mix(MERGE_MANTA_DIPLOID.out.versions) + ch_versions = ch_versions.mix(MERGE_MANTA_SOMATIC.out.versions) ch_versions = ch_versions.mix(MANTA_SOMATIC.out.versions) emit: diff --git a/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf b/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf index b0c1bfc807..658615a21a 100644 --- a/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf +++ b/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf @@ -1,16 +1,14 @@ -include { TABIX_BGZIP as BGZIP_VC_MANTA_SMALL_INDELS } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' -include { TABIX_BGZIP as BGZIP_VC_MANTA_SV } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' -include { TABIX_BGZIP as BGZIP_VC_MANTA_TUMOR } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' -include { CONCAT_VCF as CONCAT_MANTA_SMALL_INDELS } from '../../../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_SV } from '../../../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_MANTA_TUMOR } from '../../../../../modules/local/concat_vcf/main' -include { MANTA_TUMORONLY } from '../../../../../modules/nf-core/modules/manta/tumoronly/main' +include { GATK4_MERGEVCFS as MERGE_MANTA_SMALL_INDELS } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_MANTA_SV } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_MANTA_TUMOR } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { MANTA_TUMORONLY } from '../../../../../modules/nf-core/modules/manta/tumoronly/main' // TODO: Research if splitting by intervals is ok, we pretend for now it is fine. // Seems to be the consensus on upstream modules implementation too workflow RUN_MANTA_TUMORONLY { take: cram // channel: [mandatory] [meta, cram, crai, interval.bed.gz, interval.bed.gz.tbi] + dict // channel: [optional] fasta // channel: [mandatory] fasta_fai // channel: [mandatory] intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. 
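The dict channel added to these take: blocks is what feeds GATK4_MERGEVCFS: instead of the fasta_fai and combined-intervals bed that the old CONCAT_VCF wrapper needed, the new module only takes an optional sequence dictionary, which it turns into a --SEQUENCE_DICTIONARY argument. For reference, the relevant lines from the module added earlier in this diff (comments added here):

    input:
    tuple val(meta), path(vcf)   // grouped per-interval VCFs for one sample
    path dict                    // sequence dictionary; may be passed as []

    script:
    def input_list        = vcf.collect{ "--INPUT $it" }.join(' ')
    // Only add --SEQUENCE_DICTIONARY when a dictionary was actually provided.
    def reference_command = dict ? "--SEQUENCE_DICTIONARY $dict" : ""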
@@ -38,47 +36,38 @@ workflow RUN_MANTA_TUMORONLY { }.set{manta_tumor_sv_vcf} //Only when using intervals - BGZIP_VC_MANTA_SMALL_INDELS(manta_small_indels_vcf.intervals) - - CONCAT_MANTA_SMALL_INDELS( - BGZIP_VC_MANTA_SMALL_INDELS.out.output.map{ meta, vcf -> + MERGE_MANTA_SMALL_INDELS( + manta_small_indels_vcf.intervals.map{ meta, vcf -> new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) - - BGZIP_VC_MANTA_SV(manta_candidate_sv_vcf.intervals) + dict) - CONCAT_MANTA_SV( - BGZIP_VC_MANTA_SV.out.output.map{ meta, vcf -> + MERGE_MANTA_SV( + manta_candidate_sv_vcf.intervals.map{ meta, vcf -> new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) - - BGZIP_VC_MANTA_TUMOR(manta_tumor_sv_vcf.intervals) + dict) - CONCAT_MANTA_TUMOR( - BGZIP_VC_MANTA_TUMOR.out.output.map{ meta, vcf -> + MERGE_MANTA_TUMOR( + manta_tumor_sv_vcf.intervals.map{ meta, vcf -> new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) + dict) // Mix output channels for "no intervals" and "with intervals" results manta_vcf = Channel.empty().mix( - CONCAT_MANTA_SMALL_INDELS.out.vcf, - CONCAT_MANTA_SV.out.vcf, - CONCAT_MANTA_TUMOR.out.vcf, + MERGE_MANTA_SMALL_INDELS.out.vcf, + MERGE_MANTA_SV.out.vcf, + MERGE_MANTA_TUMOR.out.vcf, manta_small_indels_vcf.no_intervals, manta_candidate_sv_vcf.no_intervals, manta_tumor_sv_vcf.no_intervals @@ -87,12 +76,9 @@ workflow RUN_MANTA_TUMORONLY { vcf] } - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_MANTA_TUMOR.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(CONCAT_MANTA_TUMOR.out.versions) + ch_versions = ch_versions.mix(MERGE_MANTA_SV.out.versions) + ch_versions = ch_versions.mix(MERGE_MANTA_SMALL_INDELS.out.versions) + ch_versions = ch_versions.mix(MERGE_MANTA_TUMOR.out.versions) ch_versions = ch_versions.mix(MANTA_TUMORONLY.out.versions) emit: diff --git a/subworkflows/nf-core/variantcalling/strelka/single/main.nf b/subworkflows/nf-core/variantcalling/strelka/single/main.nf index 4d06d8a3a8..0536905ade 100644 --- a/subworkflows/nf-core/variantcalling/strelka/single/main.nf +++ b/subworkflows/nf-core/variantcalling/strelka/single/main.nf @@ -1,12 +1,11 @@ -include { TABIX_BGZIP as BGZIP_VC_STRELKA } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' -include { TABIX_BGZIP as BGZIP_VC_STRELKA_GENOME } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' -include { CONCAT_VCF as CONCAT_STRELKA } from '../../../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_STRELKA_GENOME } from '../../../../../modules/local/concat_vcf/main' -include { STRELKA_GERMLINE as STRELKA_SINGLE } from '../../../../../modules/nf-core/modules/strelka/germline/main' +include { GATK4_MERGEVCFS as MERGE_STRELKA } from 
'../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_STRELKA_GENOME } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { STRELKA_GERMLINE as STRELKA_SINGLE } from '../../../../../modules/nf-core/modules/strelka/germline/main' workflow RUN_STRELKA_SINGLE { take: cram // channel: [mandatory] [meta, cram, crai, interval.bed.gz, interval.bed.gz.tbi] + dict // channel: [optional] fasta // channel: [mandatory] fasta_fai // channel: [mandatory] intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. @@ -28,47 +27,37 @@ workflow RUN_STRELKA_SINGLE { no_intervals: it[0].num_intervals <= 1 }.set{strelka_genome_vcf} - // Only when using intervals - BGZIP_VC_STRELKA(strelka_vcf.intervals) - - CONCAT_STRELKA( - BGZIP_VC_STRELKA.out.output + MERGE_STRELKA( + strelka_vcf.intervals .map{ meta, vcf -> - new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) - - BGZIP_VC_STRELKA_GENOME(strelka_genome_vcf.intervals) + dict + ) - CONCAT_STRELKA_GENOME( - BGZIP_VC_STRELKA_GENOME.out.output + MERGE_STRELKA_GENOME( + strelka_genome_vcf.intervals .map{ meta, vcf -> new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) + dict + ) // Mix output channels for "no intervals" and "with intervals" results strelka_vcf = Channel.empty().mix( - CONCAT_STRELKA.out.vcf, - //CONCAT_STRELKA_GENOME.out.vcf, - //strelka_genome_vcf.no_intervals, + MERGE_STRELKA.out.vcf, strelka_vcf.no_intervals) .map{ meta, vcf -> [[patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals, variantcaller:"Strelka"], vcf] } - ch_versions = ch_versions.mix(BGZIP_VC_STRELKA.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_STRELKA_GENOME.out.versions) - ch_versions = ch_versions.mix(CONCAT_STRELKA.out.versions) - ch_versions = ch_versions.mix(CONCAT_STRELKA_GENOME.out.versions) + ch_versions = ch_versions.mix(MERGE_STRELKA.out.versions) + ch_versions = ch_versions.mix(MERGE_STRELKA_GENOME.out.versions) ch_versions = ch_versions.mix(STRELKA_SINGLE.out.versions) emit: diff --git a/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf b/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf index e34117bb16..ff01cdb584 100644 --- a/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf +++ b/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf @@ -1,12 +1,11 @@ -include { TABIX_BGZIP as BGZIP_VC_STRELKA_INDELS } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' -include { TABIX_BGZIP as BGZIP_VC_STRELKA_SNVS } from '../../../../../modules/nf-core/modules/tabix/bgzip/main' -include { CONCAT_VCF as CONCAT_STRELKA_INDELS } from '../../../../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_STRELKA_SNVS } from '../../../../../modules/local/concat_vcf/main' -include { STRELKA_SOMATIC } from '../../../../../modules/nf-core/modules/strelka/somatic/main' +include { GATK4_MERGEVCFS as MERGE_STRELKA_INDELS } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS 
as MERGE_STRELKA_SNVS } from '../../../../../modules/nf-core/modules/gatk4/mergevcfs/main' +include { STRELKA_SOMATIC } from '../../../../../modules/nf-core/modules/strelka/somatic/main' workflow RUN_STRELKA_SOMATIC { take: cram // channel: [mandatory] [meta, normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi, interval.bed.gz, interval.bed.gz.tbi] manta* are optional + dict // channel: [optional] fasta // channel: [mandatory] fasta_fai // channel: [mandatory] intervals_bed_gz // channel: [optional] Contains a bed.gz file of all intervals combined provided with the cram input(s). Mandatory if interval files are used. @@ -29,30 +28,24 @@ workflow RUN_STRELKA_SOMATIC { }.set{strelka_vcf_indels} // Only when using intervals - BGZIP_VC_STRELKA_SNVS(strelka_vcf_snvs.intervals) - - CONCAT_STRELKA_SNVS(BGZIP_VC_STRELKA_SNVS.out.output.map{ meta, vcf -> + MERGE_STRELKA_SNVS(strelka_vcf_snvs.intervals.map{ meta, vcf -> new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals] [groupKey(new_meta, new_meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) - - BGZIP_VC_STRELKA_INDELS(strelka_vcf_indels.intervals) + dict) - CONCAT_STRELKA_INDELS(BGZIP_VC_STRELKA_INDELS.out.output.map{ meta, vcf -> + MERGE_STRELKA_INDELS(strelka_vcf_indels.intervals.map{ meta, vcf -> new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals] [groupKey(new_meta, new_meta.num_intervals), vcf] }.groupTuple(), - fasta_fai, - intervals_bed_gz) + dict) // Mix output channels for "no intervals" and "with intervals" results strelka_vcf = Channel.empty().mix( - CONCAT_STRELKA_SNVS.out.vcf, - CONCAT_STRELKA_INDELS.out.vcf, + MERGE_STRELKA_SNVS.out.vcf, + MERGE_STRELKA_INDELS.out.vcf, strelka_vcf_snvs.no_intervals, strelka_vcf_indels.no_intervals) .map{ meta, vcf -> @@ -60,10 +53,8 @@ workflow RUN_STRELKA_SOMATIC { , vcf] } - ch_versions = ch_versions.mix(BGZIP_VC_STRELKA_SNVS.out.versions) - ch_versions = ch_versions.mix(BGZIP_VC_STRELKA_INDELS.out.versions) - ch_versions = ch_versions.mix(CONCAT_STRELKA_SNVS.out.versions) - ch_versions = ch_versions.mix(CONCAT_STRELKA_INDELS.out.versions) + ch_versions = ch_versions.mix(MERGE_STRELKA_SNVS.out.versions) + ch_versions = ch_versions.mix(MERGE_STRELKA_INDELS.out.versions) ch_versions = ch_versions.mix(STRELKA_SOMATIC.out.versions) emit:
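Taken together, every caller subworkflow in this PR follows the same before/after shape: the two-step path (a TABIX_BGZIP alias to compress each per-interval VCF, then the local CONCAT_VCF wrapper around concatenateVCFs.sh) collapses into a single call to an aliased GATK4_MERGEVCFS, which merges the per-interval VCFs and, per the module's output declarations, emits a compressed *.vcf.gz together with its *.tbi index. A condensed before/after sketch using the Mutect2 wiring as the example (meta re-keying omitted for brevity):

    // Before: bgzip each per-interval VCF, then concatenate with bcftools
    // via the local concatenateVCFs.sh script.
    BGZIP_VC_MUTECT2(mutect2_vcf_branch.intervals)
    CONCAT_MUTECT2(
        BGZIP_VC_MUTECT2.out.output
            .map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ] }
            .groupTuple(),
        fai,
        intervals_bed_combine_gz)

    // After: hand the per-interval VCFs straight to GATK4 MergeVcfs,
    // which writes the compressed, indexed result itself.
    MERGE_MUTECT2(
        mutect2_vcf_branch.intervals
            .map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ] }
            .groupTuple(),
        dict)

The versions bookkeeping simplifies accordingly: each subworkflow now mixes in MERGE_*.out.versions where it previously mixed both BGZIP_VC_*.out.versions and CONCAT_*.out.versions.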