From 91bbf3da360328e6b1533adbfc991b5a30556b02 Mon Sep 17 00:00:00 2001 From: Rike Date: Mon, 18 Jul 2022 21:40:24 +0200 Subject: [PATCH 01/18] own subworkflow for cnvkit references --- modules/local/build_intervals/main.nf | 8 ++++- modules/local/create_intervals_bed/main.nf | 19 +++++++++-- .../local/prepare_cnvkit_reference.nf | 33 +++++++++++++++++++ subworkflows/local/prepare_genome.nf | 10 ------ subworkflows/local/prepare_intervals.nf | 21 ++++++++---- workflows/sarek.nf | 29 +++++++++------- 6 files changed, 89 insertions(+), 31 deletions(-) create mode 100644 subworkflows/local/prepare_cnvkit_reference.nf diff --git a/modules/local/build_intervals/main.nf b/modules/local/build_intervals/main.nf index 38fbb36ee..0f1941d42 100644 --- a/modules/local/build_intervals/main.nf +++ b/modules/local/build_intervals/main.nf @@ -11,7 +11,8 @@ process BUILD_INTERVALS { path fasta_fai output: - path "*.bed", emit: bed + path("*.bed") , emit: bed + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -19,5 +20,10 @@ process BUILD_INTERVALS { script: """ awk -v FS='\t' -v OFS='\t' '{ print \$1, \"0\", \$2 }' ${fasta_fai} > ${fasta_fai.baseName}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + awk: \$(awk -Wversion 2>/dev/null | head -n 1 | awk '{split(\$0,a,","); print a[1];}' | egrep -o "([0-9]{1,}\\.)+[0-9]{1,}") + END_VERSIONS """ } diff --git a/modules/local/create_intervals_bed/main.nf b/modules/local/create_intervals_bed/main.nf index 08bb6eb5e..559bd6371 100644 --- a/modules/local/create_intervals_bed/main.nf +++ b/modules/local/create_intervals_bed/main.nf @@ -11,8 +11,8 @@ process CREATE_INTERVALS_BED { path intervals output: - path ("*.bed"), emit: bed - //TODO version number missing + path "*.bed" , emit: bed + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -40,6 +40,11 @@ process CREATE_INTERVALS_BED { chunk += t print \$0 > name }' ${intervals} + + cat <<-END_VERSIONS > 
versions.yml + "${task.process}": + awk: \$(awk -Wversion 2>/dev/null | head -n 1 | awk '{split(\$0,a,","); print a[1];}' | egrep -o "([0-9]{1,}\\.)+[0-9]{1,}") + END_VERSIONS """ else if (intervals.toString().toLowerCase().endsWith("interval_list")) """ @@ -47,6 +52,11 @@ process CREATE_INTERVALS_BED { name = sprintf("%s_%d-%d", \$1, \$2, \$3); printf("%s\\t%d\\t%d\\n", \$1, \$2-1, \$3) > name ".bed" }' + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + awk: \$(awk -Wversion 2>/dev/null | head -n 1 | awk '{split(\$0,a,","); print a[1];}' | egrep -o "([0-9]{1,}\\.)+[0-9]{1,}") + END_VERSIONS """ else """ @@ -54,5 +64,10 @@ process CREATE_INTERVALS_BED { name = sprintf("%s_%d-%d", \$1, \$2, \$3); printf("%s\\t%d\\t%d\\n", \$1, \$2-1, \$3) > name ".bed" }' ${intervals} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + awk: \$(awk -Wversion 2>/dev/null | head -n 1 | awk '{split(\$0,a,","); print a[1];}' | egrep -o "([0-9]{1,}\\.)+[0-9]{1,}") + END_VERSIONS """ } diff --git a/subworkflows/local/prepare_cnvkit_reference.nf b/subworkflows/local/prepare_cnvkit_reference.nf new file mode 100644 index 000000000..67042c23e --- /dev/null +++ b/subworkflows/local/prepare_cnvkit_reference.nf @@ -0,0 +1,33 @@ +// +// PREPARE CNVKIT REFERENCE +// + +// Initialize channels based on params or indices that were just built +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { CNVKIT_ANTITARGET } from '../../modules/nf-core/modules/cnvkit/antitarget/main' +include { CNVKIT_REFERENCE } from '../../modules/nf-core/modules/cnvkit/reference/main' + +workflow PREPARE_CNVKIT_REFERENCE { + take: + fasta // channel: [mandatory] fasta + intervals_bed_combined // channel: [] + + main: + + ch_versions = Channel.empty() + + // prepare a antitarget reference files for tumor_only mode of cnvkit + CNVKIT_ANTITARGET(intervals_bed_combined.flatten().map{ it -> [[id:it[0].baseName], it] }) 
+ CNVKIT_REFERENCE(fasta, intervals_bed_combined, CNVKIT_ANTITARGET.out.bed.map{ meta, bed -> [bed]} ) + + ch_versions = ch_versions.mix(CNVKIT_ANTITARGET.out.versions) + ch_versions = ch_versions.mix(CNVKIT_REFERENCE.out.versions) + + emit: + versions = ch_versions + cnvkit_reference = CNVKIT_REFERENCE.out.cnn +} + + diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index b6287b6ab..f01e0c221 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -10,8 +10,6 @@ include { BWA_INDEX as BWAMEM1_INDEX } from '../../modules/nf-core/modules/bwa/index/main' include { BWAMEM2_INDEX } from '../../modules/nf-core/modules/bwamem2/index/main' -include {CNVKIT_ANTITARGET } from '../../modules/nf-core/modules/cnvkit/antitarget/main' -include {CNVKIT_REFERENCE } from '../../modules/nf-core/modules/cnvkit/reference/main' include { DRAGMAP_HASHTABLE } from '../../modules/nf-core/modules/dragmap/hashtable/main' include { GATK4_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/modules/gatk4/createsequencedictionary/main' include { MSISENSORPRO_SCAN } from '../../modules/nf-core/modules/msisensorpro/scan/main' @@ -37,7 +35,6 @@ workflow PREPARE_GENOME { fasta // channel: [mandatory] fasta fasta_fai // channel: [optional] fasta_fai germline_resource // channel: [optional] germline_resource - intervals_bed_combined // channel: [] known_indels // channel: [optional] known_indels pon // channel: [optional] pon @@ -63,10 +60,6 @@ workflow PREPARE_GENOME { TABIX_KNOWN_INDELS( known_indels.flatten().map{ it -> [[id:it.baseName], it] } ) TABIX_PON(pon.flatten().map{ it -> [[id:it.baseName], it] }) - // prepare a reference for tumor_only mode based on target_baits - CNVKIT_ANTITARGET(intervals_bed_combined.flatten().map{ it -> [[id:it[0].baseName], it] }) - CNVKIT_REFERENCE(fasta, intervals_bed_combined, CNVKIT_ANTITARGET.out.bed.map{ meta, bed -> [bed]} ) - // prepare ascat reference files allele_files = 
ascat_alleles if (params.ascat_alleles && params.ascat_alleles.endsWith('.zip')) { @@ -106,8 +99,6 @@ workflow PREPARE_GENOME { ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) ch_versions = ch_versions.mix(BWAMEM1_INDEX.out.versions) ch_versions = ch_versions.mix(BWAMEM2_INDEX.out.versions) - ch_versions = ch_versions.mix(CNVKIT_ANTITARGET.out.versions) - ch_versions = ch_versions.mix(CNVKIT_REFERENCE.out.versions) ch_versions = ch_versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions) ch_versions = ch_versions.mix(MSISENSORPRO_SCAN.out.versions) ch_versions = ch_versions.mix(TABIX_DBSNP.out.versions) @@ -127,7 +118,6 @@ workflow PREPARE_GENOME { msisensorpro_scan = MSISENSORPRO_SCAN.out.list.map{ meta, list -> [list] } // path: genome_msi.list pon_tbi = TABIX_PON.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: pon.vcf.gz.tbi chr_files = chr_files - cnvkit_reference = CNVKIT_REFERENCE.out.cnn allele_files = allele_files loci_files = loci_files gc_file = gc_file diff --git a/subworkflows/local/prepare_intervals.nf b/subworkflows/local/prepare_intervals.nf index 2da68f065..72d131e96 100644 --- a/subworkflows/local/prepare_intervals.nf +++ b/subworkflows/local/prepare_intervals.nf @@ -7,6 +7,8 @@ // A when clause condition is defined in the conf/modules.config to determine if the module should be run include { BUILD_INTERVALS } from '../../modules/local/build_intervals/main' +include {CNVKIT_ANTITARGET } from '../../modules/nf-core/modules/cnvkit/antitarget/main' +include {CNVKIT_REFERENCE } from '../../modules/nf-core/modules/cnvkit/reference/main' include { CREATE_INTERVALS_BED } from '../../modules/local/create_intervals_bed/main' include { GATK4_INTERVALLISTTOBED } from '../../modules/nf-core/modules/gatk4/intervallisttobed/main' include { TABIX_BGZIPTABIX as TABIX_BGZIPTABIX_INTERVAL_SPLIT } from '../../modules/nf-core/modules/tabix/bgziptabix/main' @@ -43,15 +45,20 @@ workflow PREPARE_INTERVALS { if (!params.intervals) { 
BUILD_INTERVALS(fasta_fai) - ch_intervals_combined = BUILD_INTERVALS.out.bed.map{it -> [[id:it.simpleName], it] } + ch_intervals_combined = BUILD_INTERVALS.out.bed - ch_intervals = CREATE_INTERVALS_BED(ch_intervals_combined) + CREATE_INTERVALS_BED(ch_intervals_combined) + ch_intervals = CREATE_INTERVALS_BED.out.bed + + ch_versions = ch_intervals.mix(BUILD_INTERVALS.out.versions) } else { ch_intervals_combined = Channel.fromPath(file(params.intervals)).map{it -> [[id:it.baseName], it] } ch_intervals = CREATE_INTERVALS_BED(file(params.intervals)) + ch_versions = ch_intervals.mix(CREATE_INTERVALS_BED.out.versions) + //If interval file is not provided as .bed, but e.g. as .interval_list then convert to BED format if(!params.intervals.endsWith(".bed")) { GATK4_INTERVALLISTTOBED(ch_intervals_combined) @@ -91,15 +98,15 @@ workflow PREPARE_INTERVALS { TABIX_BGZIPTABIX_INTERVAL_SPLIT(tabix_in) ch_intervals_bed_gz_tbi = TABIX_BGZIPTABIX_INTERVAL_SPLIT.out.gz_tbi.map{ meta, bed, tbi -> [bed, tbi ]}.toList().map{ it -> - [it, it.size()] // Adding number of intervals as elements + [it, it.size()] // Adding number ofq }.transpose() ch_versions = ch_versions.mix(TABIX_BGZIPTABIX_INTERVAL_SPLIT.out.versions) } emit: - intervals_bed = ch_intervals // path: intervals.bed, num_intervals [intervals split for parallel execution] - intervals_bed_gz_tbi = ch_intervals_bed_gz_tbi // path: target.bed.gz, target.bed.gz.tbi, num_intervals [intervals split for parallel execution] - intervals_bed_combined = ch_intervals_combined.map{meta, bed -> bed }.collect() // path: intervals.bed [all intervals in one file] - versions = ch_versions // channel: [ versions.yml ] + intervals_bed = ch_intervals // path: intervals.bed, num_intervals [intervals split for parallel execution] + intervals_bed_gz_tbi = ch_intervals_bed_gz_tbi // path: target.bed.gz, target.bed.gz.tbi, num_intervals [intervals split for parallel execution] + intervals_bed_combined = ch_intervals_combined.map{meta, bed -> bed 
}.collect() // path: intervals.bed [all intervals in one file] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/workflows/sarek.nf b/workflows/sarek.nf index b915f192c..dea662daf 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -201,6 +201,9 @@ include { PREPARE_GENOME } from '../subwor // Build intervals if needed include { PREPARE_INTERVALS } from '../subworkflows/local/prepare_intervals' +// Build CNVkit reference if needed +include { PREPARE_CNVKIT_REFERENCE } from '../subworkflows/local/prepare_cnvkit_reference' + // Convert BAM files to FASTQ files include { ALIGNMENT_TO_FASTQ as ALIGNMENT_TO_FASTQ_INPUT } from '../subworkflows/nf-core/alignment_to_fastq' include { ALIGNMENT_TO_FASTQ as ALIGNMENT_TO_FASTQ_UMI } from '../subworkflows/nf-core/alignment_to_fastq' @@ -296,15 +299,6 @@ workflow SAREK { // To gather used softwares versions for MultiQC ch_versions = Channel.empty() - // Build intervals if needed - PREPARE_INTERVALS(fasta_fai) - - // Intervals for speed up preprocessing/variant calling by spread/gather - intervals_bed_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_combined // [interval.bed] all intervals in one file - intervals_for_preprocessing = params.wes ? intervals_bed_combined : [] // For QC during preprocessing, we don't need any intervals (MOSDEPTH doesn't take them for WGS) - - intervals = PREPARE_INTERVALS.out.intervals_bed // [interval, num_intervals] multiple interval.bed files, divided by useful intervals for scatter/gather - intervals_bed_gz_tbi = PREPARE_INTERVALS.out.intervals_bed_gz_tbi // [interval_bed, tbi, num_intervals] multiple interval.bed.gz/.tbi files, divided by useful intervals for scatter/gather // Build indices if needed PREPARE_GENOME( @@ -317,7 +311,6 @@ workflow SAREK { fasta, fasta_fai, germline_resource, - intervals_bed_combined, known_indels, pon) @@ -326,7 +319,6 @@ workflow SAREK { bwa = params.fasta ? params.bwa ? 
Channel.fromPath(params.bwa).collect() : PREPARE_GENOME.out.bwa : [] bwamem2 = params.fasta ? params.bwamem2 ? Channel.fromPath(params.bwamem2).collect() : PREPARE_GENOME.out.bwamem2 : [] chr_files = PREPARE_GENOME.out.chr_files - cnvkit_reference = params.tools && params.tools.split(',').contains('cnvkit') ? PREPARE_GENOME.out.cnvkit_reference : Channel.empty() dragmap = params.fasta ? params.dragmap ? Channel.fromPath(params.dragmap).collect() : PREPARE_GENOME.out.hashtable : [] dict = params.fasta ? params.dict ? Channel.fromPath(params.dict).collect() : PREPARE_GENOME.out.dict : [] fasta_fai = params.fasta ? params.fasta_fai ? Channel.fromPath(params.fasta_fai).collect() : PREPARE_GENOME.out.fasta_fai : [] @@ -349,10 +341,25 @@ workflow SAREK { known_sites = dbsnp.concat(known_indels).collect() known_sites_tbi = dbsnp_tbi.concat(known_indels_tbi).collect() + // Build intervals if needed + PREPARE_INTERVALS(fasta_fai) + + // Intervals for speed up preprocessing/variant calling by spread/gather + intervals_bed_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_combined // [interval.bed] all intervals in one file + intervals_for_preprocessing = params.wes ? 
intervals_bed_combined : [] // For QC during preprocessing, we don't need any intervals (MOSDEPTH doesn't take them for WGS) + + intervals = PREPARE_INTERVALS.out.intervals_bed // [interval, num_intervals] multiple interval.bed files, divided by useful intervals for scatter/gather + intervals_bed_gz_tbi = PREPARE_INTERVALS.out.intervals_bed_gz_tbi // [interval_bed, tbi, num_intervals] multiple interval.bed.gz/.tbi files, divided by useful intervals for scatter/gather + // Gather used softwares versions ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) ch_versions = ch_versions.mix(PREPARE_INTERVALS.out.versions) + // Antitarget based reference for CNVKit + PREPARE_CNVKIT_REFERENCE(fasta, intervals_bed_combined) + cnvkit_reference = params.tools && params.tools.split(',').contains('cnvkit') ? PREPARE_INTERVALS.out.cnvkit_reference : Channel.empty() + ch_versions = ch_versions.mix(PREPARE_CNVKIT_REFERENCE.out.versions) + // PREPROCESSING if (params.step == 'mapping') { From 46cc19cebdf07edfedd225d6857d74c2b6c0afc0 Mon Sep 17 00:00:00 2001 From: Rike Date: Mon, 18 Jul 2022 21:45:48 +0200 Subject: [PATCH 02/18] add comment back in --- subworkflows/local/prepare_intervals.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/prepare_intervals.nf b/subworkflows/local/prepare_intervals.nf index 72d131e96..8f13663be 100644 --- a/subworkflows/local/prepare_intervals.nf +++ b/subworkflows/local/prepare_intervals.nf @@ -98,7 +98,7 @@ workflow PREPARE_INTERVALS { TABIX_BGZIPTABIX_INTERVAL_SPLIT(tabix_in) ch_intervals_bed_gz_tbi = TABIX_BGZIPTABIX_INTERVAL_SPLIT.out.gz_tbi.map{ meta, bed, tbi -> [bed, tbi ]}.toList().map{ it -> - [it, it.size()] // Adding number ofq + [it, it.size()] // Adding number of intervals as elements }.transpose() ch_versions = ch_versions.mix(TABIX_BGZIPTABIX_INTERVAL_SPLIT.out.versions) From fd48c5853d9e77a923f70f69645f3a3659807783 Mon Sep 17 00:00:00 2001 From: Rike Date: Mon, 18 Jul 2022 22:55:25 +0200 
Subject: [PATCH 03/18] why does this work --- subworkflows/local/prepare_intervals.nf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/prepare_intervals.nf b/subworkflows/local/prepare_intervals.nf index 8f13663be..c16be991a 100644 --- a/subworkflows/local/prepare_intervals.nf +++ b/subworkflows/local/prepare_intervals.nf @@ -47,7 +47,9 @@ workflow PREPARE_INTERVALS { BUILD_INTERVALS(fasta_fai) ch_intervals_combined = BUILD_INTERVALS.out.bed - CREATE_INTERVALS_BED(ch_intervals_combined) + ch_intervals_combined.view() + + CREATE_INTERVALS_BED(ch_intervals_combined.map{v,p -> [p]}) ch_intervals = CREATE_INTERVALS_BED.out.bed ch_versions = ch_intervals.mix(BUILD_INTERVALS.out.versions) From 2e651823dfe8c8b3a1c43e113403718d9ccb3c3b Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 19 Jul 2022 00:04:25 +0200 Subject: [PATCH 04/18] this works aside from versions parsing --- subworkflows/local/prepare_intervals.nf | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/subworkflows/local/prepare_intervals.nf b/subworkflows/local/prepare_intervals.nf index c16be991a..f662873b1 100644 --- a/subworkflows/local/prepare_intervals.nf +++ b/subworkflows/local/prepare_intervals.nf @@ -44,20 +44,21 @@ workflow PREPARE_INTERVALS { //If no interval/target file is provided, then intervals are generated from FASTA file if (!params.intervals) { - BUILD_INTERVALS(fasta_fai) + BUILD_INTERVALS(fasta_fai.map{ it -> [[id:it[0].name], it] }.view()) ch_intervals_combined = BUILD_INTERVALS.out.bed - ch_intervals_combined.view() + CREATE_INTERVALS_BED(ch_intervals_combined) + ch_intervals = CREATE_INTERVALS_BED.out.bed.map{meta, intervals -> intervals} - CREATE_INTERVALS_BED(ch_intervals_combined.map{v,p -> [p]}) - ch_intervals = CREATE_INTERVALS_BED.out.bed - - ch_versions = ch_intervals.mix(BUILD_INTERVALS.out.versions) + //ch_versions = ch_intervals.mix(BUILD_INTERVALS.out.versions) + ch_versions = 
ch_intervals.mix(CREATE_INTERVALS_BED.out.versions) } else { ch_intervals_combined = Channel.fromPath(file(params.intervals)).map{it -> [[id:it.baseName], it] } - ch_intervals = CREATE_INTERVALS_BED(file(params.intervals)) + + CREATE_INTERVALS_BED(ch_intervals_combined) + ch_intervals = CREATE_INTERVALS_BED.out.bed.map{meta, intervals -> intervals} ch_versions = ch_intervals.mix(CREATE_INTERVALS_BED.out.versions) From 94114dae80e4d7a109a644c6d983d5df6257b9b0 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 19 Jul 2022 00:05:49 +0200 Subject: [PATCH 05/18] try with only one version --- modules/local/build_intervals/main.nf | 14 ++++---------- modules/local/create_intervals_bed/main.nf | 9 ++++----- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/modules/local/build_intervals/main.nf b/modules/local/build_intervals/main.nf index 0f1941d42..c83f660c0 100644 --- a/modules/local/build_intervals/main.nf +++ b/modules/local/build_intervals/main.nf @@ -1,6 +1,5 @@ process BUILD_INTERVALS { - tag "$fasta_fai" - label 'process_medium' + tag "$meta.id" conda (params.enable_conda ? "anaconda::gawk=5.1.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
@@ -8,11 +7,11 @@ process BUILD_INTERVALS { 'quay.io/biocontainers/gawk:5.1.0' }" input: - path fasta_fai + tuple val(meta), path(fasta_fai) output: - path("*.bed") , emit: bed - path "versions.yml" , emit: versions + tuple val(meta), path("*.bed") , emit: bed + //path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -20,10 +19,5 @@ process BUILD_INTERVALS { script: """ awk -v FS='\t' -v OFS='\t' '{ print \$1, \"0\", \$2 }' ${fasta_fai} > ${fasta_fai.baseName}.bed - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - awk: \$(awk -Wversion 2>/dev/null | head -n 1 | awk '{split(\$0,a,","); print a[1];}' | egrep -o "([0-9]{1,}\\.)+[0-9]{1,}") - END_VERSIONS """ } diff --git a/modules/local/create_intervals_bed/main.nf b/modules/local/create_intervals_bed/main.nf index 559bd6371..ee269cfc4 100644 --- a/modules/local/create_intervals_bed/main.nf +++ b/modules/local/create_intervals_bed/main.nf @@ -1,6 +1,5 @@ process CREATE_INTERVALS_BED { - tag "$intervals" - label 'process_medium' + tag "$meta.id" conda (params.enable_conda ? "anaconda::gawk=5.1.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
@@ -8,11 +7,11 @@ process CREATE_INTERVALS_BED { 'quay.io/biocontainers/gawk:5.1.0' }" input: - path intervals + tuple val(meta), path(intervals) output: - path "*.bed" , emit: bed - path "versions.yml" , emit: versions + tuple val(meta), path("*.bed") , emit: bed + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when From 0b554f93b83cff09462e857ef386579aca7975f4 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 19 Jul 2022 00:20:26 +0200 Subject: [PATCH 06/18] no versions --- modules/local/create_intervals_bed/main.nf | 17 +---------------- subworkflows/local/prepare_intervals.nf | 4 ++-- 2 files changed, 3 insertions(+), 18 deletions(-) diff --git a/modules/local/create_intervals_bed/main.nf b/modules/local/create_intervals_bed/main.nf index ee269cfc4..f4a49f710 100644 --- a/modules/local/create_intervals_bed/main.nf +++ b/modules/local/create_intervals_bed/main.nf @@ -11,7 +11,7 @@ process CREATE_INTERVALS_BED { output: tuple val(meta), path("*.bed") , emit: bed - path "versions.yml" , emit: versions + //path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -39,11 +39,6 @@ process CREATE_INTERVALS_BED { chunk += t print \$0 > name }' ${intervals} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - awk: \$(awk -Wversion 2>/dev/null | head -n 1 | awk '{split(\$0,a,","); print a[1];}' | egrep -o "([0-9]{1,}\\.)+[0-9]{1,}") - END_VERSIONS """ else if (intervals.toString().toLowerCase().endsWith("interval_list")) """ @@ -51,11 +46,6 @@ process CREATE_INTERVALS_BED { name = sprintf("%s_%d-%d", \$1, \$2, \$3); printf("%s\\t%d\\t%d\\n", \$1, \$2-1, \$3) > name ".bed" }' - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - awk: \$(awk -Wversion 2>/dev/null | head -n 1 | awk '{split(\$0,a,","); print a[1];}' | egrep -o "([0-9]{1,}\\.)+[0-9]{1,}") - END_VERSIONS """ else """ @@ -63,10 +53,5 @@ process CREATE_INTERVALS_BED { name = sprintf("%s_%d-%d", \$1, \$2, \$3); printf("%s\\t%d\\t%d\\n", \$1, 
\$2-1, \$3) > name ".bed" }' ${intervals} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - awk: \$(awk -Wversion 2>/dev/null | head -n 1 | awk '{split(\$0,a,","); print a[1];}' | egrep -o "([0-9]{1,}\\.)+[0-9]{1,}") - END_VERSIONS """ } diff --git a/subworkflows/local/prepare_intervals.nf b/subworkflows/local/prepare_intervals.nf index f662873b1..6ab98cad5 100644 --- a/subworkflows/local/prepare_intervals.nf +++ b/subworkflows/local/prepare_intervals.nf @@ -51,7 +51,7 @@ workflow PREPARE_INTERVALS { ch_intervals = CREATE_INTERVALS_BED.out.bed.map{meta, intervals -> intervals} //ch_versions = ch_intervals.mix(BUILD_INTERVALS.out.versions) - ch_versions = ch_intervals.mix(CREATE_INTERVALS_BED.out.versions) + //ch_versions = ch_intervals.mix(CREATE_INTERVALS_BED.out.versions) } else { @@ -60,7 +60,7 @@ workflow PREPARE_INTERVALS { CREATE_INTERVALS_BED(ch_intervals_combined) ch_intervals = CREATE_INTERVALS_BED.out.bed.map{meta, intervals -> intervals} - ch_versions = ch_intervals.mix(CREATE_INTERVALS_BED.out.versions) + //ch_versions = ch_intervals.mix(CREATE_INTERVALS_BED.out.versions) //If interval file is not provided as .bed, but e.g. 
as .interval_list then convert to BED format if(!params.intervals.endsWith(".bed")) { From fcc7137bce0f733f17d54816e166b389642a301c Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 19 Jul 2022 09:32:12 +0200 Subject: [PATCH 07/18] versioning makes it fail --- modules/local/build_intervals/main.nf | 7 ++++++- subworkflows/local/prepare_intervals.nf | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/modules/local/build_intervals/main.nf b/modules/local/build_intervals/main.nf index c83f660c0..c96c48b1f 100644 --- a/modules/local/build_intervals/main.nf +++ b/modules/local/build_intervals/main.nf @@ -11,7 +11,7 @@ process BUILD_INTERVALS { output: tuple val(meta), path("*.bed") , emit: bed - //path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -19,5 +19,10 @@ process BUILD_INTERVALS { script: """ awk -v FS='\t' -v OFS='\t' '{ print \$1, \"0\", \$2 }' ${fasta_fai} > ${fasta_fai.baseName}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + awk: \$(awk -Wversion 2>/dev/null | head -n 1 | awk '{split(\$0,a,","); print a[1];}' | egrep -o "([0-9]{1,}\\.)+[0-9]{1,}") + END_VERSIONS """ } diff --git a/subworkflows/local/prepare_intervals.nf b/subworkflows/local/prepare_intervals.nf index 6ab98cad5..d3e894775 100644 --- a/subworkflows/local/prepare_intervals.nf +++ b/subworkflows/local/prepare_intervals.nf @@ -50,7 +50,7 @@ workflow PREPARE_INTERVALS { CREATE_INTERVALS_BED(ch_intervals_combined) ch_intervals = CREATE_INTERVALS_BED.out.bed.map{meta, intervals -> intervals} - //ch_versions = ch_intervals.mix(BUILD_INTERVALS.out.versions) + ch_versions = ch_intervals.mix(BUILD_INTERVALS.out.versions) //ch_versions = ch_intervals.mix(CREATE_INTERVALS_BED.out.versions) } else { From b668ca2e8f93eda6e39feedb36ed1314d8fdb7c6 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 19 Jul 2022 14:13:43 +0200 Subject: [PATCH 08/18] this works now but without versions --- 
modules/local/build_intervals/main.nf | 17 +++++++-------- modules/local/create_intervals_bed/main.nf | 14 ++++++------- subworkflows/local/prepare_intervals.nf | 24 +++++++++------------- 3 files changed, 24 insertions(+), 31 deletions(-) diff --git a/modules/local/build_intervals/main.nf b/modules/local/build_intervals/main.nf index c96c48b1f..83006cc1b 100644 --- a/modules/local/build_intervals/main.nf +++ b/modules/local/build_intervals/main.nf @@ -1,28 +1,25 @@ process BUILD_INTERVALS { tag "$meta.id" - conda (params.enable_conda ? "anaconda::gawk=5.1.0" : null) + conda (params.enable_conda ? "bioconda::gawk=4.1.3" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gawk:5.1.0' : - 'quay.io/biocontainers/gawk:5.1.0' }" + 'https://depot.galaxyproject.org/singularity/gawk:4.1.3--0': + 'quay.io/biocontainers/gawk:4.1.3--1' }" input: tuple val(meta), path(fasta_fai) output: - tuple val(meta), path("*.bed") , emit: bed - path "versions.yml" , emit: versions + tuple val(meta), path("${fasta_fai.baseName}.bed") , emit: bed + //path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: + def args = task.ext.args ?: '' + """ awk -v FS='\t' -v OFS='\t' '{ print \$1, \"0\", \$2 }' ${fasta_fai} > ${fasta_fai.baseName}.bed - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - awk: \$(awk -Wversion 2>/dev/null | head -n 1 | awk '{split(\$0,a,","); print a[1];}' | egrep -o "([0-9]{1,}\\.)+[0-9]{1,}") - END_VERSIONS """ } diff --git a/modules/local/create_intervals_bed/main.nf b/modules/local/create_intervals_bed/main.nf index f4a49f710..a2dfd46ff 100644 --- a/modules/local/create_intervals_bed/main.nf +++ b/modules/local/create_intervals_bed/main.nf @@ -1,17 +1,17 @@ process CREATE_INTERVALS_BED { - tag "$meta.id" + tag "$intervals" - conda (params.enable_conda ? "anaconda::gawk=5.1.0" : null) + conda (params.enable_conda ? 
"bioconda::gawk=4.1.3" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gawk:5.1.0' : - 'quay.io/biocontainers/gawk:5.1.0' }" + 'https://depot.galaxyproject.org/singularity/gawk:4.1.3--0': + 'quay.io/biocontainers/gawk:4.1.3--1' }" input: - tuple val(meta), path(intervals) + path(intervals) output: - tuple val(meta), path("*.bed") , emit: bed - //path "versions.yml" , emit: versions + path("*.bed") , emit: bed + //path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/subworkflows/local/prepare_intervals.nf b/subworkflows/local/prepare_intervals.nf index d3e894775..75b316562 100644 --- a/subworkflows/local/prepare_intervals.nf +++ b/subworkflows/local/prepare_intervals.nf @@ -6,12 +6,12 @@ // For all modules here: // A when clause condition is defined in the conf/modules.config to determine if the module should be run -include { BUILD_INTERVALS } from '../../modules/local/build_intervals/main' -include {CNVKIT_ANTITARGET } from '../../modules/nf-core/modules/cnvkit/antitarget/main' -include {CNVKIT_REFERENCE } from '../../modules/nf-core/modules/cnvkit/reference/main' -include { CREATE_INTERVALS_BED } from '../../modules/local/create_intervals_bed/main' -include { GATK4_INTERVALLISTTOBED } from '../../modules/nf-core/modules/gatk4/intervallisttobed/main' -include { TABIX_BGZIPTABIX as TABIX_BGZIPTABIX_INTERVAL_SPLIT } from '../../modules/nf-core/modules/tabix/bgziptabix/main' +include { BUILD_INTERVALS } from '../../modules/local/build_intervals/main' +include { CNVKIT_ANTITARGET } from '../../modules/nf-core/modules/cnvkit/antitarget/main' +include { CNVKIT_REFERENCE } from '../../modules/nf-core/modules/cnvkit/reference/main' +include { CREATE_INTERVALS_BED } from '../../modules/local/create_intervals_bed/main' +include { GATK4_INTERVALLISTTOBED } from '../../modules/nf-core/modules/gatk4/intervallisttobed/main' 
+include { TABIX_BGZIPTABIX as TABIX_BGZIPTABIX_INTERVAL_SPLIT } from '../../modules/nf-core/modules/tabix/bgziptabix/main' workflow PREPARE_INTERVALS { take: @@ -44,23 +44,19 @@ workflow PREPARE_INTERVALS { //If no interval/target file is provided, then intervals are generated from FASTA file if (!params.intervals) { - BUILD_INTERVALS(fasta_fai.map{ it -> [[id:it[0].name], it] }.view()) + BUILD_INTERVALS(fasta_fai.map{it -> [[id:it.baseName], it]}) ch_intervals_combined = BUILD_INTERVALS.out.bed - CREATE_INTERVALS_BED(ch_intervals_combined) - ch_intervals = CREATE_INTERVALS_BED.out.bed.map{meta, intervals -> intervals} + ch_intervals = CREATE_INTERVALS_BED(ch_intervals_combined.map{meta, path -> path}) - ch_versions = ch_intervals.mix(BUILD_INTERVALS.out.versions) + //ch_versions = ch_intervals.mix(BUILD_INTERVALS.out.versions) //ch_versions = ch_intervals.mix(CREATE_INTERVALS_BED.out.versions) } else { ch_intervals_combined = Channel.fromPath(file(params.intervals)).map{it -> [[id:it.baseName], it] } - CREATE_INTERVALS_BED(ch_intervals_combined) - ch_intervals = CREATE_INTERVALS_BED.out.bed.map{meta, intervals -> intervals} - - //ch_versions = ch_intervals.mix(CREATE_INTERVALS_BED.out.versions) + ch_intervals = CREATE_INTERVALS_BED(file(params.intervals)) //If interval file is not provided as .bed, but e.g. 
as .interval_list then convert to BED format if(!params.intervals.endsWith(".bed")) { From c6b748eb4dceb20a0efab3bef79b320891b12eca Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 19 Jul 2022 14:14:46 +0200 Subject: [PATCH 09/18] add test for this --- tests/test_targeted.yml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/test_targeted.yml b/tests/test_targeted.yml index c94c61903..5c64a3666 100644 --- a/tests/test_targeted.yml +++ b/tests/test_targeted.yml @@ -25,3 +25,31 @@ - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - path: results/reports/samtools/test/test.md.cram.stats - path: results/reports/samtools/test/test.recal.cram.stats + +- name: Run intervals false pipeline + command: nextflow run main.nf -profile test,docker --intervals false + tags: + - default + - preprocessing + files: + - path: results/multiqc + - path: results/preprocessing/markduplicates/test/test.md.cram + - path: results/preprocessing/markduplicates/test/test.md.cram.crai + - path: results/preprocessing/recal_table/test/test.recal.table + - path: results/preprocessing/recalibrated/test/test.recal.cram + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + - path: results/csv/markduplicates.csv + - path: results/csv/markduplicates_no_table.csv + - path: results/csv/recalibrated.csv + - path: results/reports/fastqc/test-test_L1 + - path: results/reports/markduplicates/test/test.md.metrics + - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.md.regions.bed.gz + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + - path: 
results/reports/mosdepth/test/test.recal.regions.bed.gz + - path: results/reports/samtools/test/test.md.cram.stats + - path: results/reports/samtools/test/test.recal.cram.stats From 2ac8490ce529a654bc23a17d7ea35b3891300963 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 19 Jul 2022 14:32:19 +0200 Subject: [PATCH 10/18] versions computation works, but collection fails --- modules/local/build_intervals/main.nf | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/modules/local/build_intervals/main.nf b/modules/local/build_intervals/main.nf index 83006cc1b..35f60cb9d 100644 --- a/modules/local/build_intervals/main.nf +++ b/modules/local/build_intervals/main.nf @@ -11,7 +11,7 @@ process BUILD_INTERVALS { output: tuple val(meta), path("${fasta_fai.baseName}.bed") , emit: bed - //path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -21,5 +21,10 @@ process BUILD_INTERVALS { """ awk -v FS='\t' -v OFS='\t' '{ print \$1, \"0\", \$2 }' ${fasta_fai} > ${fasta_fai.baseName}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS """ } From 2cdb747dd2720271543d8265b1da0637d63d6c8c Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 19 Jul 2022 21:31:32 +0200 Subject: [PATCH 11/18] somehow parts of the bedfile end up in the collate_versions.yml --- modules/local/create_intervals_bed/main.nf | 7 ++++--- subworkflows/local/prepare_intervals.nf | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/modules/local/create_intervals_bed/main.nf b/modules/local/create_intervals_bed/main.nf index a2dfd46ff..54e8b4ef9 100644 --- a/modules/local/create_intervals_bed/main.nf +++ b/modules/local/create_intervals_bed/main.nf @@ -20,7 +20,7 @@ process CREATE_INTERVALS_BED { // If intervals file is in BED format, // Fifth column is interpreted to contain runtime estimates // Which is then used to combine short-running 
jobs - if (intervals.toString().toLowerCase().endsWith("bed")) + if (intervals.toString().toLowerCase().endsWith("bed")) { """ awk -vFS="\t" '{ t = \$5 # runtime estimate @@ -40,18 +40,19 @@ process CREATE_INTERVALS_BED { print \$0 > name }' ${intervals} """ - else if (intervals.toString().toLowerCase().endsWith("interval_list")) + } else if (intervals.toString().toLowerCase().endsWith("interval_list")) { """ grep -v '^@' ${intervals} | awk -vFS="\t" '{ name = sprintf("%s_%d-%d", \$1, \$2, \$3); printf("%s\\t%d\\t%d\\n", \$1, \$2-1, \$3) > name ".bed" }' """ - else + } else { """ awk -vFS="[:-]" '{ name = sprintf("%s_%d-%d", \$1, \$2, \$3); printf("%s\\t%d\\t%d\\n", \$1, \$2-1, \$3) > name ".bed" }' ${intervals} """ + } } diff --git a/subworkflows/local/prepare_intervals.nf b/subworkflows/local/prepare_intervals.nf index 75b316562..475dc6937 100644 --- a/subworkflows/local/prepare_intervals.nf +++ b/subworkflows/local/prepare_intervals.nf @@ -47,9 +47,9 @@ workflow PREPARE_INTERVALS { BUILD_INTERVALS(fasta_fai.map{it -> [[id:it.baseName], it]}) ch_intervals_combined = BUILD_INTERVALS.out.bed - ch_intervals = CREATE_INTERVALS_BED(ch_intervals_combined.map{meta, path -> path}) + ch_intervals = CREATE_INTERVALS_BED(ch_intervals_combined.map{meta, path -> path}).bed - //ch_versions = ch_intervals.mix(BUILD_INTERVALS.out.versions) + ch_versions = ch_intervals.mix(BUILD_INTERVALS.out.versions) //ch_versions = ch_intervals.mix(CREATE_INTERVALS_BED.out.versions) } else { From 1722ff47a588967ae80437ef99ffee4977e1e3c1 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 19 Jul 2022 21:52:45 +0200 Subject: [PATCH 12/18] more versions test --- modules/local/build_intervals/main.nf | 7 +------ modules/local/create_intervals_bed/main.nf | 17 ++++++++++++++++- subworkflows/local/prepare_intervals.nf | 4 ++-- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/modules/local/build_intervals/main.nf b/modules/local/build_intervals/main.nf index 35f60cb9d..83006cc1b 100644 ---
a/modules/local/build_intervals/main.nf +++ b/modules/local/build_intervals/main.nf @@ -11,7 +11,7 @@ process BUILD_INTERVALS { output: tuple val(meta), path("${fasta_fai.baseName}.bed") , emit: bed - path "versions.yml" , emit: versions + //path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -21,10 +21,5 @@ process BUILD_INTERVALS { """ awk -v FS='\t' -v OFS='\t' '{ print \$1, \"0\", \$2 }' ${fasta_fai} > ${fasta_fai.baseName}.bed - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') - END_VERSIONS """ } diff --git a/modules/local/create_intervals_bed/main.nf b/modules/local/create_intervals_bed/main.nf index 54e8b4ef9..357aab491 100644 --- a/modules/local/create_intervals_bed/main.nf +++ b/modules/local/create_intervals_bed/main.nf @@ -11,7 +11,7 @@ process CREATE_INTERVALS_BED { output: path("*.bed") , emit: bed - //path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -39,6 +39,11 @@ process CREATE_INTERVALS_BED { chunk += t print \$0 > name }' ${intervals} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS """ } else if (intervals.toString().toLowerCase().endsWith("interval_list")) { """ @@ -46,6 +51,11 @@ process CREATE_INTERVALS_BED { name = sprintf("%s_%d-%d", \$1, \$2, \$3); printf("%s\\t%d\\t%d\\n", \$1, \$2-1, \$3) > name ".bed" }' + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS """ } else { """ @@ -53,6 +63,11 @@ process CREATE_INTERVALS_BED { name = sprintf("%s_%d-%d", \$1, \$2, \$3); printf("%s\\t%d\\t%d\\n", \$1, \$2-1, \$3) > name ".bed" }' ${intervals} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS """ } } diff --git 
a/subworkflows/local/prepare_intervals.nf b/subworkflows/local/prepare_intervals.nf index 475dc6937..9c7dfb5c6 100644 --- a/subworkflows/local/prepare_intervals.nf +++ b/subworkflows/local/prepare_intervals.nf @@ -49,8 +49,8 @@ workflow PREPARE_INTERVALS { ch_intervals = CREATE_INTERVALS_BED(ch_intervals_combined.map{meta, path -> path}).bed - ch_versions = ch_intervals.mix(BUILD_INTERVALS.out.versions) - //ch_versions = ch_intervals.mix(CREATE_INTERVALS_BED.out.versions) + //ch_versions = ch_intervals.mix(BUILD_INTERVALS.out.versions) + ch_versions = ch_intervals.mix(CREATE_INTERVALS_BED.out.versions) } else { From ab2142f71f0b6dc8f19791ff68257af3fda93e6e Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 19 Jul 2022 22:34:32 +0200 Subject: [PATCH 13/18] bad copy&paste was the culprit --- CHANGELOG.md | 1 + modules/local/build_intervals/main.nf | 7 ++++++- subworkflows/local/prepare_intervals.nf | 6 ++++-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0321be4f1..146a0d834 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -149,6 +149,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#642](https://github.com/nf-core/sarek/pull/642) - Only unzip ref files if tool is run, only publish ref files if `--save_reference` and simplify CNKit logic - [#650](https://github.com/nf-core/sarek/pull/650) - Fix intervals checks - [#654](https://github.com/nf-core/sarek/pull/654) - Allow any step but annotation to start from BAM files +- [#655](https://github.com/nf-core/sarek/pull/655) - Fix `--intervals false` logic & add versioning for local modules - [#658](https://github.com/nf-core/sarek/pull/658) - Fix split fastq names in multiqc-report ### Deprecated diff --git a/modules/local/build_intervals/main.nf b/modules/local/build_intervals/main.nf index 83006cc1b..35f60cb9d 100644 --- a/modules/local/build_intervals/main.nf +++ b/modules/local/build_intervals/main.nf @@ -11,7 +11,7 @@ process 
BUILD_INTERVALS { output: tuple val(meta), path("${fasta_fai.baseName}.bed") , emit: bed - //path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -21,5 +21,10 @@ process BUILD_INTERVALS { """ awk -v FS='\t' -v OFS='\t' '{ print \$1, \"0\", \$2 }' ${fasta_fai} > ${fasta_fai.baseName}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS """ } diff --git a/subworkflows/local/prepare_intervals.nf b/subworkflows/local/prepare_intervals.nf index 9c7dfb5c6..0136cb5fa 100644 --- a/subworkflows/local/prepare_intervals.nf +++ b/subworkflows/local/prepare_intervals.nf @@ -45,18 +45,20 @@ workflow PREPARE_INTERVALS { if (!params.intervals) { BUILD_INTERVALS(fasta_fai.map{it -> [[id:it.baseName], it]}) + ch_intervals_combined = BUILD_INTERVALS.out.bed ch_intervals = CREATE_INTERVALS_BED(ch_intervals_combined.map{meta, path -> path}).bed - //ch_versions = ch_intervals.mix(BUILD_INTERVALS.out.versions) - ch_versions = ch_intervals.mix(CREATE_INTERVALS_BED.out.versions) + ch_versions = ch_versions.mix(BUILD_INTERVALS.out.versions) + ch_versions = ch_versions.mix(CREATE_INTERVALS_BED.out.versions) } else { ch_intervals_combined = Channel.fromPath(file(params.intervals)).map{it -> [[id:it.baseName], it] } ch_intervals = CREATE_INTERVALS_BED(file(params.intervals)) + ch_versions = ch_versions.mix(CREATE_INTERVALS_BED.out.versions) //If interval file is not provided as .bed, but e.g. 
as .interval_list then convert to BED format if(!params.intervals.endsWith(".bed")) { From 8b515c853c1c2a9d1774222563e69e76d2ea21bd Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 19 Jul 2022 23:18:20 +0200 Subject: [PATCH 14/18] named output --- subworkflows/local/prepare_intervals.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/prepare_intervals.nf b/subworkflows/local/prepare_intervals.nf index 0136cb5fa..03a5498fa 100644 --- a/subworkflows/local/prepare_intervals.nf +++ b/subworkflows/local/prepare_intervals.nf @@ -57,7 +57,7 @@ workflow PREPARE_INTERVALS { ch_intervals_combined = Channel.fromPath(file(params.intervals)).map{it -> [[id:it.baseName], it] } - ch_intervals = CREATE_INTERVALS_BED(file(params.intervals)) + ch_intervals = CREATE_INTERVALS_BED(file(params.intervals)).bed ch_versions = ch_versions.mix(CREATE_INTERVALS_BED.out.versions) //If interval file is not provided as .bed, but e.g. as .interval_list then convert to BED format From a1e52e361a542aa30e1737c0bac3b3bdd2aa169d Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 19 Jul 2022 23:38:08 +0200 Subject: [PATCH 15/18] use correct subworkflow --- workflows/sarek.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 86728757e..32ac29309 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -357,7 +357,7 @@ workflow SAREK { // Antitarget based reference for CNVKit PREPARE_CNVKIT_REFERENCE(fasta, intervals_bed_combined) - cnvkit_reference = params.tools && params.tools.split(',').contains('cnvkit') ? PREPARE_INTERVALS.out.cnvkit_reference : Channel.empty() + cnvkit_reference = params.tools && params.tools.split(',').contains('cnvkit') ? 
PREPARE_CNVKIT_REFERENCE.out.cnvkit_reference : Channel.empty() ch_versions = ch_versions.mix(PREPARE_CNVKIT_REFERENCE.out.versions) // PREPROCESSING From 4d2a1c588934cbf2eb1a7e2074ccbea46542fe40 Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 20 Jul 2022 09:53:32 +0200 Subject: [PATCH 16/18] use new container --- modules/local/build_intervals/main.nf | 6 +++--- modules/local/create_intervals_bed/main.nf | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/local/build_intervals/main.nf b/modules/local/build_intervals/main.nf index 35f60cb9d..a42f7d1dd 100644 --- a/modules/local/build_intervals/main.nf +++ b/modules/local/build_intervals/main.nf @@ -1,10 +1,10 @@ process BUILD_INTERVALS { tag "$meta.id" - conda (params.enable_conda ? "bioconda::gawk=4.1.3" : null) + conda (params.enable_conda ? "anaconda::gawk=5.1.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gawk:4.1.3--0': - 'quay.io/biocontainers/gawk:4.1.3--1' }" + 'https://depot.galaxyproject.org/singularity/gawk:5.1.0' : + 'quay.io/biocontainers/gawk:5.1.0' }" input: tuple val(meta), path(fasta_fai) diff --git a/modules/local/create_intervals_bed/main.nf b/modules/local/create_intervals_bed/main.nf index 357aab491..f46a0df09 100644 --- a/modules/local/create_intervals_bed/main.nf +++ b/modules/local/create_intervals_bed/main.nf @@ -1,10 +1,10 @@ process CREATE_INTERVALS_BED { tag "$intervals" - conda (params.enable_conda ? "bioconda::gawk=4.1.3" : null) + conda (params.enable_conda ? "anaconda::gawk=5.1.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gawk:4.1.3--0': - 'quay.io/biocontainers/gawk:4.1.3--1' }" + 'https://depot.galaxyproject.org/singularity/gawk:5.1.0' : + 'quay.io/biocontainers/gawk:5.1.0' }" input: path(intervals) From 714214c89a91d06a46a5a78d085a9c862226cd55 Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Wed, 20 Jul 2022 10:22:33 +0200 Subject: [PATCH 17/18] Update tests/test_targeted.yml Co-authored-by: Maxime U. Garcia --- tests/test_targeted.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_targeted.yml b/tests/test_targeted.yml index ef09007e9..23f84129b 100644 --- a/tests/test_targeted.yml +++ b/tests/test_targeted.yml @@ -38,24 +38,24 @@ - default - preprocessing files: + - path: results/csv/markduplicates.csv + - path: results/csv/markduplicates_no_table.csv + - path: results/csv/recalibrated.csv - path: results/multiqc - path: results/preprocessing/markduplicates/test/test.md.cram - path: results/preprocessing/markduplicates/test/test.md.cram.crai - path: results/preprocessing/recal_table/test/test.recal.table - path: results/preprocessing/recalibrated/test/test.recal.cram - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - - path: results/csv/markduplicates.csv - - path: results/csv/markduplicates_no_table.csv - - path: results/csv/recalibrated.csv - path: results/reports/fastqc/test-test_L1 - path: results/reports/markduplicates/test/test.md.metrics - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt - - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt - path: results/reports/mosdepth/test/test.md.regions.bed.gz - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - path: 
results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - path: results/reports/samtools/test/test.md.cram.stats - path: results/reports/samtools/test/test.recal.cram.stats From 7082782fabc11742539e19962604588b7e2996d3 Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 20 Jul 2022 10:23:29 +0200 Subject: [PATCH 18/18] remove task.ext.args that is not used in local module --- modules/local/build_intervals/main.nf | 2 -- 1 file changed, 2 deletions(-) diff --git a/modules/local/build_intervals/main.nf b/modules/local/build_intervals/main.nf index a42f7d1dd..9f14182ea 100644 --- a/modules/local/build_intervals/main.nf +++ b/modules/local/build_intervals/main.nf @@ -17,8 +17,6 @@ process BUILD_INTERVALS { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - """ awk -v FS='\t' -v OFS='\t' '{ print \$1, \"0\", \$2 }' ${fasta_fai} > ${fasta_fai.baseName}.bed