From 98d3d65b71b867ef92408c8cba7d8242ee67fb01 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Tue, 13 Feb 2024 19:14:47 +0100 Subject: [PATCH 1/7] fasta_fai should be a single entity --- subworkflows/local/prepare_genome/main.nf | 2 +- workflows/sarek.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index 5ceb16f8a..46a2fa598 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -123,7 +123,7 @@ workflow PREPARE_GENOME { hashtable = DRAGMAP_HASHTABLE.out.hashmap.map{ meta, index -> [index] }.collect() // path: dragmap/* dbsnp_tbi = TABIX_DBSNP.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: dbsnb.vcf.gz.tbi dict = GATK4_CREATESEQUENCEDICTIONARY.out.dict // path: genome.fasta.dict - fasta_fai = SAMTOOLS_FAIDX.out.fai.map{ meta, fai -> [fai] } // path: genome.fasta.fai + fasta_fai = SAMTOOLS_FAIDX.out.fai.map{ meta, fai -> [fai] }.flatten().first() // path: genome.fasta.fai germline_resource_tbi = TABIX_GERMLINE_RESOURCE.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: germline_resource.vcf.gz.tbi known_snps_tbi = TABIX_KNOWN_SNPS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: {known_indels*}.vcf.gz.tbi known_indels_tbi = TABIX_KNOWN_INDELS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: {known_indels*}.vcf.gz.tbi diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 647890947..694e2a19a 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -324,7 +324,7 @@ workflow SAREK { // Built from the fasta file: dict = params.dict ? Channel.fromPath(params.dict).map{ it -> [ [id:'dict'], it ] }.collect() : PREPARE_GENOME.out.dict - fasta_fai = params.fasta_fai ? Channel.fromPath(params.fasta_fai).collect() + fasta_fai = params.fasta_fai ? Channel.fromPath(params.fasta_fai).first() : PREPARE_GENOME.out.fasta_fai bwa = params.bwa ? Channel.fromPath(params.bwa).collect() : PREPARE_GENOME.out.bwa From 763cefaf2ec9b04ecdeabbbe43503f7083d1cb22 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Tue, 13 Feb 2024 20:20:18 +0100 Subject: [PATCH 2/7] use baseName over simpleName to keep the dot --- conf/modules/freebayes.config | 2 +- conf/modules/haplotypecaller.config | 2 +- conf/modules/mpileup.config | 2 +- conf/modules/mutect2.config | 4 ++-- conf/modules/sentieon_dnascope.config | 2 +- conf/modules/sentieon_haplotyper.config | 2 +- conf/modules/strelka.config | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/conf/modules/freebayes.config b/conf/modules/freebayes.config index a30ec6230..2c224aeff 100644 --- a/conf/modules/freebayes.config +++ b/conf/modules/freebayes.config @@ -27,7 +27,7 @@ process { withName: 'FREEBAYES' { ext.args = { '--min-alternate-fraction 0.1 --min-mapping-quality 1' } //To make sure no naming conflicts ensure with module BCFTOOLS_SORT & the naming being correct in the output folder - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}" : "${meta.id}.${target_bed.simpleName}" } + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}" : "${meta.id}.${target_bed.baseName}" } ext.when = { params.tools && params.tools.split(',').contains('freebayes') } publishDir = [ enabled: false diff --git a/conf/modules/haplotypecaller.config b/conf/modules/haplotypecaller.config index f376f4f41..d5b8a3db4 100644 --- a/conf/modules/haplotypecaller.config +++ b/conf/modules/haplotypecaller.config @@ -17,7 +17,7 @@ process { withName: 'GATK4_HAPLOTYPECALLER' { ext.args = { params.joint_germline ? "-ERC GVCF" : "" } - ext.prefix = { meta.num_intervals <= 1 ? ( params.joint_germline ? "${meta.id}.haplotypecaller.g" : "${meta.id}.haplotypecaller" ) : ( params.joint_germline ? "${meta.id}.haplotypecaller.${intervals.simpleName}.g" :"${meta.id}.haplotypecaller.${intervals.simpleName}" ) } + ext.prefix = { meta.num_intervals <= 1 ? ( params.joint_germline ? "${meta.id}.haplotypecaller.g" : "${meta.id}.haplotypecaller" ) : ( params.joint_germline ? "${meta.id}.haplotypecaller.${intervals.baseName}.g" :"${meta.id}.haplotypecaller.${intervals.baseName}" ) } ext.when = { params.tools && params.tools.split(',').contains('haplotypecaller') } publishDir = [ mode: params.publish_dir_mode, diff --git a/conf/modules/mpileup.config b/conf/modules/mpileup.config index 43cae7f1b..dbdfa27fe 100644 --- a/conf/modules/mpileup.config +++ b/conf/modules/mpileup.config @@ -24,7 +24,7 @@ process { withName: 'BCFTOOLS_MPILEUP' { ext.args2 = { '--multiallelic-caller' } ext.args3 = { "-i 'count(GT==\"RR\")==0'" } // only report non homozygous reference variants - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.bcftools" : "${meta.id}_${intervals.simpleName}.bcftools" } + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.bcftools" : "${meta.id}_${intervals.baseName}.bcftools" } ext.when = { params.tools && params.tools.split(',').contains('mpileup') } publishDir = [ mode: params.publish_dir_mode, diff --git a/conf/modules/mutect2.config b/conf/modules/mutect2.config index 2f74ee632..5892437dd 100644 --- a/conf/modules/mutect2.config +++ b/conf/modules/mutect2.config @@ -18,7 +18,7 @@ process { withName: 'GATK4_MUTECT2' { ext.args = { params.ignore_soft_clipped_bases ? "--dont-use-soft-clipped-bases true --f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz" : "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz" } - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.mutect2" : "${meta.id}.mutect2.${intervals.simpleName}" } + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.mutect2" : "${meta.id}.mutect2.${intervals.baseName}" } ext.when = { params.tools && params.tools.split(',').contains('mutect2') } publishDir = [ mode: params.publish_dir_mode, @@ -91,7 +91,7 @@ process { } withName: 'GETPILEUPSUMMARIES.*' { - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.mutect2" : "${meta.id}.mutect2.${intervals.simpleName}" } + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.mutect2" : "${meta.id}.mutect2.${intervals.baseName}" } publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/" }, diff --git a/conf/modules/sentieon_dnascope.config b/conf/modules/sentieon_dnascope.config index 224c33c89..50cf373ea 100644 --- a/conf/modules/sentieon_dnascope.config +++ b/conf/modules/sentieon_dnascope.config @@ -16,7 +16,7 @@ process { withName: 'SENTIEON_DNASCOPE' { - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.dnascope" : "${meta.id}.dnascope.${intervals.simpleName}" } + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.dnascope" : "${meta.id}.dnascope.${intervals.baseName}" } ext.when = { params.tools && params.tools.split(',').contains('sentieon_dnascope') } publishDir = [ mode: params.publish_dir_mode, diff --git a/conf/modules/sentieon_haplotyper.config b/conf/modules/sentieon_haplotyper.config index 8b01b04c2..c2b958222 100644 --- a/conf/modules/sentieon_haplotyper.config +++ b/conf/modules/sentieon_haplotyper.config @@ -16,7 +16,7 @@ process { withName: 'SENTIEON_HAPLOTYPER' { - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.haplotyper" : "${meta.id}.haplotyper.${intervals.simpleName}" } + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.haplotyper" : "${meta.id}.haplotyper.${intervals.baseName}" } ext.when = { params.tools && params.tools.split(',').contains('sentieon_haplotyper') } publishDir = [ mode: params.publish_dir_mode, diff --git a/conf/modules/strelka.config b/conf/modules/strelka.config index 23620dfda..badffb5df 100644 --- a/conf/modules/strelka.config +++ b/conf/modules/strelka.config @@ -17,7 +17,7 @@ process { withName: 'STRELKA_.*' { ext.args = { params.wes ? '--exome' : '' } - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.strelka" : "${meta.id}.strelka.${target_bed.simpleName}" } + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.strelka" : "${meta.id}.strelka.${target_bed.baseName}" } ext.when = { params.tools && params.tools.split(',').contains('strelka') } publishDir = [ mode: params.publish_dir_mode, From d0bbf5582aba7405769caf188569efca2bac3acd Mon Sep 17 00:00:00 2001 From: maxulysse Date: Tue, 13 Feb 2024 20:37:31 +0100 Subject: [PATCH 3/7] forgot some conf files --- conf/modules/deepvariant.config | 2 +- conf/modules/recalibrate.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/modules/deepvariant.config b/conf/modules/deepvariant.config index ff67bc1a4..021990f7f 100644 --- a/conf/modules/deepvariant.config +++ b/conf/modules/deepvariant.config @@ -17,7 +17,7 @@ process { withName: 'DEEPVARIANT' { ext.args = { params.wes ? "--model_type WES" : "--model_type WGS" } - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.deepvariant" : "${meta.id}.deepvariant.${intervals.simpleName}" } + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.deepvariant" : "${meta.id}.deepvariant.${intervals.baseName}" } ext.when = { params.tools && params.tools.split(',').contains('deepvariant') } publishDir = [ mode: params.publish_dir_mode, diff --git a/conf/modules/recalibrate.config b/conf/modules/recalibrate.config index 7ca0a476f..41aa3a414 100644 --- a/conf/modules/recalibrate.config +++ b/conf/modules/recalibrate.config @@ -16,7 +16,7 @@ process { withName: 'GATK4_APPLYBQSR|GATK4SPARK_APPLYBQSR' { - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.recal" : "${meta.id}_${intervals.simpleName}.recal" } + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.recal" : "${meta.id}_${intervals.baseName}.recal" } publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/preprocessing/" }, From 6df242a1a310fa2555c4c5f175fb25666ad58951 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Tue, 13 Feb 2024 20:38:19 +0100 Subject: [PATCH 4/7] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 41fcfc598..dca45056e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#1381](https://github.com/nf-core/sarek/pull/1381) - Swap NGSCheckMate bed file for GATK.GRCh37 to one without the `chr` prefix - [#1383](https://github.com/nf-core/sarek/pull/1383) - Fix `--three_prime_clip_r{1,2}` parameter documentation - [#1390](https://github.com/nf-core/sarek/pull/1390) - Fix badges in README +- [#1403](https://github.com/nf-core/sarek/pull/1403) - Fix intervals usage with dot in chromosome names ### Removed From 964342499b6bc0219c2453c9b2b00a236cab1285 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 14 Feb 2024 09:58:20 +0100 Subject: [PATCH 5/7] remove docker.userEmulation --- conf/test.config | 2 +- nextflow.config | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/conf/test.config b/conf/test.config index 2612b92cd..dcb536738 100644 --- a/conf/test.config +++ b/conf/test.config @@ -105,7 +105,7 @@ if (System.getenv('PROFILE')) { } else if ("$PROFILE" == "docker") { conda.enabled = false docker.enabled = true - docker.userEmulation = { params.use_gatk_spark ? false : true }.call() + docker.runOptions = '-u $(id -u):$(id -g)' charliecloud.enabled = false podman.enabled = false shifter.enabled = false diff --git a/nextflow.config b/nextflow.config index d49a3ceef..c0eb9c702 100644 --- a/nextflow.config +++ b/nextflow.config @@ -190,7 +190,6 @@ profiles { charliecloud.enabled = false conda.enabled = false docker.enabled = true - docker.userEmulation = { params.use_gatk_spark ? false : true }.call() podman.enabled = false shifter.enabled = false docker.runOptions = '-u $(id -u):$(id -g)' From ee256e65aab6acd1ec059329c81367f5516dfdbf Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 14 Feb 2024 13:14:01 +0100 Subject: [PATCH 6/7] fix simpleName to baseName in subworkflows too --- subworkflows/local/bam_joint_calling_germline_gatk/main.nf | 2 +- subworkflows/local/bam_joint_calling_germline_sentieon/main.nf | 2 +- subworkflows/local/bam_variant_calling_haplotypecaller/main.nf | 2 +- .../local/bam_variant_calling_sentieon_haplotyper/main.nf | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf index f0d9148c0..494a7854b 100644 --- a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf +++ b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf @@ -37,7 +37,7 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK { // Rename based on num_intervals, group all samples by their interval_name/interval_file and restructure for channel // Group by [0, 3] to avoid a list of metas and make sure that any intervals gendb_input = input - .map{ meta, gvcf, tbi, intervals -> [ [ id:'joint_variant_calling', intervals_name:intervals.simpleName, num_intervals:meta.num_intervals ], gvcf, tbi, intervals ] } + .map{ meta, gvcf, tbi, intervals -> [ [ id:'joint_variant_calling', intervals_name:intervals.baseName, num_intervals:meta.num_intervals ], gvcf, tbi, intervals ] } .groupTuple(by:3) //join on interval file .map{ meta_list, gvcf, tbi, intervals -> // meta is now a list of [meta1, meta2] but they are all the same. So take the first element. diff --git a/subworkflows/local/bam_joint_calling_germline_sentieon/main.nf b/subworkflows/local/bam_joint_calling_germline_sentieon/main.nf index 3f19b33d5..768b59690 100644 --- a/subworkflows/local/bam_joint_calling_germline_sentieon/main.nf +++ b/subworkflows/local/bam_joint_calling_germline_sentieon/main.nf @@ -33,7 +33,7 @@ workflow BAM_JOINT_CALLING_GERMLINE_SENTIEON { versions = Channel.empty() sentieon_input = input - .map{ meta, gvcf, tbi, intervals -> [ [ id:'joint_variant_calling', intervals_name:intervals.simpleName, num_intervals:meta.num_intervals ], gvcf, tbi, intervals ] } + .map{ meta, gvcf, tbi, intervals -> [ [ id:'joint_variant_calling', intervals_name:intervals.baseName, num_intervals:meta.num_intervals ], gvcf, tbi, intervals ] } .groupTuple(by:[0, 3]) SENTIEON_GVCFTYPER(sentieon_input, fasta, fai, dbsnp, dbsnp_tbi) diff --git a/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf b/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf index 1dbef4c61..cf167eec4 100644 --- a/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf +++ b/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf @@ -29,7 +29,7 @@ workflow BAM_VARIANT_CALLING_HAPLOTYPECALLER { cram_intervals = cram.combine(intervals) // Move num_intervals to meta map // Add interval_name to allow correct merging with interval files - .map{ meta, cram, crai, intervals, num_intervals -> [ meta + [ interval_name:intervals.simpleName, num_intervals:num_intervals, variantcaller:'haplotypecaller' ], cram, crai, intervals, [] ] } + .map{ meta, cram, crai, intervals, num_intervals -> [ meta + [ interval_name:intervals.baseName, num_intervals:num_intervals, variantcaller:'haplotypecaller' ], cram, crai, intervals, [] ] } GATK4_HAPLOTYPECALLER(cram_intervals, fasta, fasta_fai, dict.map{ meta, dict -> [ dict ] }, dbsnp, dbsnp_tbi) diff --git a/subworkflows/local/bam_variant_calling_sentieon_haplotyper/main.nf b/subworkflows/local/bam_variant_calling_sentieon_haplotyper/main.nf index 4b280d271..b0bd6bd9a 100644 --- a/subworkflows/local/bam_variant_calling_sentieon_haplotyper/main.nf +++ b/subworkflows/local/bam_variant_calling_sentieon_haplotyper/main.nf @@ -34,7 +34,7 @@ workflow BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER { .map{ meta, cram, crai, intervals, num_intervals -> [ meta + [ num_intervals:num_intervals, - intervals_name:intervals.simpleName, + intervals_name:intervals.baseName, variantcaller:'sentieon_haplotyper'], cram, crai, From 16dbe97f3579bedc5093b2209fcce62094a9472e Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 14 Feb 2024 13:57:16 +0100 Subject: [PATCH 7/7] this is fixed by another PR --- conf/test.config | 2 +- nextflow.config | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/conf/test.config b/conf/test.config index dcb536738..2612b92cd 100644 --- a/conf/test.config +++ b/conf/test.config @@ -105,7 +105,7 @@ if (System.getenv('PROFILE')) { } else if ("$PROFILE" == "docker") { conda.enabled = false docker.enabled = true - docker.runOptions = '-u $(id -u):$(id -g)' + docker.userEmulation = { params.use_gatk_spark ? false : true }.call() charliecloud.enabled = false podman.enabled = false shifter.enabled = false diff --git a/nextflow.config b/nextflow.config index c0eb9c702..d49a3ceef 100644 --- a/nextflow.config +++ b/nextflow.config @@ -190,6 +190,7 @@ profiles { charliecloud.enabled = false conda.enabled = false docker.enabled = true + docker.userEmulation = { params.use_gatk_spark ? false : true }.call() podman.enabled = false shifter.enabled = false docker.runOptions = '-u $(id -u):$(id -g)'