diff --git a/CHANGELOG.md b/CHANGELOG.md index 8df76ecbaf..688ff2b520 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - [#679](https://github.com/nf-core/sarek/pull/679) - Back to `dev` +- [#685](https://github.com/nf-core/sarek/pull/685) - Updating the nf-core modules used by Sarek. - [#691](https://github.com/nf-core/sarek/pull/691) - To run the same pytest as before locally, use `PROFILE=docker` ### Fixed diff --git a/modules.json b/modules.json index a36852a24d..7aa38ec126 100644 --- a/modules.json +++ b/modules.json @@ -10,7 +10,7 @@ "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "bcftools/stats": { - "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" + "git_sha": "535975eb81c9e48eb0403c136b7f1dd7acb1afad" }, "bwa/index": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" @@ -70,7 +70,7 @@ "git_sha": "973151e9eab9bac400aa99f099075a10cdd8e84c" }, "fastp": { - "git_sha": "9b51362a532a14665f513cf987531f9ea5046b74" + "git_sha": "7e8ad566883449e7939062b5e2bcf53fc1e0002f" }, "fastqc": { "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" @@ -202,7 +202,7 @@ "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" }, "samtools/merge": { - "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" + "git_sha": "720027275ccdc1363bb2a19c6412da148e31d94b" }, "samtools/mpileup": { "git_sha": "24e05f6097a5dde57dd80d33295ed120f1b81aef" diff --git a/modules/nf-core/modules/bcftools/stats/main.nf b/modules/nf-core/modules/bcftools/stats/main.nf index 1e0f3a47a9..c42ed29a8e 100644 --- a/modules/nf-core/modules/bcftools/stats/main.nf +++ b/modules/nf-core/modules/bcftools/stats/main.nf @@ -9,6 +9,7 @@ process BCFTOOLS_STATS { input: tuple val(meta), path(vcf) + path(target_bed) output: tuple val(meta), path("*stats.txt"), emit: stats @@ -20,8 +21,13 @@ process BCFTOOLS_STATS { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" 
+ def target = target_bed ? "--regions-file ${target_bed}" : "" """ - bcftools stats $args $vcf > ${prefix}.bcftools_stats.txt + bcftools stats \\ + $args \\ + $target \\ + $vcf > ${prefix}.bcftools_stats.txt + cat <<-END_VERSIONS > versions.yml "${task.process}": bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') diff --git a/modules/nf-core/modules/bcftools/stats/meta.yml b/modules/nf-core/modules/bcftools/stats/meta.yml index 304b88ecba..b0aec1ebfb 100644 --- a/modules/nf-core/modules/bcftools/stats/meta.yml +++ b/modules/nf-core/modules/bcftools/stats/meta.yml @@ -19,6 +19,10 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] + - target_bed: + type: file + description: Optional BED file of target regions, passed to bcftools stats via --regions-file + pattern: "*.{bed}" - vcf: type: file description: VCF input file @@ -40,3 +44,4 @@ output: authors: - "@joseespinosa" - "@drpatelh" + - "@SusiJo" diff --git a/modules/nf-core/modules/fastp/main.nf b/modules/nf-core/modules/fastp/main.nf index 120392c561..d9134e143d 100644 --- a/modules/nf-core/modules/fastp/main.nf +++ b/modules/nf-core/modules/fastp/main.nf @@ -28,19 +28,23 @@ process FASTP { def args = task.ext.args ?: '' // Added soft-links to original fastqs for consistent naming in MultiQC def prefix = task.ext.prefix ?: "${meta.id}" + // Use single ended for interleaved. Add --interleaved_in in config. if (meta.single_end) { def fail_fastq = save_trimmed_fail ? "--failed_out ${prefix}.fail.fastq.gz" : '' """ [ ! 
-f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz - fastp \\ + cat ${prefix}.fastq.gz \\ + | fastp \\ + --stdin \\ + --stdout \\ --in1 ${prefix}.fastq.gz \\ - --out1 ${prefix}.fastp.fastq.gz \\ --thread $task.cpus \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ $fail_fastq \\ $args \\ - 2> ${prefix}.fastp.log + 2> ${prefix}.fastp.log \\ + | gzip -c > ${prefix}.fastp.fastq.gz cat <<-END_VERSIONS > versions.yml "${task.process}": fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") diff --git a/modules/nf-core/modules/fastp/meta.yml b/modules/nf-core/modules/fastp/meta.yml index 2bd2b1a91f..598c3368bd 100644 --- a/modules/nf-core/modules/fastp/meta.yml +++ b/modules/nf-core/modules/fastp/meta.yml @@ -15,7 +15,7 @@ input: - meta: type: map description: | - Groovy Map containing sample information + Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads. e.g. [ id:'test', single_end:false ] - reads: type: file diff --git a/modules/nf-core/modules/samtools/merge/main.nf b/modules/nf-core/modules/samtools/merge/main.nf index bbf7e8fbd6..d01bbed8bd 100644 --- a/modules/nf-core/modules/samtools/merge/main.nf +++ b/modules/nf-core/modules/samtools/merge/main.nf @@ -8,8 +8,9 @@ process SAMTOOLS_MERGE { 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" input: - tuple val(meta), path(input_files) + tuple val(meta), path(input_files, stageAs: "?/*") path fasta + path fai output: tuple val(meta), path("${prefix}.bam") , optional:true, emit: bam diff --git a/modules/nf-core/modules/samtools/merge/meta.yml b/modules/nf-core/modules/samtools/merge/meta.yml index fb78e55cd3..f6833d0c7e 100644 --- a/modules/nf-core/modules/samtools/merge/meta.yml +++ b/modules/nf-core/modules/samtools/merge/meta.yml @@ -29,6 +29,10 @@ input: type: optional file description: Reference file the CRAM was created with pattern: "*.{fasta,fa}" + - fai: + type: optional 
file + description: Index of the reference file the CRAM was created with + pattern: "*.fai" output: - meta: type: map diff --git a/subworkflows/nf-core/alignment_to_fastq.nf b/subworkflows/nf-core/alignment_to_fastq.nf index ec855d6ca2..a5806f8733 100644 --- a/subworkflows/nf-core/alignment_to_fastq.nf +++ b/subworkflows/nf-core/alignment_to_fastq.nf @@ -15,6 +15,7 @@ workflow ALIGNMENT_TO_FASTQ { take: input // channel: [meta, alignment (BAM or CRAM), index (optional)] fasta // optional: reference file if CRAM format and reference not in header + fasta_fai // optional: index (.fai) of the reference fasta, forwarded to SAMTOOLS_MERGE_UNMAP; pass [] when no reference is used main: ch_versions = Channel.empty() @@ -40,7 +41,7 @@ workflow ALIGNMENT_TO_FASTQ { [meta, [unmap_unmap, unmap_map, map_unmap]] } - SAMTOOLS_MERGE_UNMAP(all_unmapped_bam, fasta) + SAMTOOLS_MERGE_UNMAP(all_unmapped_bam, fasta, fasta_fai) // Collate & convert unmapped COLLATE_FASTQ_UNMAP(SAMTOOLS_MERGE_UNMAP.out.bam) diff --git a/subworkflows/nf-core/gatk4/recalibrate/main.nf b/subworkflows/nf-core/gatk4/recalibrate/main.nf index fab4d28ddc..6e9fb289b5 100644 --- a/subworkflows/nf-core/gatk4/recalibrate/main.nf +++ b/subworkflows/nf-core/gatk4/recalibrate/main.nf @@ -40,7 +40,7 @@ workflow RECALIBRATE { APPLYBQSR(cram_intervals, fasta, fasta_fai, dict) // STEP 4.5: MERGING AND INDEXING THE RECALIBRATED CRAM FILES - MERGE_INDEX_CRAM(APPLYBQSR.out.cram, fasta) + MERGE_INDEX_CRAM(APPLYBQSR.out.cram, fasta, fasta_fai) ch_cram_recal_out = MERGE_INDEX_CRAM.out.cram_crai.map{ meta, cram, crai -> // remove no longer necessary fields to make sure joining can be done correctly: num_intervals diff --git a/subworkflows/nf-core/gatk4/recalibrate_spark/main.nf b/subworkflows/nf-core/gatk4/recalibrate_spark/main.nf index 8d0b3b5e50..538b791a02 100644 --- a/subworkflows/nf-core/gatk4/recalibrate_spark/main.nf +++ b/subworkflows/nf-core/gatk4/recalibrate_spark/main.nf @@ -40,7 +40,7 @@ workflow RECALIBRATE_SPARK { APPLYBQSR_SPARK(cram_intervals, fasta, fasta_fai, dict) // STEP 4.5: MERGING AND INDEXING THE RECALIBRATED BAM FILES 
- MERGE_INDEX_CRAM(APPLYBQSR_SPARK.out.cram, fasta) + MERGE_INDEX_CRAM(APPLYBQSR_SPARK.out.cram, fasta, fasta_fai) ch_cram_recal_out = MERGE_INDEX_CRAM.out.cram_crai.map{ meta, cram, crai -> // remove no longer necessary fields to make sure joining can be done correctly: num_intervals diff --git a/subworkflows/nf-core/merge_index_bam.nf b/subworkflows/nf-core/merge_index_bam.nf index 7443850ff2..6bee133ce6 100644 --- a/subworkflows/nf-core/merge_index_bam.nf +++ b/subworkflows/nf-core/merge_index_bam.nf @@ -21,7 +21,7 @@ workflow MERGE_INDEX_BAM { multiple: it[1].size() > 1 }.set{bam_to_merge} - MERGE_BAM(bam_to_merge.multiple, []) + MERGE_BAM(bam_to_merge.multiple, [], []) INDEX_MERGE_BAM(bam_to_merge.single.mix(MERGE_BAM.out.bam)) bam_bai = bam_to_merge.single diff --git a/subworkflows/nf-core/merge_index_cram.nf b/subworkflows/nf-core/merge_index_cram.nf index e838376ceb..eb7dea55f3 100644 --- a/subworkflows/nf-core/merge_index_cram.nf +++ b/subworkflows/nf-core/merge_index_cram.nf @@ -11,6 +11,7 @@ workflow MERGE_INDEX_CRAM { take: ch_cram // channel: [mandatory] meta, cram fasta // channel: [mandatory] fasta + fasta_fai // channel: [mandatory] fai for fasta main: ch_versions = Channel.empty() @@ -36,7 +37,7 @@ workflow MERGE_INDEX_CRAM { multiple: it[0].num_intervals > 1 } - MERGE_CRAM(ch_cram_to_merge.multiple, fasta) + MERGE_CRAM(ch_cram_to_merge.multiple, fasta, fasta_fai) INDEX_CRAM(ch_cram_to_merge.single.mix(MERGE_CRAM.out.cram)) cram_crai = ch_cram_to_merge.single diff --git a/subworkflows/nf-core/vcf_qc.nf b/subworkflows/nf-core/vcf_qc.nf index ded381704e..598c906218 100644 --- a/subworkflows/nf-core/vcf_qc.nf +++ b/subworkflows/nf-core/vcf_qc.nf @@ -12,7 +12,7 @@ workflow VCF_QC { ch_versions = Channel.empty() - BCFTOOLS_STATS(vcf) + BCFTOOLS_STATS(vcf, []) VCFTOOLS_TSTV_COUNT(vcf, target_bed, []) VCFTOOLS_TSTV_QUAL(vcf, target_bed, []) VCFTOOLS_SUMMARY(vcf, target_bed, []) diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 
06089af9d6..643f6f66da 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -385,7 +385,7 @@ workflow SAREK { // convert any bam input to fastq // Fasta are not needed when converting bam to fastq -> [] - ALIGNMENT_TO_FASTQ_INPUT(ch_input_sample_type.bam, []) + ALIGNMENT_TO_FASTQ_INPUT(ch_input_sample_type.bam, [], []) // gather fastq (inputed or converted) // Theorically this could work on mixed input (fastq for one sample and bam for another) @@ -419,7 +419,7 @@ workflow SAREK { bamtofastq = CREATE_UMI_CONSENSUS.out.consensusbam.map{meta, bam -> [meta,bam,[]]} // convert back to fastq for further preprocessing - ALIGNMENT_TO_FASTQ_UMI(bamtofastq, []) + ALIGNMENT_TO_FASTQ_UMI(bamtofastq, [], []) ch_reads_fastp = ALIGNMENT_TO_FASTQ_UMI.out.reads