From 0f4190e2c368ec057ebaa42cf0992f72df067038 Mon Sep 17 00:00:00 2001 From: Evan Cofer Date: Mon, 5 Aug 2024 10:21:05 -0400 Subject: [PATCH 1/2] Fixes missing BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS imports --- workflows/riboseq/main.nf | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/workflows/riboseq/main.nf b/workflows/riboseq/main.nf index 1a73ace..11f326f 100644 --- a/workflows/riboseq/main.nf +++ b/workflows/riboseq/main.nf @@ -49,8 +49,12 @@ def filterGtf = // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS } from '../../subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main' +include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS as BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME } from '../../subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main' +include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS as BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME } from '../../subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main' include { PREPROCESS_RNASEQ } from '../../subworkflows/nf-core/preprocess_rnaseq' +include { PREPARE_GENOME } from '../../subworkflows/local/prepare_genome' include { FASTQ_ALIGN_STAR } from '../../subworkflows/nf-core/fastq_align_star' +include { BAM_SORT_STATS_SAMTOOLS } from '../../subworkflows/nf-core/bam_sort_stats_samtools' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -178,7 +182,8 @@ workflow RIBOSEQ { ch_genome_bam = FASTQ_ALIGN_STAR.out.bam ch_genome_bam_index = FASTQ_ALIGN_STAR.out.bai - ch_transcriptome_bam = FASTQ_ALIGN_STAR.out.orig_bam_transcript + ch_transcriptome_bam = FASTQ_ALIGN_STAR.out.bam_transcript + ch_transcriptome_bai = FASTQ_ALIGN_STAR.out.bai_transcript ch_versions = ch_versions.mix(FASTQ_ALIGN_STAR.out.versions) ch_multiqc_files = ch_multiqc_files @@ -207,13 +212,19 @@ workflow RIBOSEQ { .mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.flagstat.collect{it[1]}) .mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME.out.idxstats.collect{it[1]}) - // Deduplicate transcriptome BAM file before downstream analysis + BAM_SORT_STATS_SAMTOOLS ( + ch_transcriptome_bam, + ch_fasta.map { [ [:], it ] } + ) + ch_transcriptome_sorted_bam = BAM_SORT_STATS_SAMTOOLS.out.bam + ch_transcriptome_sorted_bai = BAM_SORT_STATS_SAMTOOLS.out.bai + + // Deduplicate transcriptome BAM file before read counting with Salmon BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME ( - ch_transcriptome_bam.join(ch_transcriptome_bai, by: [0]), + ch_transcriptome_sorted_bam.join(ch_transcriptome_sorted_bai, by: [0]), params.umitools_dedup_stats ) - ch_transcriptome_bam = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME.out.bam ch_multiqc_files = ch_multiqc_files .mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME.out.stats.collect{it[1]}) @@ -225,7 +236,7 @@ workflow RIBOSEQ { SAMTOOLS_SORT ( BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME.out.bam, - [[:],[]] + ch_fasta ) // Only run prepare_for_rsem.py on paired-end BAM files @@ -381,7 +392,7 @@ workflow RIBOSEQ { ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: true)) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) MULTIQC ( ch_multiqc_files.collect(), From e02b0d866b07f6d6549385ebf0b89cea2a8af8cd Mon Sep 17 00:00:00 2001 From: Evan Cofer Date: Mon, 5 Aug 2024 10:32:01 -0400 Subject: [PATCH 2/2] Cleaned up code from prior commit --- workflows/riboseq/main.nf | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/workflows/riboseq/main.nf b/workflows/riboseq/main.nf index 11f326f..6ddcf68 100644 --- a/workflows/riboseq/main.nf +++ b/workflows/riboseq/main.nf @@ -52,7 +52,6 @@ include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS } from '../../subworkflows/nf-core/b include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS as BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_GENOME } from '../../subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main' include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS as BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME } from '../../subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main' include { PREPROCESS_RNASEQ } from '../../subworkflows/nf-core/preprocess_rnaseq' -include { PREPARE_GENOME } from '../../subworkflows/local/prepare_genome' include { FASTQ_ALIGN_STAR } from '../../subworkflows/nf-core/fastq_align_star' include { BAM_SORT_STATS_SAMTOOLS } from '../../subworkflows/nf-core/bam_sort_stats_samtools' @@ -217,14 +216,16 @@ workflow RIBOSEQ { ch_transcriptome_bam, ch_fasta.map { [ [:], it ] } ) - ch_transcriptome_sorted_bam = BAM_SORT_STATS_SAMTOOLS.out.bam - ch_transcriptome_sorted_bai = BAM_SORT_STATS_SAMTOOLS.out.bai + ch_transcriptome_bam = BAM_SORT_STATS_SAMTOOLS.out.bam + ch_transcriptome_bai = BAM_SORT_STATS_SAMTOOLS.out.bai // Deduplicate transcriptome BAM file before read counting with Salmon BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME ( - ch_transcriptome_sorted_bam.join(ch_transcriptome_sorted_bai, by: [0]), + ch_transcriptome_bam.join(ch_transcriptome_bai, by: [0]), params.umitools_dedup_stats ) + ch_transcriptome_bam = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME.out.bam + ch_transcriptome_bai = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME.out.bai ch_multiqc_files = ch_multiqc_files .mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME.out.stats.collect{it[1]}) @@ -236,7 +237,7 @@ workflow RIBOSEQ { SAMTOOLS_SORT ( BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS_TRANSCRIPTOME.out.bam, - ch_fasta + [[:],[]] ) // Only run prepare_for_rsem.py on paired-end BAM files @@ -392,7 +393,7 @@ workflow RIBOSEQ { ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: true)) MULTIQC ( ch_multiqc_files.collect(),