Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid redundant index generation in sentieon flow #1135

Merged
merged 2 commits into from
Jun 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions subworkflows/local/bam_sentieon_dedup/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,12 @@
// A when clause condition is defined in the conf/modules.config to determine if the module should be run

include { CRAM_QC_MOSDEPTH_SAMTOOLS } from '../cram_qc_mosdepth_samtools/main'
include { GATK4_MARKDUPLICATES } from '../../../modules/nf-core/gatk4/markduplicates/main'
include { SENTIEON_DEDUP } from '../../../modules/nf-core/sentieon/dedup/main'
include { SAMTOOLS_INDEX as INDEX_INPUT } from '../../../modules/nf-core/samtools/index/main'
include { SAMTOOLS_INDEX as INDEX_MARKDUPLICATES } from '../../../modules/nf-core/samtools/index/main'

workflow BAM_SENTIEON_DEDUP {
take:
bam // channel: [mandatory] [ meta, bam ] // Although the channel is named "bam", it may contain cram-files.
bai
fasta // channel: [mandatory] [ fasta ]
fasta_fai // channel: [mandatory] [ fasta_fai ]
intervals_bed_combined // channel: [optional] [ intervals_bed ]
Expand All @@ -21,9 +19,9 @@ workflow BAM_SENTIEON_DEDUP {
versions = Channel.empty()
reports = Channel.empty()

INDEX_INPUT(bam)
asp8200 marked this conversation as resolved.
Show resolved Hide resolved
bam_bai = bam.join(INDEX_INPUT.out.bai.concat(INDEX_INPUT.out.crai), failOnMismatch:true, failOnDuplicate:true)
// The two index outputs are concatenated because, when the "bam" channel contains cram-files, the index files are emitted in INDEX_INPUT.out.crai rather than in INDEX_INPUT.out.bai
bam = bam.map{ meta, bam -> [ meta - meta.subMap('data_type'), bam ] }
bai = bai.map{ meta, bai -> [ meta - meta.subMap('data_type'), bai ] }
bam_bai = bam.join(bai, failOnMismatch:true, failOnDuplicate:true)
SENTIEON_DEDUP(bam_bai, fasta, fasta_fai)

// Join with the crai file
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ workflow FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON {
bam = bam.mix(DRAGMAP_ALIGN.out.bam)
bam = bam.mix(SENTIEON_BWAMEM.out.bam_and_bai.map{ meta, bam, bai -> [ meta, bam ] })

bai = Channel.empty()
bai = bai.mix(SENTIEON_BWAMEM.out.bam_and_bai.map{ meta, bam, bai -> [ meta, bai ] })

// Gather reports of all tools used
reports = reports.mix(DRAGMAP_ALIGN.out.log)

Expand All @@ -47,6 +50,7 @@ workflow FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON {

emit:
bam // channel: [ [meta], bam ]
bai // channel: [ [meta], bai ]
reports
versions // channel: [ versions.yml ]
}
7 changes: 6 additions & 1 deletion workflows/sarek.nf
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,10 @@ workflow SAREK {
[ groupKey( meta - meta.subMap('num_lanes', 'read_group', 'size') + [ data_type:'bam', id:meta.sample ], (meta.num_lanes ?: 1) * (meta.size ?: 1)), bam ]
}.groupTuple()

bai_mapped = FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON.out.bai.map{ meta, bai ->
[ groupKey( meta - meta.subMap('num_lanes', 'read_group', 'size') + [ data_type:'bai', id:meta.sample ], (meta.num_lanes ?: 1) * (meta.size ?: 1)), bai ]
}.groupTuple()

// gatk4 markduplicates can handle multiple bams as input, so there is no need to merge/index here,
// except when save_mapped is set or when both markduplicates and sentieon-dedup are skipped
if (
Expand Down Expand Up @@ -578,7 +582,6 @@ workflow SAREK {
// cram_for_markduplicates will contain the bams mapped with FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON when step is mapping,
// or the bams specified in the samplesheet.csv when step is prepare_recalibration
cram_for_markduplicates = params.step == 'mapping' ? bam_mapped : input_sample.map{ meta, input, index -> [ meta, input ] }

// if no MD is done, then run QC on mapped & converted CRAM files
// or the input BAM (+converted) or CRAM files
cram_skip_markduplicates = Channel.empty()
Expand Down Expand Up @@ -628,8 +631,10 @@ workflow SAREK {
// Gather the versions of the software used
versions = versions.mix(BAM_MARKDUPLICATES_SPARK.out.versions)
} else if (params.tools && params.tools.split(',').contains('sentieon_dedup')) {
crai_for_markduplicates = params.step == 'mapping' ? bai_mapped : input_sample.map{ meta, input, index -> [ meta, index ] }
BAM_SENTIEON_DEDUP(
cram_for_markduplicates,
crai_for_markduplicates,
fasta,
fasta_fai,
intervals_for_preprocessing)
Expand Down