Skip to content

Commit

Permalink
Merge pull request #1135 from nf-core/avoid_generating_redundant_index
Browse files Browse the repository at this point in the history
Avoid redundant index generation in sentieon flow
  • Loading branch information
asp8200 authored Jun 27, 2023
2 parents f387cf3 + 78979b0 commit df0f578
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 7 deletions.
10 changes: 4 additions & 6 deletions subworkflows/local/bam_sentieon_dedup/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,12 @@
// A when clause condition is defined in the conf/modules.config to determine if the module should be run

include { CRAM_QC_MOSDEPTH_SAMTOOLS } from '../cram_qc_mosdepth_samtools/main'
include { GATK4_MARKDUPLICATES } from '../../../modules/nf-core/gatk4/markduplicates/main'
include { SENTIEON_DEDUP } from '../../../modules/nf-core/sentieon/dedup/main'
include { SAMTOOLS_INDEX as INDEX_INPUT } from '../../../modules/nf-core/samtools/index/main'
include { SAMTOOLS_INDEX as INDEX_MARKDUPLICATES } from '../../../modules/nf-core/samtools/index/main'

workflow BAM_SENTIEON_DEDUP {
take:
bam // channel: [mandatory] [ meta, bam ] // Although the channel is named "bam", it may contain cram-files.
bai
fasta // channel: [mandatory] [ fasta ]
fasta_fai // channel: [mandatory] [ fasta_fai ]
intervals_bed_combined // channel: [optional] [ intervals_bed ]
Expand All @@ -21,9 +19,9 @@ workflow BAM_SENTIEON_DEDUP {
versions = Channel.empty()
reports = Channel.empty()

INDEX_INPUT(bam)
bam_bai = bam.join(INDEX_INPUT.out.bai.concat(INDEX_INPUT.out.crai), failOnMismatch:true, failOnDuplicate:true)
// The concat operation is part of the above command since if the "bam" channel contains cram-files, then the index files will be in the channel INDEX_INPUT.out.crai and not in INDEX_INPUT.out.bai
bam = bam.map{ meta, bam -> [ meta - meta.subMap('data_type'), bam ] }
bai = bai.map{ meta, bai -> [ meta - meta.subMap('data_type'), bai ] }
bam_bai = bam.join(bai, failOnMismatch:true, failOnDuplicate:true)
SENTIEON_DEDUP(bam_bai, fasta, fasta_fai)

// Join with the crai file
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ workflow FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON {
bam = bam.mix(DRAGMAP_ALIGN.out.bam)
bam = bam.mix(SENTIEON_BWAMEM.out.bam_and_bai.map{ meta, bam, bai -> [ meta, bam ] })

bai = Channel.empty()
bai = bai.mix(SENTIEON_BWAMEM.out.bam_and_bai.map{ meta, bam, bai -> [ meta, bai ] })

// Gather reports of all tools used
reports = reports.mix(DRAGMAP_ALIGN.out.log)

Expand All @@ -47,6 +50,7 @@ workflow FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON {

emit:
bam // channel: [ [meta], bam ]
bai // channel: [ [meta], bai ]
reports
versions // channel: [ versions.yml ]
}
7 changes: 6 additions & 1 deletion workflows/sarek.nf
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,10 @@ workflow SAREK {
[ groupKey( meta - meta.subMap('num_lanes', 'read_group', 'size') + [ data_type:'bam', id:meta.sample ], (meta.num_lanes ?: 1) * (meta.size ?: 1)), bam ]
}.groupTuple()

bai_mapped = FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON.out.bai.map{ meta, bai ->
[ groupKey( meta - meta.subMap('num_lanes', 'read_group', 'size') + [ data_type:'bai', id:meta.sample ], (meta.num_lanes ?: 1) * (meta.size ?: 1)), bai ]
}.groupTuple()

// gatk4 markduplicates can handle multiple bams as input, so no need to merge/index here
// Except if and only if save_mapped or (skipping markduplicates and sentieon-dedup)
if (
Expand Down Expand Up @@ -578,7 +582,6 @@ workflow SAREK {
// cram_for_markduplicates will contain the BAMs mapped by FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON when the step is 'mapping',
// or otherwise the input files specified in the samplesheet.csv
cram_for_markduplicates = params.step == 'mapping' ? bam_mapped : input_sample.map{ meta, input, index -> [ meta, input ] }

// if no MD is done, then run QC on mapped & converted CRAM files
// or the input BAM (+converted) or CRAM files
cram_skip_markduplicates = Channel.empty()
Expand Down Expand Up @@ -628,8 +631,10 @@ workflow SAREK {
// Gather the versions of the software used
versions = versions.mix(BAM_MARKDUPLICATES_SPARK.out.versions)
} else if (params.tools && params.tools.split(',').contains('sentieon_dedup')) {
crai_for_markduplicates = params.step == 'mapping' ? bai_mapped : input_sample.map{ meta, input, index -> [ meta, index ] }
BAM_SENTIEON_DEDUP(
cram_for_markduplicates,
crai_for_markduplicates,
fasta,
fasta_fai,
intervals_for_preprocessing)
Expand Down

0 comments on commit df0f578

Please sign in to comment.