Merge pull request #957 from maxulysse/mutect2
Add failsafe options for Nextflow join operator
maxulysse authored Mar 2, 2023
2 parents 4e60e8c + 59b3d0d commit b2a8891
Showing 19 changed files with 61 additions and 54 deletions.
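Background (editor's note, not part of the commit): Nextflow's `join()` pairs items from two channels by a matching key, here the meta map in the first tuple element. With its defaults it silently drops items that never find a partner and tolerates duplicated keys, so an incomplete channel can propagate unnoticed. The `failOnDuplicate: true` and `failOnMismatch: true` options added throughout this commit turn both situations into immediate errors. A minimal sketch with invented sample names:

    // Two channels keyed on a meta map; 'sampleB' has no index file.
    ch_vcf = Channel.of( [ [id:'sampleA'], 'sampleA.vcf.gz' ],
                         [ [id:'sampleB'], 'sampleB.vcf.gz' ] )
    ch_tbi = Channel.of( [ [id:'sampleA'], 'sampleA.vcf.gz.tbi' ] )

    // Default join: sampleB silently disappears from the output.
    ch_vcf.join(ch_tbi).view()

    // Failsafe join as used in this commit: the missing partner for sampleB
    // (or a duplicated key on either side) aborts the run instead.
    ch_vcf.join(ch_tbi, failOnDuplicate: true, failOnMismatch: true).view()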
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -41,6 +41,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#929](https://github.com/nf-core/sarek/pull/929) - Fix somatic variant calling issues with msisensor following [#896](https://github.com/nf-core/sarek/pull/896)
- [#941](https://github.com/nf-core/sarek/pull/941) - Fix json validation for `tools`, `skip_tools` and `use_gatk_spark` [#892](https://github.com/nf-core/sarek/issues/892)
- [#954](https://github.com/nf-core/sarek/pull/954) - Fix missing annotation keys with snpeff and ensemblvep for `hg19`
+- [#957](https://github.com/nf-core/sarek/pull/957) - Add `failOnDuplicate` and `failOnMismatch` options to all `join()` operators where possible

### Deprecated

2 changes: 1 addition & 1 deletion modules.json
@@ -434,7 +434,7 @@
"nf-core": {
"vcf_annotate_ensemblvep": {
"branch": "master",
"git_sha": "bea3ca998816a7f812b1bbbcb27c3a9ffbac0706",
"git_sha": "82b8c4ef7d34c5c1009649581af46b684183da12",
"installed_by": ["subworkflows"]
},
"vcf_annotate_snpeff": {
6 changes: 3 additions & 3 deletions subworkflows/local/bam_convert_samtools/main.nf
@@ -37,8 +37,8 @@ workflow BAM_CONVERT_SAMTOOLS {

// Merge UNMAP
all_unmapped_bam = SAMTOOLS_VIEW_UNMAP_UNMAP.out.bam
-.join(SAMTOOLS_VIEW_UNMAP_MAP.out.bam, remainder: true)
-.join(SAMTOOLS_VIEW_MAP_UNMAP.out.bam, remainder: true)
+.join(SAMTOOLS_VIEW_UNMAP_MAP.out.bam, failOnDuplicate: true, remainder: true)
+.join(SAMTOOLS_VIEW_MAP_UNMAP.out.bam, failOnDuplicate: true, remainder: true)
.map{ meta, unmap_unmap, unmap_map, map_unmap -> [ meta, [ unmap_unmap, unmap_map, map_unmap ] ] }

SAMTOOLS_MERGE_UNMAP(all_unmapped_bam, fasta[1], fasta_fai)
@@ -52,7 +52,7 @@ workflow BAM_CONVERT_SAMTOOLS {
// join Mapped & unmapped fastq

reads_to_concat = COLLATE_FASTQ_MAP.out.fastq
-.join(COLLATE_FASTQ_UNMAP.out.fastq)
+.join(COLLATE_FASTQ_UNMAP.out.fastq, failOnDuplicate: true, failOnMismatch: true)
.map{ meta, mapped_reads, unmapped_reads -> [ meta, [ mapped_reads[0], mapped_reads[1], unmapped_reads[0], unmapped_reads[1] ] ] }

// Concatenate Mapped_R1 with Unmapped_R1 and Mapped_R2 with Unmapped_R2
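Editor's note on the hunk above: the two "Merge UNMAP" joins keep `remainder: true` because some of the split BAMs can legitimately be absent, so only `failOnDuplicate` is added there; `failOnMismatch` would defeat `remainder`, whose purpose is to emit unmatched keys (padded with null) rather than fail. The COLLATE_FASTQ join, which does expect a strict one-to-one match, gets both options. A rough sketch with invented file names:

    // Invented channels mimicking the unmapped-read split.
    ch_unmap_unmap = Channel.of( [ [id:'test'],  'test.unmap_unmap.bam' ] )
    ch_unmap_map   = Channel.of( [ [id:'test'],  'test.unmap_map.bam' ],
                                 [ [id:'test2'], 'test2.unmap_map.bam' ] )

    ch_unmap_unmap.join(ch_unmap_map, failOnDuplicate: true, remainder: true).view()
    // -> [[id:test],  test.unmap_unmap.bam, test.unmap_map.bam]
    //    [[id:test2], null,                 test2.unmap_map.bam]
    // Unmatched keys are kept with null fillers instead of raising an error,
    // while a duplicated key in either channel would still stop the run.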
14 changes: 7 additions & 7 deletions subworkflows/local/bam_joint_calling_germline_gatk/main.nf
@@ -55,7 +55,7 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK {
// Rework meta for variantscalled.csv and annotation tools
MERGE_GENOTYPEGVCFS(gvcf_to_merge, dict.map{ it -> [ [ id:'dict' ], it ] } )

-vqsr_input = MERGE_GENOTYPEGVCFS.out.vcf.join(MERGE_GENOTYPEGVCFS.out.tbi)
+vqsr_input = MERGE_GENOTYPEGVCFS.out.vcf.join(MERGE_GENOTYPEGVCFS.out.tbi, failOnDuplicate: true)
indels_resource_label = known_indels_vqsr.mix(dbsnp_vqsr).collect()
snps_resource_label = known_snps_vqsr.mix(dbsnp_vqsr).collect()

@@ -82,16 +82,16 @@ workflow BAM_JOINT_CALLING_GERMLINE_GATK {

// Join results of variant recalibration into a single channel tuple
// Rework meta for variantscalled.csv and annotation tools
-vqsr_input_snp = vqsr_input.join(VARIANTRECALIBRATOR_SNP.out.recal)
-.join(VARIANTRECALIBRATOR_SNP.out.idx)
-.join(VARIANTRECALIBRATOR_SNP.out.tranches)
+vqsr_input_snp = vqsr_input.join(VARIANTRECALIBRATOR_SNP.out.recal, failOnDuplicate: true)
+.join(VARIANTRECALIBRATOR_SNP.out.idx, failOnDuplicate: true)
+.join(VARIANTRECALIBRATOR_SNP.out.tranches, failOnDuplicate: true)
.map{ meta, vcf, tbi, recal, index, tranche -> [ meta - meta.subMap('id') + [ id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] }

// Join results of variant recalibration into a single channel tuple
// Rework meta for variantscalled.csv and annotation tools
-vqsr_input_indel = vqsr_input.join(VARIANTRECALIBRATOR_INDEL.out.recal)
-.join(VARIANTRECALIBRATOR_INDEL.out.idx)
-.join(VARIANTRECALIBRATOR_INDEL.out.tranches)
+vqsr_input_indel = vqsr_input.join(VARIANTRECALIBRATOR_INDEL.out.recal, failOnDuplicate: true)
+.join(VARIANTRECALIBRATOR_INDEL.out.idx, failOnDuplicate: true)
+.join(VARIANTRECALIBRATOR_INDEL.out.tranches, failOnDuplicate: true)
.map{ meta, vcf, tbi, recal, index, tranche -> [ meta - meta.subMap('id') + [ id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] }

GATK4_APPLYVQSR_SNP(
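Editor's note: the VQSR joins above add only `failOnDuplicate`, which guards against the same meta key arriving twice in a channel; without it a duplicated key can be paired or dropped silently. A hedged sketch with made-up values:

    // Invented channels: the recal channel accidentally carries the key twice.
    ch_vcf   = Channel.of( [ [id:'joint'], 'joint.vcf.gz' ] )
    ch_recal = Channel.of( [ [id:'joint'], 'snp.recal' ],
                           [ [id:'joint'], 'snp.recal.old' ] )

    ch_vcf.join(ch_recal, failOnDuplicate: true).view()
    // -> fails as soon as the duplicated [id:'joint'] key is detected,
    //    instead of letting one of the recal files slip through unnoticed.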
2 changes: 1 addition & 1 deletion subworkflows/local/bam_markduplicates/main.nf
@@ -26,7 +26,7 @@ workflow BAM_MARKDUPLICATES {
INDEX_MARKDUPLICATES(GATK4_MARKDUPLICATES.out.cram)

// Join with the crai file
-cram = GATK4_MARKDUPLICATES.out.cram.join(INDEX_MARKDUPLICATES.out.crai)
+cram = GATK4_MARKDUPLICATES.out.cram.join(INDEX_MARKDUPLICATES.out.crai, failOnDuplicate: true, failOnMismatch: true)

// QC on CRAM
CRAM_QC_MOSDEPTH_SAMTOOLS(cram, fasta, intervals_bed_combined)
2 changes: 1 addition & 1 deletion subworkflows/local/bam_markduplicates_spark/main.nf
@@ -28,7 +28,7 @@ workflow BAM_MARKDUPLICATES_SPARK {
INDEX_MARKDUPLICATES(GATK4_MARKDUPLICATES_SPARK.out.output)

// Join with the crai file
-cram = GATK4_MARKDUPLICATES_SPARK.out.output.join(INDEX_MARKDUPLICATES.out.crai)
+cram = GATK4_MARKDUPLICATES_SPARK.out.output.join(INDEX_MARKDUPLICATES.out.crai, failOnDuplicate: true, failOnMismatch: true)

// QC on CRAM
CRAM_QC_MOSDEPTH_SAMTOOLS(cram, fasta, intervals_bed_combined)
2 changes: 1 addition & 1 deletion subworkflows/local/bam_merge_index_samtools/main.nf
@@ -32,7 +32,7 @@ workflow BAM_MERGE_INDEX_SAMTOOLS {
INDEX_MERGE_BAM(bam_all)

// Join with the bai file
-bam_bai = bam_all.join(INDEX_MERGE_BAM.out.bai)
+bam_bai = bam_all.join(INDEX_MERGE_BAM.out.bai, failOnDuplicate: true, failOnMismatch: true)

// Gather versions of all tools used
versions = versions.mix(INDEX_MERGE_BAM.out.versions)
@@ -36,8 +36,9 @@ workflow BAM_VARIANT_CALLING_HAPLOTYPECALLER {
GATK4_HAPLOTYPECALLER(cram_intervals, fasta, fasta_fai, dict, dbsnp, dbsnp_tbi)

// For joint genotyping
-genotype_intervals = GATK4_HAPLOTYPECALLER.out.vcf.join(GATK4_HAPLOTYPECALLER.out.tbi)
-.join(cram_intervals)
+genotype_intervals = GATK4_HAPLOTYPECALLER.out.vcf
+.join(GATK4_HAPLOTYPECALLER.out.tbi, failOnMismatch: true)
+.join(cram_intervals, failOnMismatch: true)
.map{ meta, gvcf, tbi, cram, crai, intervals, dragstr_model -> [ meta, gvcf, tbi, intervals ] }

// Figuring out if there is one or more vcf(s) from the same sample
@@ -86,7 +87,7 @@ workflow BAM_VARIANT_CALLING_HAPLOTYPECALLER {
if (!skip_haplotypecaller_filter) {

VCF_VARIANT_FILTERING_GATK(
-haplotypecaller_vcf.join(haplotypecaller_tbi),
+haplotypecaller_vcf.join(haplotypecaller_tbi, failOnDuplicate: true, failOnMismatch: true),
fasta,
fasta_fai,
dict,
2 changes: 1 addition & 1 deletion subworkflows/local/bam_variant_calling_somatic_all/main.nf
@@ -158,7 +158,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL {
if (tools.split(',').contains('strelka')) {
// Remap channel to match module/subworkflow
cram_strelka = (tools.split(',').contains('manta')) ?
-cram.join(BAM_VARIANT_CALLING_SOMATIC_MANTA.out.candidate_small_indels_vcf).join(BAM_VARIANT_CALLING_SOMATIC_MANTA.out.candidate_small_indels_vcf_tbi) :
+cram.join(BAM_VARIANT_CALLING_SOMATIC_MANTA.out.candidate_small_indels_vcf, failOnDuplicate: true, failOnMismatch: true).join(BAM_VARIANT_CALLING_SOMATIC_MANTA.out.candidate_small_indels_vcf_tbi, failOnDuplicate: true, failOnMismatch: true) :
cram.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai -> [ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, [], [] ] }

BAM_VARIANT_CALLING_SOMATIC_STRELKA(
@@ -21,10 +21,10 @@ workflow BAM_VARIANT_CALLING_SOMATIC_CONTROLFREEC {

FREEC_SOMATIC(controlfreec_input, fasta, fasta_fai, [], dbsnp, dbsnp_tbi, chr_files, mappability, intervals_bed, [])

-ASSESS_SIGNIFICANCE(FREEC_SOMATIC.out.CNV.join(FREEC_SOMATIC.out.ratio))
+ASSESS_SIGNIFICANCE(FREEC_SOMATIC.out.CNV.join(FREEC_SOMATIC.out.ratio, failOnDuplicate: true, failOnMismatch: true))
FREEC2BED(FREEC_SOMATIC.out.ratio)
FREEC2CIRCOS(FREEC_SOMATIC.out.ratio)
-MAKEGRAPH(FREEC_SOMATIC.out.ratio.join(FREEC_SOMATIC.out.BAF))
+MAKEGRAPH(FREEC_SOMATIC.out.ratio.join(FREEC_SOMATIC.out.BAF, failOnDuplicate: true, failOnMismatch: true))

ch_versions = ch_versions.mix(FREEC_SOMATIC.out.versions)
ch_versions = ch_versions.mix(ASSESS_SIGNIFICANCE.out.versions)
@@ -121,10 +121,14 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MUTECT2 {
.map{ meta, table -> [ meta - meta.subMap('id') + [ id:meta.tumor_id + "_vs_" + meta.normal_id ], table ] }

// Contamination and segmentation tables created using calculatecontamination on the pileup summary table
-CALCULATECONTAMINATION(pileup_table_tumor.join(pileup_table_normal))
+CALCULATECONTAMINATION(pileup_table_tumor.join(pileup_table_normal, failOnDuplicate: true, failOnMismatch: true))

// Mutect2 calls filtered by filtermutectcalls using the artifactpriors, contamination and segmentation tables
-vcf_to_filter = vcf.join(tbi).join(stats).join(LEARNREADORIENTATIONMODEL.out.artifactprior).join(CALCULATECONTAMINATION.out.segmentation).join(CALCULATECONTAMINATION.out.contamination)
+vcf_to_filter = vcf.join(tbi, failOnDuplicate: true, failOnMismatch: true)
+.join(stats, failOnDuplicate: true, failOnMismatch: true)
+.join(LEARNREADORIENTATIONMODEL.out.artifactprior, failOnDuplicate: true, failOnMismatch: true)
+.join(CALCULATECONTAMINATION.out.segmentation, failOnDuplicate: true, failOnMismatch: true)
+.join(CALCULATECONTAMINATION.out.contamination, failOnDuplicate: true, failOnMismatch: true)
.map{ meta, vcf, tbi, stats, orientation, seg, cont -> [ meta, vcf, tbi, stats, orientation, seg, cont, [] ] }

FILTERMUTECTCALLS(vcf_to_filter, fasta, fai, dict)
@@ -16,7 +16,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_TIDDIT {
TIDDIT_NORMAL(cram_normal, fasta, bwa)
TIDDIT_TUMOR(cram_tumor, fasta, bwa)

-SVDB_MERGE(TIDDIT_NORMAL.out.vcf.join(TIDDIT_TUMOR.out.vcf).map{ meta, vcf_normal, vcf_tumor -> [ meta, [vcf_normal, vcf_tumor] ] }, false)
+SVDB_MERGE(TIDDIT_NORMAL.out.vcf.join(TIDDIT_TUMOR.out.vcf, failOnDuplicate: true, failOnMismatch: true).map{ meta, vcf_normal, vcf_tumor -> [ meta, [vcf_normal, vcf_tumor] ] }, false)

vcf = SVDB_MERGE.out.vcf

@@ -21,10 +21,10 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_CONTROLFREEC {

FREEC_TUMORONLY(controlfreec_input, fasta, fasta_fai, [], dbsnp, dbsnp_tbi, chr_files, mappability, intervals_bed, [])

-ASSESS_SIGNIFICANCE(FREEC_TUMORONLY.out.CNV.join(FREEC_TUMORONLY.out.ratio))
+ASSESS_SIGNIFICANCE(FREEC_TUMORONLY.out.CNV.join(FREEC_TUMORONLY.out.ratio, failOnDuplicate: true, failOnMismatch: true))
FREEC2BED(FREEC_TUMORONLY.out.ratio)
FREEC2CIRCOS(FREEC_TUMORONLY.out.ratio)
-MAKEGRAPH(FREEC_TUMORONLY.out.ratio.join(FREEC_TUMORONLY.out.BAF))
+MAKEGRAPH(FREEC_TUMORONLY.out.ratio.join(FREEC_TUMORONLY.out.BAF, failOnDuplicate: true, failOnMismatch: true))

ch_versions = ch_versions.mix(FREEC_TUMORONLY.out.versions)
ch_versions = ch_versions.mix(ASSESS_SIGNIFICANCE.out.versions)
@@ -104,7 +104,11 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 {
CALCULATECONTAMINATION(pileup_table.map{ meta, table -> [ meta, table, [] ] })

// Mutect2 calls filtered by filtermutectcalls using the contamination and segmentation tables
-vcf_to_filter = vcf.join(tbi).join(stats).join(LEARNREADORIENTATIONMODEL.out.artifactprior).join(CALCULATECONTAMINATION.out.segmentation).join(CALCULATECONTAMINATION.out.contamination)
+vcf_to_filter = vcf.join(tbi, failOnDuplicate: true, failOnMismatch: true)
+.join(stats, failOnDuplicate: true, failOnMismatch: true)
+.join(LEARNREADORIENTATIONMODEL.out.artifactprior, failOnDuplicate: true, failOnMismatch: true)
+.join(CALCULATECONTAMINATION.out.segmentation, failOnDuplicate: true, failOnMismatch: true)
+.join(CALCULATECONTAMINATION.out.contamination, failOnDuplicate: true, failOnMismatch: true)
.map{ meta, vcf, tbi, stats, artifactprior, seg, cont -> [ meta, vcf, tbi, stats, artifactprior, seg, cont, [] ] }

FILTERMUTECTCALLS(vcf_to_filter, fasta, fai, dict)
2 changes: 1 addition & 1 deletion subworkflows/local/cram_merge_index_samtools/main.nf
@@ -34,7 +34,7 @@ workflow CRAM_MERGE_INDEX_SAMTOOLS {
INDEX_CRAM(cram_all)

// Join with the crai file
-cram_crai = cram_all.join(INDEX_CRAM.out.crai)
+cram_crai = cram_all.join(INDEX_CRAM.out.crai, failOnDuplicate: true, failOnMismatch: true)

// Gather versions of all tools used
versions = versions.mix(INDEX_CRAM.out.versions.first())
2 changes: 1 addition & 1 deletion subworkflows/local/vcf_variant_filtering_gatk/main.nf
@@ -21,7 +21,7 @@ workflow VCF_VARIANT_FILTERING_GATK {

CNNSCOREVARIANTS(cnn_in, fasta, fasta_fai, dict, [], [])

-FILTERVARIANTTRANCHES(CNNSCOREVARIANTS.out.vcf.join(CNNSCOREVARIANTS.out.tbi).combine(intervals_bed_combined), known_sites, known_sites_tbi, fasta, fasta_fai, dict)
+FILTERVARIANTTRANCHES(CNNSCOREVARIANTS.out.vcf.join(CNNSCOREVARIANTS.out.tbi, failOnDuplicate: true, failOnMismatch: true).combine(intervals_bed_combined), known_sites, known_sites_tbi, fasta, fasta_fai, dict)

filtered_vcf = FILTERVARIANTTRANCHES.out.vcf
// add variantcaller to meta map and remove no longer necessary field: num_intervals
2 changes: 1 addition & 1 deletion subworkflows/nf-core/vcf_annotate_ensemblvep/main.nf

Some generated files are not rendered by default.

16 changes: 8 additions & 8 deletions tests/test_gatk4_spark.yml
@@ -12,15 +12,15 @@
md5sum: 2d29d9e53894dcce96a1b5beb6ef3312
- path: results/multiqc
- path: results/preprocessing/markduplicates/test/test.md.cram
-md5sum: 5a0ba5e67e828ed0bcf29ade62a1b2db
+md5sum: b1338daf0d2f85c42fdc1bf12764bcae
- path: results/preprocessing/markduplicates/test/test.md.cram.crai
-md5sum: 2de7ae39dd18ae332f14d23d32549524
+md5sum: 4f249f7b2492c37fd2186b02f7e5b03c
- path: results/preprocessing/recal_table/test/test.recal.table
md5sum: 5b6e5078b4a90f6cb982fa0f0df616c2
- path: results/preprocessing/recalibrated/test/test.recal.cram
-md5sum: fa292026b4076705f7cc2d9fd7f4b450
+md5sum: b841ee6b350ff4e5e25ebe877794e837
- path: results/preprocessing/recalibrated/test/test.recal.cram.crai
-md5sum: 952ab9c5dedcf2105247bc20129b40ee
+md5sum: 91b334df15cef7afc73e68bacef8029b
- path: results/reports/fastqc/test-test_L1
- path: results/reports/markduplicates/test/test.md.cram.metrics
# text-based file changes md5sums on reruns
@@ -62,15 +62,15 @@
- path: results/csv/recalibrated.csv
md5sum: 2d29d9e53894dcce96a1b5beb6ef3312
- path: results/preprocessing/markduplicates/test/test.md.cram
-md5sum: 5a0ba5e67e828ed0bcf29ade62a1b2db
+md5sum: b1338daf0d2f85c42fdc1bf12764bcae
- path: results/preprocessing/markduplicates/test/test.md.cram.crai
-md5sum: 2de7ae39dd18ae332f14d23d32549524
+md5sum: 4f249f7b2492c37fd2186b02f7e5b03c
- path: results/preprocessing/recal_table/test/test.recal.table
md5sum: 5b6e5078b4a90f6cb982fa0f0df616c2
- path: results/preprocessing/recalibrated/test/test.recal.cram
-md5sum: fa292026b4076705f7cc2d9fd7f4b450
+md5sum: b841ee6b350ff4e5e25ebe877794e837
- path: results/preprocessing/recalibrated/test/test.recal.cram.crai
-md5sum: 952ab9c5dedcf2105247bc20129b40ee
+md5sum: 91b334df15cef7afc73e68bacef8029b
- path: results/multiqc
should_exist: false
- path: results/reports/fastqc