diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 16dc1a445..be4b5d033 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -43,6 +43,7 @@ jobs: - "mutect2" - "msisensorpro" # - 'save_bam_mapped' + - "variantcalling_channel" - "skip_markduplicates" - "strelka" - "split_fastq" diff --git a/CHANGELOG.md b/CHANGELOG.md index b5b245a7c..02fb7a538 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#507](https://github.com/nf-core/sarek/pull/507), [#537](https://github.com/nf-core/sarek/pull/537) - Subway map for building indexes - [#512](https://github.com/nf-core/sarek/pull/512), [#531](https://github.com/nf-core/sarek/pull/531), [#537](https://github.com/nf-core/sarek/pull/537) - Subway map for pipeline - [#522](https://github.com/nf-core/sarek/pull/522) - Add QC for vcf files & MultiQC +- [#533](https://github.com/nf-core/sarek/pull/533) - Add param `--only_paired_variant_calling` to allow skipping of germline variant calling for paired samples ### Changed diff --git a/conf/test.config b/conf/test.config index 57884bd79..43010f409 100644 --- a/conf/test.config +++ b/conf/test.config @@ -139,6 +139,13 @@ profiles { use_gatk_spark { params.use_gatk_spark = 'baserecalibrator,markduplicates' } + variantcalling_channels { + params.input = "${baseDir}/tests/csv/3.0/recalibrated.csv" + params.fasta = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" + params.wes = true + params.step = 'variant_calling' + params.intervals = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" + } } //This is apparently useless as it won't overwrite things in the modules.config diff --git a/nextflow.config b/nextflow.config index 638b60ef9..8dbbb6993 100644 --- a/nextflow.config +++ b/nextflow.config @@ -51,6 +51,7 @@ params { sequencing_center = null // No sequencing center to be 
written in BAM header by aligner // Variant Calling + only_paired_variant_calling = false // if true, skips germline variant calling for normal samples that have a matched tumor ploidy = 2 //null (in ascat, test this works) // Use default value, you can use 2,3,4 ascat_purity = null // Use default value cf_coeff = 0.05 // default value for Control-FREEC diff --git a/nextflow_schema.json b/nextflow_schema.json index 0f4f600dc..5995db9b7 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -232,6 +232,11 @@ "default": "", "fa_icon": "fas fa-toolbox", "properties": { + "only_paired_variant_calling": { + "type": "boolean", + "fa_icon": "fas fa-angle-double-right", + "description": "If true, skips germline variant calling for normal samples that have a matched tumor sample. Normal samples without a matched tumor will still be processed through germline variant calling tools." + }, "ploidy": { "type": "number", "fa_icon": "fas fa-bacon", diff --git a/tests/test_only_paired_VC.yml b/tests/test_only_paired_VC.yml new file mode 100644 index 000000000..6c6740738 --- /dev/null +++ b/tests/test_only_paired_VC.yml @@ -0,0 +1,53 @@ +- name: Skip variant calling on matched normal + command: nextflow run main.nf -profile test,variantcalling_channels,docker --tools strelka --only_paired_variant_calling + tags: + - somatic + - strelka + - variant_calling + - variantcalling_channel + files: + - path: results/variant_calling/sample1/strelka/sample1.variants.vcf.gz + - path: results/variant_calling/sample1/strelka/sample1.variants.vcf.gz.tbi + - path: results/variant_calling/sample1/strelka/sample1.genome.vcf.gz + - path: results/variant_calling/sample1/strelka/sample1.genome.vcf.gz.tbi + - path: results/variant_calling/sample3/strelka/sample3.variants.vcf.gz + should_exist: false + - path: results/variant_calling/sample3/strelka/sample3.variants.vcf.gz.tbi + should_exist: false + - path: results/variant_calling/sample3/strelka/sample3.genome.vcf.gz + should_exist: false + - path: 
results/variant_calling/sample3/strelka/sample3.genome.vcf.gz.tbi + should_exist: false + - path: results/variant_calling/sample2/strelka/sample2.variants.vcf.gz + - path: results/variant_calling/sample2/strelka/sample2.variants.vcf.gz.tbi + - path: results/variant_calling/sample2/strelka/sample2.genome.vcf.gz + - path: results/variant_calling/sample2/strelka/sample2.genome.vcf.gz.tbi + - path: results/variant_calling/sample4_vs_sample3/strelka/sample4_vs_sample3.somatic_indels.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/strelka/sample4_vs_sample3.somatic_indels.vcf.gz.tbi + - path: results/variant_calling/sample4_vs_sample3/strelka/sample4_vs_sample3.somatic_snvs.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/strelka/sample4_vs_sample3.somatic_snvs.vcf.gz.tbi + +- name: Do germline variant calling on matched normal + command: nextflow run main.nf -profile test,variantcalling_channels,docker --tools strelka + tags: + - somatic + - strelka + - variant_calling + - variantcalling_channel + files: + - path: results/variant_calling/sample1/strelka/sample1.variants.vcf.gz + - path: results/variant_calling/sample1/strelka/sample1.variants.vcf.gz.tbi + - path: results/variant_calling/sample1/strelka/sample1.genome.vcf.gz + - path: results/variant_calling/sample1/strelka/sample1.genome.vcf.gz.tbi + - path: results/variant_calling/sample3/strelka/sample3.variants.vcf.gz + - path: results/variant_calling/sample3/strelka/sample3.variants.vcf.gz.tbi + - path: results/variant_calling/sample3/strelka/sample3.genome.vcf.gz + - path: results/variant_calling/sample3/strelka/sample3.genome.vcf.gz.tbi + - path: results/variant_calling/sample2/strelka/sample2.variants.vcf.gz + - path: results/variant_calling/sample2/strelka/sample2.variants.vcf.gz.tbi + - path: results/variant_calling/sample2/strelka/sample2.genome.vcf.gz + - path: results/variant_calling/sample2/strelka/sample2.genome.vcf.gz.tbi + - path: 
results/variant_calling/sample4_vs_sample3/strelka/sample4_vs_sample3.somatic_indels.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/strelka/sample4_vs_sample3.somatic_indels.vcf.gz.tbi + - path: results/variant_calling/sample4_vs_sample3/strelka/sample4_vs_sample3.somatic_snvs.vcf.gz + - path: results/variant_calling/sample4_vs_sample3/strelka/sample4_vs_sample3.somatic_snvs.vcf.gz.tbi diff --git a/workflows/sarek.nf b/workflows/sarek.nf index eb34bcc8b..d793c6f76 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -627,6 +627,22 @@ workflow SAREK { // and remove patient ID field & null value for further processing [meta1, [cram1,crai1]] [meta2, [cram2,crai2]] cram_variant_calling_tumor_only = cram_variant_calling_tumor_filtered.transpose().map{ it -> [it[1], it[2], it[3]] } + if(params.only_paired_variant_calling){ + // Normal only samples + + // 1. Join with tumor samples, in each channel there is one key per patient now. Patients without matched tumor end up with: [patient1, [meta1], [cram1,crai1], null] as there is only one matched normal possible + cram_variant_calling_normal_joined = cram_variant_calling_normal_to_cross.join(cram_variant_calling_tumor_grouped, remainder: true) + + // 2. Keep only entries whose last entry is null, i.e. normal samples without a matched tumor + cram_variant_calling_normal_filtered = cram_variant_calling_normal_joined.filter{ it -> !(it.last()) } + + // 3. Remove patient ID field & null value for further processing [meta1, [cram1,crai1]] [meta2, [cram2,crai2]] (no transposing needed since only one normal per patient ID) + cram_variant_calling_status_normal = cram_variant_calling_normal_filtered.map{ it -> [it[1], it[2], it[3]] } + + }else{ + cram_variant_calling_status_normal = cram_variant_calling_status.normal + } + // Tumor - normal pairs // Use cross to combine normal with all tumor samples, i.e. 
multi tumor samples from recurrences cram_variant_calling_pair = cram_variant_calling_normal_to_cross.cross(cram_variant_calling_pair_to_cross) @@ -643,7 +659,7 @@ workflow SAREK { // GERMLINE VARIANT CALLING GERMLINE_VARIANT_CALLING( - cram_variant_calling_status.normal, + cram_variant_calling_status_normal, dbsnp, dbsnp_tbi, dict,