diff --git a/CHANGELOG.md b/CHANGELOG.md
index bc90054c88..60d93f7426 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
+- [#1193](https://github.com/nf-core/sarek/pull/1193) - Adding support for Sentieon's DNAscope for germline variant-calling including joint-germline.
- [#1271](https://github.com/nf-core/sarek/pull/1271) - Back to dev
### Changed
diff --git a/conf/modules/prepare_genome.config b/conf/modules/prepare_genome.config
index 66eb2041d2..85367196c2 100644
--- a/conf/modules/prepare_genome.config
+++ b/conf/modules/prepare_genome.config
@@ -76,7 +76,7 @@ process {
}
withName: 'TABIX_DBSNP' {
- ext.when = { !params.dbsnp_tbi && params.dbsnp && ((params.step == "mapping" || params.step == "markduplicates" || params.step == "prepare_recalibration") || params.tools && (params.tools.split(',').contains('controlfreec') || params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper') || params.tools.split(',').contains('mutect2'))) }
+ ext.when = { !params.dbsnp_tbi && params.dbsnp && ((params.step == "mapping" || params.step == "markduplicates" || params.step == "prepare_recalibration") || params.tools && (params.tools.split(',').contains('controlfreec') || params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper') || params.tools.split(',').contains('sentieon_dnascope') || params.tools.split(',').contains('mutect2'))) }
publishDir = [
enabled: (params.save_reference || params.build_only_index),
mode: params.publish_dir_mode,
@@ -96,7 +96,7 @@ process {
}
withName: 'TABIX_KNOWN_INDELS' {
- ext.when = { !params.known_indels_tbi && params.known_indels && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' || (params.tools && (params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper'))) ) }
+ ext.when = { !params.known_indels_tbi && params.known_indels && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' || (params.tools && (params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper') || params.tools.split(',').contains('sentieon_dnascope'))) ) }
publishDir = [
enabled: (params.save_reference || params.build_only_index),
mode: params.publish_dir_mode,
@@ -106,7 +106,7 @@ process {
}
withName: 'TABIX_KNOWN_SNPS' {
- ext.when = { !params.known_snps_tbi && params.known_snps && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' || (params.tools && (params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper'))) ) }
+        ext.when         = { !params.known_snps_tbi && params.known_snps && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' || (params.tools && (params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper') || params.tools.split(',').contains('sentieon_dnascope'))) ) }
publishDir = [
enabled: (params.save_reference || params.build_only_index),
mode: params.publish_dir_mode,
diff --git a/conf/modules/sentieon_dnascope.config b/conf/modules/sentieon_dnascope.config
new file mode 100644
index 0000000000..fa431ae417
--- /dev/null
+++ b/conf/modules/sentieon_dnascope.config
@@ -0,0 +1,68 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Config file for defining DSL2 per module options and publishing paths
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Available keys to override module options:
+ ext.args = Additional arguments appended to command in module.
+ ext.args2 = Second set of arguments appended to command in module (multi-tool modules).
+ ext.args3 = Third set of arguments appended to command in module (multi-tool modules).
+ ext.prefix = File name prefix for output files.
+ ext.when = When to run the module.
+----------------------------------------------------------------------------------------
+*/
+
+// SENTIEON DNASCOPE
+
+process {
+
+ withName: 'SENTIEON_DNASCOPE' {
+ ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.dnascope" : "${meta.id}.dnascope.${intervals.simpleName}" }
+ ext.when = { params.tools && params.tools.split(',').contains('sentieon_dnascope') }
+ publishDir = [
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/variant_calling/"},
+ pattern: "*{vcf.gz,vcf.gz.tbi}",
+ saveAs: { meta.num_intervals > 1 ? null : "sentieon_dnascope/${meta.id}/${it}" }
+ ]
+ }
+
+ withName: 'MERGE_SENTIEON_DNASCOPE_VCFS' {
+ ext.prefix = { params.joint_germline ? "${meta.id}.dnascope.g" : "${meta.id}.dnascope.unfiltered" }
+ publishDir = [
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/variant_calling/sentieon_dnascope/${meta.id}/" },
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
+
+ withName: 'MERGE_SENTIEON_DNASCOPE_GVCFS' {
+ ext.prefix = { "${meta.id}.dnascope.g" }
+ publishDir = [
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/variant_calling/sentieon_dnascope/${meta.id}/" },
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
+
+    if (params.tools && params.tools.split(',').contains('sentieon_dnascope')) {
+ withName: '.*FILTERVARIANTTRANCHES' {
+ ext.prefix = {"${meta.id}.dnascope"}
+ ext.args = { "--info-key CNN_1D" }
+ publishDir = [
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/variant_calling/sentieon_dnascope/${meta.id}/"},
+ pattern: "*{vcf.gz,vcf.gz.tbi}"
+ ]
+ }
+ }
+
+ withName: 'SENTIEON_DNAMODELAPPLY' {
+ ext.prefix = {"${meta.id}.dnascope.filtered"}
+ publishDir = [
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/variant_calling/sentieon_dnascope/${meta.id}/"},
+ pattern: "*{vcf.gz,vcf.gz.tbi}"
+ ]
+ }
+
+}
diff --git a/conf/modules/sentieon_dnascope_joint_germline.config b/conf/modules/sentieon_dnascope_joint_germline.config
new file mode 100644
index 0000000000..72dd6c3144
--- /dev/null
+++ b/conf/modules/sentieon_dnascope_joint_germline.config
@@ -0,0 +1,45 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Config file for defining DSL2 per module options and publishing paths
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Available keys to override module options:
+ ext.args = Additional arguments appended to command in module.
+ ext.args2 = Second set of arguments appended to command in module (multi-tool modules).
+ ext.args3 = Third set of arguments appended to command in module (multi-tool modules).
+ ext.prefix = File name prefix for output files.
+ ext.when = When to run the module.
+----------------------------------------------------------------------------------------
+*/
+
+// SENTIEON DNASCOPE JOINT_GERMLINE
+
+process {
+
+ // TO-DO: duplicate!!
+ withName: 'SENTIEON_GVCFTYPER' {
+ ext.args = { "--allow-old-rms-mapping-quality-annotation-data" }
+ ext.prefix = { meta.intervals_name }
+ publishDir = [
+ enabled: false
+ ]
+ }
+
+    if (params.tools && params.tools.split(',').contains('sentieon_dnascope') && params.joint_germline) {
+ withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_JOINT_CALLING_GERMLINE_SENTIEON:BCFTOOLS_SORT' {
+ ext.prefix = { vcf.baseName - ".vcf" + ".sort" }
+ publishDir = [
+ enabled: false
+ ]
+ }
+
+ withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_JOINT_CALLING_GERMLINE_SENTIEON:MERGE_GENOTYPEGVCFS' {
+ ext.prefix = "joint_germline"
+ publishDir = [
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/variant_calling/sentieon_dnascope/joint_variant_calling/" },
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+ pattern: "*{vcf.gz,vcf.gz.tbi}"
+ ]
+ }
+ }
+}
diff --git a/conf/modules/sentieon_joint_germline.config b/conf/modules/sentieon_haplotyper_joint_germline.config
similarity index 100%
rename from conf/modules/sentieon_joint_germline.config
rename to conf/modules/sentieon_haplotyper_joint_germline.config
diff --git a/docs/output.md b/docs/output.md
index b250ed2817..b03f4c11d0 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -37,8 +37,10 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
- [GATK Germline Single Sample Variant Calling](#gatk-germline-single-sample-variant-calling)
- [GATK Joint Germline Variant Calling](#gatk-joint-germline-variant-calling)
- [GATK Mutect2](#gatk-mutect2)
+ - [Sentieon DNAscope](#sentieon-dnascope)
+ - [Sentieon DNAscope joint germline variant calling](#sentieon-dnascope-joint-germline-variant-calling)
- [Sentieon Haplotyper](#sentieon-haplotyper)
- - [Sentieon Joint Germline Variant Calling](#sentieon-joint-germline-variant-calling)
+ - [Sentieon Haplotyper joint germline variant calling](#sentieon-haplotyper-joint-germline-variant-calling)
- [Strelka2](#strelka2)
- [Structural Variants](#structural-variants)
- [Manta](#manta)
@@ -442,6 +444,53 @@ Files created:
+#### Sentieon DNAscope
+
+[Sentieon DNAscope](https://support.sentieon.com/appnotes/dnascope_ml/#dnascope-germline-variant-calling-with-a-machine-learning-model) is a variant-caller which aims at outperforming GATK's Haplotypecaller in terms of both speed and accuracy. DNAscope allows you to use a machine learning model to perform variant calling with higher accuracy by improving the candidate detection and filtering.
+
+
+Unfiltered VCF-files for normal samples
+
+**Output directory: `{outdir}/variant_calling/sentieon_dnascope/<sample>/`**
+
+- `<sample>.dnascope.unfiltered.vcf.gz` and `<sample>.dnascope.unfiltered.vcf.gz.tbi`
+ - VCF with tabix index
+
+
+
+The output from Sentieon's DNAscope can be controlled through the option `--sentieon_dnascope_emit_mode` for Sarek, see [Basic usage of Sentieon functions](#basic-usage-of-sentieon-functions).
+
+Unless `dnascope_filter` is listed under `--skip_tools` in the nextflow command, Sentieon's [DNAModelApply](https://support.sentieon.com/manual/usages/general/#dnamodelapply-algorithm) is applied to the unfiltered VCF-files in order to obtain filtered VCF-files.
+
+
+Filtered VCF-files for normal samples
+
+**Output directory: `{outdir}/variant_calling/sentieon_dnascope/<sample>/`**
+
+- `<sample>.dnascope.filtered.vcf.gz` and `<sample>.dnascope.filtered.vcf.gz.tbi`
+ - VCF with tabix index
+
+
+
+##### Sentieon DNAscope joint germline variant calling
+
+In Sentieon's package DNAscope, joint germline variant calling is done by first running Sentieon's DNAscope in emit-mode `gvcf` for each sample and then running Sentieon's [GVCFtyper](https://support.sentieon.com/manual/usages/general/#gvcftyper-algorithm) on the set of gVCF-files. See [Basic usage of Sentieon functions](#basic-usage-of-sentieon-functions) for information on how joint germline variant calling can be done in Sarek using Sentieon's DNAscope.
+
+
+Output files from joint germline variant calling
+
+**Output directory: `{outdir}/variant_calling/sentieon_dnascope/<sample>/`**
+
+- `<sample>.dnascope.g.vcf.gz` and `<sample>.dnascope.g.vcf.gz.tbi`
+ - VCF with tabix index
+
+**Output directory: `{outdir}/variant_calling/sentieon_dnascope/joint_variant_calling/`**
+
+- `joint_germline.vcf.gz` and `joint_germline.vcf.gz.tbi`
+ - VCF with tabix index
+
+
+
#### Sentieon Haplotyper
[Sentieon Haplotyper](https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm) is Sention's speedup version of GATK's Haplotypecaller (see above).
@@ -456,7 +505,7 @@ Files created:
-The output from Sentieon's Haplotyper can be controlled through the option `--sentieon_haplotyper_emit_mode` for Sarek, see [Basic usage of Sentieon functions in Sarek](#basic-usage-of-sentieon-functions-in-sarek).
+The output from Sentieon's Haplotyper can be controlled through the option `--sentieon_haplotyper_emit_mode` for Sarek, see [Basic usage of Sentieon functions](#basic-usage-of-sentieon-functions).
Unless `haplotyper_filter` is listed under `--skip_tools` in the nextflow command, GATK's CNNScoreVariants and FilterVariantTranches (see above) is applied to the unfiltered VCF-files in order to obtain filtered VCF-files.
@@ -470,16 +519,16 @@ Unless `haplotyper_filter` is listed under `--skip_tools` in the nextflow comman
-##### Sentieon Joint Germline Variant Calling
+##### Sentieon Haplotyper joint germline variant calling
-In Sentieon's package DNAseq, joint germline variant calling is done by first running Sentieon's Haplotyper in emit-mode `gvcf` for each sample and then running Sentieon's [GVCFtyper](https://support.sentieon.com/manual/usages/general/#gvcftyper-algorithm) on the set of gVCF-files. See [Basic usage of Sentieon functions in Sarek](#basic-usage-of-sentieon-functions-in-sarek) for information on how joint germline variant calling can be done in Sarek using Sentieon's DNAseq. After joint genotyping, Sentieon's version of VQSR ([VarCal](https://support.sentieon.com/manual/usages/general/#varcal-algorithm) and [ApplyVarCal](https://support.sentieon.com/manual/usages/general/#applyvarcal-algorithm)) is applied for filtering to produce the final multisample callset with the desired balance of precision and sensitivity.
+In Sentieon's package DNAseq, joint germline variant calling is done by first running Sentieon's Haplotyper in emit-mode `gvcf` for each sample and then running Sentieon's [GVCFtyper](https://support.sentieon.com/manual/usages/general/#gvcftyper-algorithm) on the set of gVCF-files. See [Basic usage of Sentieon functions](#basic-usage-of-sentieon-functions) for information on how joint germline variant calling can be done in Sarek using Sentieon's DNAseq. After joint genotyping, Sentieon's version of VQSR ([VarCal](https://support.sentieon.com/manual/usages/general/#varcal-algorithm) and [ApplyVarCal](https://support.sentieon.com/manual/usages/general/#applyvarcal-algorithm)) is applied for filtering to produce the final multisample callset with the desired balance of precision and sensitivity.
Output files from joint germline variant calling
**Output directory: `{outdir}/variantcalling/sentieon_haplotyper//`**
-- `.haplotypecaller.g.vcf.gz` and `.haplotypecaller.g.vcf.gz.tbi`
+- `.haplotyper.g.vcf.gz` and `.haplotyper.g.vcf.gz.tbi`
- VCF with tabix index
**Output directory: `{outdir}/variantcalling/sentieon_haplotyper/joint_variant_calling/`**
diff --git a/docs/usage.md b/docs/usage.md
index 0895fac23c..3b2767b302 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -1083,7 +1083,9 @@ nextflow secrets set SENTIEON_LICENSE_BASE64 \$(cat
### Available Sentieon functions
-Sarek contains the following Sentieon functions [bwa mem](https://support.sentieon.com/manual/usages/general/#bwa-mem-syntax), [LocusCollector](https://support.sentieon.com/manual/usages/general/#locuscollector-algorithm) + [Dedup](https://support.sentieon.com/manual/usages/general/#dedup-algorithm), [Haplotyper](https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm), [GVCFtyper](https://support.sentieon.com/manual/usages/general/#gvcftyper-algorithm) and [VarCal](https://support.sentieon.com/manual/usages/general/#varcal-algorithm) + [ApplyVarCal](https://support.sentieon.com/manual/usages/general/#applyvarcal-algorithm), so the basic processing of alignment of fastq-files to VCF-files can be done using speedup Sentieon functions.
+Sarek contains the following Sentieon functions from [DNAseq](https://support.sentieon.com/manual/DNAseq_usage/dnaseq/): [bwa mem](https://support.sentieon.com/manual/usages/general/#bwa-mem-syntax), [LocusCollector](https://support.sentieon.com/manual/usages/general/#locuscollector-algorithm) + [Dedup](https://support.sentieon.com/manual/usages/general/#dedup-algorithm), [Haplotyper](https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm), [GVCFtyper](https://support.sentieon.com/manual/usages/general/#gvcftyper-algorithm) and [VarCal](https://support.sentieon.com/manual/usages/general/#varcal-algorithm) + [ApplyVarCal](https://support.sentieon.com/manual/usages/general/#applyvarcal-algorithm), so the basic processing of alignment of fastq-files to VCF-files can be done using speedup Sentieon functions.
+
+Sarek also contains the Sentieon functions [DNAscope](https://support.sentieon.com/manual/usages/general/?highlight=dnamodelapply#dnascope-algorithm) and [DNAModelApply](https://support.sentieon.com/manual/usages/general/?highlight=dnamodelapply#dnamodelapply-algorithm).
### Basic usage of Sentieon functions
@@ -1091,9 +1093,11 @@ To use Sentieon's aligner `bwa mem`, set the aligner option `sentieon-bwamem`. (
To use Sentieon's function `Dedup`, specify `sentieon_dedup` as one of the tools. (This can, for example, be done by adding `--tools sentieon_dedup` to the nextflow run command.)
-To use Sentieon's function `Haplotyper`, specify `sentieon_haplotyper` as one of the tools. This can, for example, be done by adding `--tools sentieon_haplotyper` to the nextflow run command. In order to skip the GATK-based variant-filter, one may add `--skip_tools haplotyper_filter` to the nextflow run command. Sarek also provides the option `sentieon_haplotyper_emit_mode` which can be used to set the [emit-mode](https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm) of Sentieon's haplotyper. Sentieon's haplotyper can output both a vcf-file and a gvcf-file in the same run; this is achieved by setting `sentieon_haplotyper_emit_mode` to `,gvcf`, where `` is `variant`, `confident` or `all`.
+To use Sentieon's function `DNAscope`, specify `sentieon_dnascope` as one of the tools. This can, for example, be done by adding `--tools sentieon_dnascope` to the nextflow run command. In order to skip Sentieon's variant-filter `DNAModelApply`, one may add `--skip_tools dnascope_filter` to the nextflow run command. Sarek also provides the option `sentieon_dnascope_emit_mode` which can be used to set the [emit-mode](https://support.sentieon.com/manual/usages/general/#dnascope-algorithm) of Sentieon's DNAscope. Sentieon's DNAscope can output both a vcf-file and a gvcf-file in the same run; this is achieved by setting `sentieon_dnascope_emit_mode` to `<vcf_emit_mode>,gvcf`, where `<vcf_emit_mode>` is `variant`, `confident` or `all`.
+
+Sentieon's function `Haplotyper` is used in much the same way as `DNAscope`. To use Sentieon's function `Haplotyper`, specify `sentieon_haplotyper` as one of the tools. This can, for example, be done by adding `--tools sentieon_haplotyper` to the nextflow run command. In order to skip the GATK-based variant-filter, one may add `--skip_tools haplotyper_filter` to the nextflow run command. Sarek also provides the option `sentieon_haplotyper_emit_mode` which can be used to set the [emit-mode](https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm) of Sentieon's haplotyper. Sentieon's haplotyper can output both a vcf-file and a gvcf-file in the same run; this is achieved by setting `sentieon_haplotyper_emit_mode` to `<vcf_emit_mode>,gvcf`, where `<vcf_emit_mode>` is `variant`, `confident` or `all`.
-To use Sentieon's function `GVCFtyper` along with Sention's version of VQSR (`VarCal` and `ApplyVarCal`) for joint-germline genotyping, specify `sentieon_haplotyper` as one of the tools, set the option `sentieon_haplotyper_emit_mode` to `gvcf`, and add the option `joint_germline`. This can, for example, be done by adding `--tools sentieon_haplotyper --joint_germline --sentieon_haplotyper_emit_mode gvcf` to the nextflow run command.
+To use Sentieon's function `GVCFtyper` along with Sentieon's version of VQSR (`VarCal` and `ApplyVarCal`) for joint-germline genotyping, specify `sentieon_haplotyper` as one of the tools, set the option `sentieon_haplotyper_emit_mode` to `gvcf`, and add the option `joint_germline`. This can, for example, be done by adding `--tools sentieon_haplotyper --joint_germline --sentieon_haplotyper_emit_mode gvcf` to the nextflow run command. If `sentieon_dnascope` is chosen instead of `sentieon_haplotyper`, then Sentieon's version of VQSR is skipped, as recommended by Sentieon.
### Joint germline variant calling
diff --git a/modules.json b/modules.json
index 84b30a76dc..06ff0b6dab 100644
--- a/modules.json
+++ b/modules.json
@@ -388,6 +388,16 @@
"git_sha": "915a0b16ba3e40ef59e7b44843b3118e17a9c906",
"installed_by": ["modules"]
},
+ "sentieon/dnamodelapply": {
+ "branch": "master",
+ "git_sha": "43ef68091a1188fd8dc4c03f9341b556803c7514",
+ "installed_by": ["modules"]
+ },
+ "sentieon/dnascope": {
+ "branch": "master",
+ "git_sha": "4fb6fdc8046ec09cd30f92a2a252e9a0ba4a6309",
+ "installed_by": ["modules"]
+ },
"sentieon/gvcftyper": {
"branch": "master",
"git_sha": "6c9c11ee96796e53a01b4719286acce6af14bc3a",
diff --git a/modules/nf-core/sentieon/dnamodelapply/main.nf b/modules/nf-core/sentieon/dnamodelapply/main.nf
new file mode 100644
index 0000000000..3fe9a28f19
--- /dev/null
+++ b/modules/nf-core/sentieon/dnamodelapply/main.nf
@@ -0,0 +1,81 @@
+process SENTIEON_DNAMODELAPPLY {
+ tag "$meta.id"
+ label 'process_high'
+ label 'sentieon'
+
+ secret 'SENTIEON_LICENSE_BASE64'
+
+ container 'nf-core/sentieon:202112.06'
+
+ input:
+ tuple val(meta), path(vcf), path(idx)
+ tuple val(meta2), path(fasta)
+ tuple val(meta3), path(fai)
+ tuple val(meta4), path(ml_model)
+
+ output:
+ tuple val(meta), path("*.vcf.gz") , emit: vcf
+ tuple val(meta), path("*.vcf.gz.tbi"), emit: index
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ error "Sentieon modules do not support Conda. Please use Docker / Singularity / Podman instead."
+ }
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def sentieon_auth_mech_base64 = task.ext.sentieon_auth_mech_base64 ?: ''
+ def sentieon_auth_data_base64 = task.ext.sentieon_auth_data_base64 ?: ''
+
+ """
+ if [ "\${#SENTIEON_LICENSE_BASE64}" -lt "1500" ]; then # If the string SENTIEON_LICENSE_BASE64 is short, then it is an encrypted url.
+ export SENTIEON_LICENSE=\$(echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d)
+ else # Localhost license file
+ # The license file is stored as a nextflow variable like, for instance, this:
+ # nextflow secrets set SENTIEON_LICENSE_BASE64 \$(cat | base64 -w 0)
+ export SENTIEON_LICENSE=\$(mktemp)
+ echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d > \$SENTIEON_LICENSE
+ fi
+
+ if [ ${sentieon_auth_mech_base64} ] && [ ${sentieon_auth_data_base64} ]; then
+ # If sentieon_auth_mech_base64 and sentieon_auth_data_base64 are non-empty strings, then Sentieon is mostly likely being run with some test-license.
+ export SENTIEON_AUTH_MECH=\$(echo -n "${sentieon_auth_mech_base64}" | base64 -d)
+ export SENTIEON_AUTH_DATA=\$(echo -n "${sentieon_auth_data_base64}" | base64 -d)
+ echo "Decoded and exported Sentieon test-license system environment variables"
+ fi
+
+ sentieon driver \\
+ -t $task.cpus \\
+ -r $fasta \\
+ $args \\
+ --algo DNAModelApply \\
+ --model $ml_model \\
+ -v $vcf \\
+ ${prefix}.vcf.gz
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g")
+ END_VERSIONS
+ """
+
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ error "Sentieon modules do not support Conda. Please use Docker / Singularity / Podman instead."
+ }
+ """
+ touch ${prefix}.vcf.gz
+ touch ${prefix}.vcf.gz.tbi
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g" )
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/sentieon/dnamodelapply/meta.yml b/modules/nf-core/sentieon/dnamodelapply/meta.yml
new file mode 100644
index 0000000000..ec429bea21
--- /dev/null
+++ b/modules/nf-core/sentieon/dnamodelapply/meta.yml
@@ -0,0 +1,78 @@
+name: "sentieon_dnamodelapply"
+description: modifies the input VCF file by adding the MLrejected FILTER to the variants
+keywords:
+ - dnamodelapply
+ - vcf
+ - filter
+ - sentieon
+tools:
+ - sentieon:
+ description: |
+ Sentieon® provides complete solutions for secondary DNA/RNA analysis for a variety of sequencing platforms, including short and long reads.
+ Our software improves upon BWA, STAR, Minimap2, GATK, HaplotypeCaller, Mutect, and Mutect2 based pipelines and is deployable on any generic-CPU-based computing system.
+ homepage: https://www.sentieon.com/
+ documentation: https://www.sentieon.com/
+
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'test', single_end:false ]`
+ - meta2:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. `[ id:'test' ]`
+ - meta3:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. `[ id:'test' ]`
+ - meta4:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. `[ id:'test' ]`
+ - vcf:
+ type: file
+ description: INPUT VCF file
+ pattern: "*.{vcf,vcf.gz}"
+ - idx:
+ type: file
+ description: Index of the input VCF file
+ pattern: "*.{tbi}"
+ - fasta:
+ type: file
+ description: Genome fasta file
+ pattern: "*.{fa,fasta}"
+ - fai:
+ type: file
+ description: Index of the genome fasta file
+ pattern: "*.fai"
+ - ml_model:
+ type: file
+ description: machine learning model file
+ pattern: "*.model"
+
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'test', single_end:false ]`
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - vcf:
+ type: file
+ description: INPUT VCF file
+ pattern: "*.{vcf,vcf.gz}"
+ - index:
+ type: file
+ description: Index of the input VCF file
+ pattern: "*.{tbi}"
+
+authors:
+ - "@ramprasadn"
diff --git a/modules/nf-core/sentieon/dnascope/main.nf b/modules/nf-core/sentieon/dnascope/main.nf
new file mode 100644
index 0000000000..6be42a1728
--- /dev/null
+++ b/modules/nf-core/sentieon/dnascope/main.nf
@@ -0,0 +1,100 @@
+process SENTIEON_DNASCOPE {
+ tag "$meta.id"
+ label 'process_high'
+ label 'sentieon'
+
+ secret 'SENTIEON_LICENSE_BASE64'
+
+ container 'nf-core/sentieon:202112.06'
+
+ input:
+ tuple val(meta), path(bam), path(bai), path(intervals)
+ tuple val(meta2), path(fasta)
+ tuple val(meta3), path(fai)
+ tuple val(meta4), path(dbsnp)
+ tuple val(meta5), path(dbsnp_tbi)
+ tuple val(meta6), path(ml_model)
+ val(pcr_indel_model)
+ val(emit_vcf)
+ val(emit_gvcf)
+
+ output:
+ tuple val(meta), path("*.unfiltered.vcf.gz") , optional:true, emit: vcf // added the substring ".unfiltered" in the filename of the vcf-files since without that the g.vcf.gz-files were ending up in the vcf-channel
+ tuple val(meta), path("*.unfiltered.vcf.gz.tbi"), optional:true, emit: vcf_tbi
+ tuple val(meta), path("*.g.vcf.gz") , optional:true, emit: gvcf // these output-files have to have the extension ".vcf.gz", otherwise the subsequent GATK-MergeVCFs will fail.
+ tuple val(meta), path("*.g.vcf.gz.tbi") , optional:true, emit: gvcf_tbi
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ error "Sentieon modules do not support Conda. Please use Docker / Singularity / Podman instead."
+ }
+ def args = task.ext.args ?: '' // options for the driver
+ def args2 = task.ext.args2 ?: '' // options for the vcf generation
+ def args3 = task.ext.args3 ?: '' // options for the gvcf generation
+ def interval = intervals ? "--interval ${intervals}" : ''
+ def dbsnp_cmd = dbsnp ? "-d ${dbsnp}" : ''
+ def model_cmd = ml_model ? " --model ${ml_model}" : ''
+ def pcr_indel_model_cmd = pcr_indel_model ? " --pcr_indel_model ${pcr_indel_model}" : ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def sentieon_auth_mech_base64 = task.ext.sentieon_auth_mech_base64 ?: ''
+ def sentieon_auth_data_base64 = task.ext.sentieon_auth_data_base64 ?: ''
+ def vcf_cmd = ""
+ def gvcf_cmd = ""
+ def base_cmd = '--algo DNAscope ' + dbsnp_cmd + ' '
+
+ if (emit_vcf) { // emit_vcf can be the empty string, 'variant', 'confident' or 'all' but NOT 'gvcf'
+ vcf_cmd = base_cmd + args2 + ' ' + model_cmd + pcr_indel_model_cmd + ' --emit_mode ' + emit_vcf + ' ' + prefix + '.unfiltered.vcf.gz'
+ }
+
+ if (emit_gvcf) { // emit_gvcf can be either true or false
+ gvcf_cmd = base_cmd + args3 + ' ' + model_cmd + pcr_indel_model_cmd + ' --emit_mode gvcf ' + prefix + '.g.vcf.gz'
+ }
+
+ """
+ if [ "\${#SENTIEON_LICENSE_BASE64}" -lt "1500" ]; then # If the string SENTIEON_LICENSE_BASE64 is short, then it is an encrypted url.
+ export SENTIEON_LICENSE=\$(echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d)
+ else # Localhost license file
+ # The license file is stored as a nextflow variable like, for instance, this:
+ # nextflow secrets set SENTIEON_LICENSE_BASE64 \$(cat | base64 -w 0)
+ export SENTIEON_LICENSE=\$(mktemp)
+ echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d > \$SENTIEON_LICENSE
+ fi
+
+ if [ ${sentieon_auth_mech_base64} ] && [ ${sentieon_auth_data_base64} ]; then
+ # If sentieon_auth_mech_base64 and sentieon_auth_data_base64 are non-empty strings, then Sentieon is mostly likely being run with some test-license.
+ export SENTIEON_AUTH_MECH=\$(echo -n "${sentieon_auth_mech_base64}" | base64 -d)
+ export SENTIEON_AUTH_DATA=\$(echo -n "${sentieon_auth_data_base64}" | base64 -d)
+ echo "Decoded and exported Sentieon test-license system environment variables"
+ fi
+
+ sentieon driver $args -r $fasta -t $task.cpus -i $bam $interval $vcf_cmd $gvcf_cmd
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g")
+ END_VERSIONS
+ """
+
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ error "Sentieon modules do not support Conda. Please use Docker / Singularity / Podman instead."
+ }
+ """
+ touch ${prefix}.unfiltered.vcf.gz
+ touch ${prefix}.unfiltered.vcf.gz.tbi
+ touch ${prefix}.g.vcf.gz
+ touch ${prefix}.g.vcf.gz.tbi
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g" )
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/sentieon/dnascope/meta.yml b/modules/nf-core/sentieon/dnascope/meta.yml
new file mode 100644
index 0000000000..34e0b97b4c
--- /dev/null
+++ b/modules/nf-core/sentieon/dnascope/meta.yml
@@ -0,0 +1,115 @@
+name: sentieon_dnascope
+description: DNAscope algorithm performs an improved version of Haplotype variant calling.
+keywords:
+  - dnascope
+  - sentieon
+  - variant_calling
+tools:
+  - sentieon:
+      description: |
+        Sentieon® provides complete solutions for secondary DNA/RNA analysis for a variety of sequencing platforms, including short and long reads.
+        Our software improves upon BWA, STAR, Minimap2, GATK, HaplotypeCaller, Mutect, and Mutect2 based pipelines and is deployable on any generic-CPU-based computing system.
+      homepage: https://www.sentieon.com/
+      documentation: https://www.sentieon.com/
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information.
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: BAM file.
+      pattern: "*.bam"
+  - bai:
+      type: file
+      description: BAI file
+      pattern: "*.bai"
+  - intervals:
+      type: file
+      description: bed or interval_list file containing interval in the reference that will be used in the analysis
+      pattern: "*.{bed,interval_list}"
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing meta information for fasta.
+  - fasta:
+      type: file
+      description: Genome fasta file
+      pattern: "*.{fa,fasta}"
+  - meta3:
+      type: map
+      description: |
+        Groovy Map containing meta information for fasta index.
+  - fai:
+      type: file
+      description: Index of the genome fasta file
+      pattern: "*.fai"
+  - meta4:
+      type: map
+      description: |
+        Groovy Map containing meta information for dbsnp.
+  - dbsnp:
+      type: file
+      description: Single Nucleotide Polymorphism database (dbSNP) file
+      pattern: "*.vcf.gz"
+  - meta5:
+      type: map
+      description: |
+        Groovy Map containing meta information for dbsnp_tbi.
+  - dbsnp_tbi:
+      type: file
+      description: Index of the Single Nucleotide Polymorphism database (dbSNP) file
+      pattern: "*.vcf.gz.tbi"
+  - meta6:
+      type: map
+      description: |
+        Groovy Map containing meta information for machine learning model for Dnascope.
+  - ml_model:
+      type: file
+      description: machine learning model file
+      pattern: "*.model"
+  - pcr_indel_model:
+      type: string
+      description: |
+        Controls the option pcr_indel_model for Dnascope.
+        The possible options are "NONE" (used for PCR free samples), and "HOSTILE", "AGGRESSIVE" and "CONSERVATIVE".
+        See Sentieon's documentation for further explanation.
+  - emit_vcf:
+      type: string
+      description: |
+        Controls the vcf output from Dnascope.
+        Possible options are "all", "confident" and "variant".
+        See Sentieon's documentation for further explanation.
+  - emit_gvcf:
+      type: boolean
+      description: If true, DNAscope will output a gvcf
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing reference information.
+        e.g. [ id:'test', single_end:false ]
+  - vcf:
+      type: file
+      description: Compressed VCF file
+      pattern: "*.unfiltered.vcf.gz"
+  - vcf_tbi:
+      type: file
+      description: Index of VCF file
+      pattern: "*.unfiltered.vcf.gz.tbi"
+  - gvcf:
+      type: file
+      description: Compressed GVCF file
+      pattern: "*.g.vcf.gz"
+  - gvcf_tbi:
+      type: file
+      description: Index of GVCF file
+      pattern: "*.g.vcf.gz.tbi"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@ramprasadn"
diff --git a/nextflow.config b/nextflow.config
index ccf3f5068c..56cab933a6 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -51,26 +51,29 @@ params {
seq_platform = 'ILLUMINA' // Default platform written in read group PL field by aligner
// Variant Calling
- only_paired_variant_calling = false // if true, skips germline variant calling for normal-paired samples
- ascat_ploidy = null // default value for ASCAT
- ascat_min_base_qual = 20 // default value for ASCAT
- ascat_min_counts = 10 // default value for ASCAT
- ascat_min_map_qual = 35 // default value for ASCAT
- ascat_purity = null // default value for ASCAT
- cf_ploidy = "2" // default value for Control-FREEC
- cf_coeff = 0.05 // default value for Control-FREEC
- cf_contamination = 0 // default value for Control-FREEC
- cf_contamination_adjustment = false // by default we are not using this in Control-FREEC
- cf_mincov = 0 // ControlFreec default values
- cf_minqual = 0 // ControlFreec default values
- cf_window = null // by default we are not using this in Control-FREEC
- cnvkit_reference = null // by default the reference is build from the fasta file
- concatenate_vcfs = false // by default we don't concatenate the germline-vcf-files
- ignore_soft_clipped_bases = false // no --dont-use-soft-clipped-bases for GATK Mutect2
- wes = false // Set to true, if data is exome/targeted sequencing data. Used to use correct models in various variant callers
- joint_germline = false // g.vcf & joint germline calling are not run by default if HaplotypeCaller is selected
- joint_mutect2 = false // if true, enables patient-wise multi-sample somatic variant calling
- sentieon_haplotyper_emit_mode = "variant" // default value for Sentieon haplotyper
+ ascat_ploidy = null // default value for ASCAT
+ ascat_min_base_qual = 20 // default value for ASCAT
+ ascat_min_counts = 10 // default value for ASCAT
+ ascat_min_map_qual = 35 // default value for ASCAT
+ ascat_purity = null // default value for ASCAT
+ cf_ploidy = "2" // default value for Control-FREEC
+ cf_coeff = 0.05 // default value for Control-FREEC
+ cf_contamination = 0 // default value for Control-FREEC
+ cf_contamination_adjustment = false // by default we are not using this in Control-FREEC
+ cf_mincov = 0 // ControlFreec default values
+ cf_minqual = 0 // ControlFreec default values
+ cf_window = null // by default we are not using this in Control-FREEC
+ cnvkit_reference = null // by default the reference is build from the fasta file
+ concatenate_vcfs = false // by default we don't concatenate the germline-vcf-files
+ ignore_soft_clipped_bases = false // no --dont-use-soft-clipped-bases for GATK Mutect2
+ joint_germline = false // g.vcf & joint germline calling are not run by default if HaplotypeCaller is selected
+ joint_mutect2 = false // if true, enables patient-wise multi-sample somatic variant calling
+ only_paired_variant_calling = false // if true, skips germline variant calling for normal-paired samples
+ sentieon_dnascope_emit_mode = "variant" // default value for Sentieon dnascope
+ sentieon_dnascope_model = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.1.model" // default machine learning model (given as a URL) for Sentieon dnascope
+ sentieon_dnascope_pcr_indel_model = "CONSERVATIVE" // default value for the pcr_indel_model option of Sentieon dnascope
+ sentieon_haplotyper_emit_mode = "variant" // default value for Sentieon haplotyper
+ wes = false // Set to true, if data is exome/targeted sequencing data. Used to use correct models in various variant callers
// Annotation
dbnsfp = null // No dbnsfp processed file
@@ -377,8 +380,10 @@ includeConfig 'conf/modules/manta.config'
includeConfig 'conf/modules/mpileup.config'
includeConfig 'conf/modules/msisensorpro.config'
includeConfig 'conf/modules/mutect2.config'
+includeConfig 'conf/modules/sentieon_dnascope.config'
+includeConfig 'conf/modules/sentieon_dnascope_joint_germline.config'
includeConfig 'conf/modules/sentieon_haplotyper.config'
-includeConfig 'conf/modules/sentieon_joint_germline.config'
+includeConfig 'conf/modules/sentieon_haplotyper_joint_germline.config'
includeConfig 'conf/modules/strelka.config'
includeConfig 'conf/modules/tiddit.config'
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 51850b6fa9..68c6b77146 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -100,14 +100,14 @@
"fa_icon": "fas fa-toolbox",
"description": "Tools to use for duplicate marking, variant calling and/or for annotation.",
"help_text": "Multiple tools separated with commas.\n\n**Variant Calling:**\n\nGermline variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: DeepVariant, FreeBayes, GATK HaplotypeCaller, mpileup, Sentieon Haplotyper, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit\n\nTumor-only somatic variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, mpileup, Mutect2, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit, ControlFREEC\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, Mutect2, Strelka2\n- Structural variants: Manta, TIDDIT\n- Copy-Number: ASCAT, CNVKit, Control-FREEC\n- Microsatellite Instability: MSIsensorpro\n\n> **NB** Mutect2 for somatic variant calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n \n- snpEff, VEP, merge (both consecutively).\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted when starting from `--step annotate`.",
- "pattern": "^((ascat|cnvkit|controlfreec|deepvariant|freebayes|haplotypecaller|sentieon_haplotyper|manta|merge|mpileup|msisensorpro|mutect2|sentieon_dedup|snpeff|strelka|tiddit|vep)?,?)*(? **NB** `--skip_tools baserecalibrator_report` is actually just not saving the reports.\n> **NB** `--skip_tools markduplicates_report` does not skip `MarkDuplicates` but prevent the collection of duplicate metrics that slows down performance.",
- "pattern": "^((baserecalibrator|baserecalibrator_report|bcftools|documentation|fastqc|haplotypecaller_filter|haplotyper_filter|markduplicates|markduplicates_report|mosdepth|multiqc|samtools|vcftools|versions)?,?)*(? [ meta.subMap('num_intervals') + [ id:'joint_variant_calling', patient:'all_samples', variantcaller:'sentieon_haplotyper' ], vcf ]}.groupTuple()
+ gvcf_to_merge = BCFTOOLS_SORT.out.vcf.map{ meta, vcf -> [ meta.subMap('num_intervals') + [ id:'joint_variant_calling', patient:'all_samples', variantcaller:variant_caller ], vcf ]}.groupTuple()
// Merge scatter/gather vcfs & index
// Rework meta for variantscalled.csv and annotation tools
MERGE_GENOTYPEGVCFS(gvcf_to_merge, dict)
- vqsr_input = MERGE_GENOTYPEGVCFS.out.vcf.join(MERGE_GENOTYPEGVCFS.out.tbi, failOnDuplicate: true)
- indels_resource_label = known_indels_vqsr.mix(dbsnp_vqsr).collect()
- snps_resource_label = known_snps_vqsr.mix(dbsnp_vqsr).collect()
-
- // Recalibrate INDELs and SNPs separately
- SENTIEON_VARCAL_INDEL(
- vqsr_input,
- resource_indels_vcf,
- resource_indels_tbi,
- indels_resource_label,
- fasta,
- fai)
-
- SENTIEON_VARCAL_SNP(
- vqsr_input,
- resource_snps_vcf,
- resource_snps_tbi,
- snps_resource_label,
- fasta,
- fai)
-
- //Prepare SNPs and INDELs for Sentieon's applyvarcal
- // Step 1. : applyvarcal to SNPs
- // Step 2. : Use SENTIEON_APPLYVARCAL_SNP output and run ApplyVQSR_INDEL. This avoids duplicate entries in the vcf as described here: https://hpc.nih.gov/training/gatk_tutorial/vqsr.html
-
- // Join results of variant recalibration into a single channel tuple
- // Rework meta for variantscalled.csv and annotation tools
- vqsr_input_snp = vqsr_input.join(SENTIEON_VARCAL_SNP.out.recal, failOnDuplicate: true)
- .join(SENTIEON_VARCAL_SNP.out.idx, failOnDuplicate: true)
- .join(SENTIEON_VARCAL_SNP.out.tranches, failOnDuplicate: true)
- .map{ meta, vcf, tbi, recal, index, tranche -> [ meta + [ id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] }
-
- SENTIEON_APPLYVARCAL_SNP(
- vqsr_input_snp,
- fasta.map{ fasta -> [ [ id:fasta.baseName ], fasta ] },
- fai.map{ fai -> [ [ id:fai.baseName ], fai ] })
-
- // Join results of SENTIEON_APPLYVARCAL_SNP and use as input for SENTIEON_APPLYVARCAL_INDEL to avoid duplicate entries in the result
- // Rework meta for variantscalled.csv and annotation tools
- vqsr_input_indel = SENTIEON_APPLYVARCAL_SNP.out.vcf.join(SENTIEON_APPLYVARCAL_SNP.out.tbi).map{ meta, vcf, tbi -> [ meta + [ id:'joint_variant_calling' ], vcf, tbi ]}
- .join(SENTIEON_VARCAL_INDEL.out.recal, failOnDuplicate: true)
- .join(SENTIEON_VARCAL_INDEL.out.idx, failOnDuplicate: true)
- .join(SENTIEON_VARCAL_INDEL.out.tranches, failOnDuplicate: true)
- .map{ meta, vcf, tbi, recal, index, tranche -> [ meta + [ id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] }
-
- SENTIEON_APPLYVARCAL_INDEL(
- vqsr_input_indel,
- fasta.map{ fasta -> [ [ id:fasta.baseName ], fasta ] },
- fai.map{ fai -> [ [ id:fai.baseName ], fai ] })
-
- // The following is an ugly monster to achieve the following:
- // When MERGE_GENOTYPEGVCFS and SENTIEON_APPLYVARCAL are run, then use output from SENTIEON_APPLYVARCAL
- // When MERGE_GENOTYPEGVCFS and NOT SENTIEON_APPLYVARCAL, then use the output from MERGE_GENOTYPEGVCFS
-
- merge_vcf_for_join = MERGE_GENOTYPEGVCFS.out.vcf.map{meta, vcf -> [[id: 'joint_variant_calling'] , vcf]}
- merge_tbi_for_join = MERGE_GENOTYPEGVCFS.out.tbi.map{meta, tbi -> [[id: 'joint_variant_calling'] , tbi]}
-
- // Remap for both to have the same key, if ApplyBQSR is not run, the channel is empty --> populate with empty elements
- vqsr_vcf_for_join = SENTIEON_APPLYVARCAL_INDEL.out.vcf.ifEmpty([[:], []]).map{meta, vcf -> [[id: 'joint_variant_calling'] , vcf]}
- vqsr_tbi_for_join = SENTIEON_APPLYVARCAL_INDEL.out.tbi.ifEmpty([[:], []]).map{meta, tbi -> [[id: 'joint_variant_calling'] , tbi]}
-
- // Join on metamap
- // If both --> meta, vcf_merged, vcf_bqsr
- // If not VQSR --> meta, vcf_merged, []
- // if the second is empty, use the first
- genotype_vcf = merge_vcf_for_join.join(vqsr_vcf_for_join, remainder: true).map{
- meta, joint_vcf, recal_vcf ->
-
- vcf_out = recal_vcf ?: joint_vcf
-
- [[id:"joint_variant_calling", patient:"all_samples", variantcaller:"sentieon_haplotyper"], vcf_out]
- }
-
- genotype_index = merge_tbi_for_join.join(vqsr_tbi_for_join, remainder: true).map{
- meta, joint_tbi, recal_tbi ->
-
- tbi_out = recal_tbi ?: joint_tbi
- [[id:"joint_variant_calling", patient:"all_samples", variantcaller:"sentieon_haplotyper"], tbi_out]
+ // Re-key the merged joint-genotyped VCF and its index on a minimal meta map (id only) so both can be joined/re-labelled below
+ merged_vcf = MERGE_GENOTYPEGVCFS.out.vcf.map{meta, vcf -> [[id: 'joint_variant_calling'] , vcf]}
+ merged_tbi = MERGE_GENOTYPEGVCFS.out.tbi.map{meta, tbi -> [[id: 'joint_variant_calling'] , tbi]}
+
+ if (variant_caller == 'sentieon_dnascope') {
+     // As advised by Don Freed (Sentieon), VQSR is skipped for DnaScope
+     // The merged output is emitted directly, labelled with the caller that was passed in
+     genotype_vcf = merged_vcf.map{
+         meta, vcf -> [ meta + [ patient:"all_samples", variantcaller:variant_caller ], vcf ]
+     }
+     genotype_index = merged_tbi.map{
+         meta, tbi -> [ meta + [ patient:"all_samples", variantcaller:variant_caller ], tbi ]
+     }
+ } else {
+     vqsr_input = MERGE_GENOTYPEGVCFS.out.vcf.join(MERGE_GENOTYPEGVCFS.out.tbi, failOnDuplicate: true)
+     indels_resource_label = known_indels_vqsr.mix(dbsnp_vqsr).collect()
+     snps_resource_label = known_snps_vqsr.mix(dbsnp_vqsr).collect()
+
+     // Recalibrate INDELs and SNPs separately
+     SENTIEON_VARCAL_INDEL(
+         vqsr_input,
+         resource_indels_vcf,
+         resource_indels_tbi,
+         indels_resource_label,
+         fasta,
+         fai)
+
+     SENTIEON_VARCAL_SNP(
+         vqsr_input,
+         resource_snps_vcf,
+         resource_snps_tbi,
+         snps_resource_label,
+         fasta,
+         fai)
+
+     //Prepare SNPs and INDELs for Sentieon's applyvarcal
+     // Step 1. : applyvarcal to SNPs
+     // Step 2. : Use SENTIEON_APPLYVARCAL_SNP output and run ApplyVQSR_INDEL. This avoids duplicate entries in the vcf as described here: https://hpc.nih.gov/training/gatk_tutorial/vqsr.html
+
+     // Join results of variant recalibration into a single channel tuple
+     // Rework meta for variantscalled.csv and annotation tools
+     vqsr_input_snp = vqsr_input.join(SENTIEON_VARCAL_SNP.out.recal, failOnDuplicate: true)
+         .join(SENTIEON_VARCAL_SNP.out.idx, failOnDuplicate: true)
+         .join(SENTIEON_VARCAL_SNP.out.tranches, failOnDuplicate: true)
+         .map{ meta, vcf, tbi, recal, index, tranche -> [ meta + [ id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] }
+
+     SENTIEON_APPLYVARCAL_SNP(
+         vqsr_input_snp,
+         fasta.map{ fasta -> [ [ id:fasta.baseName ], fasta ] },
+         fai.map{ fai -> [ [ id:fai.baseName ], fai ] })
+
+     // Join results of SENTIEON_APPLYVARCAL_SNP and use as input for SENTIEON_APPLYVARCAL_INDEL to avoid duplicate entries in the result
+     // Rework meta for variantscalled.csv and annotation tools
+     vqsr_input_indel = SENTIEON_APPLYVARCAL_SNP.out.vcf.join(SENTIEON_APPLYVARCAL_SNP.out.tbi).map{ meta, vcf, tbi -> [ meta + [ id:'joint_variant_calling' ], vcf, tbi ]}
+         .join(SENTIEON_VARCAL_INDEL.out.recal, failOnDuplicate: true)
+         .join(SENTIEON_VARCAL_INDEL.out.idx, failOnDuplicate: true)
+         .join(SENTIEON_VARCAL_INDEL.out.tranches, failOnDuplicate: true)
+         .map{ meta, vcf, tbi, recal, index, tranche -> [ meta + [ id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] }
+
+     SENTIEON_APPLYVARCAL_INDEL(
+         vqsr_input_indel,
+         fasta.map{ fasta -> [ [ id:fasta.baseName ], fasta ] },
+         fai.map{ fai -> [ [ id:fai.baseName ], fai ] })
+
+     // The following is an ugly monster to achieve the following:
+     // When MERGE_GENOTYPEGVCFS and SENTIEON_APPLYVARCAL are run, then use output from SENTIEON_APPLYVARCAL
+     // When MERGE_GENOTYPEGVCFS and NOT SENTIEON_APPLYVARCAL, then use the output from MERGE_GENOTYPEGVCFS
+
+     // Remap for both to have the same key, if SENTIEON_APPLYVARCAL (VQSR) is not run, the channel is empty --> populate with empty elements
+     vqsr_vcf_for_join = SENTIEON_APPLYVARCAL_INDEL.out.vcf.ifEmpty([[:], []]).map{meta, vcf -> [[id: 'joint_variant_calling'] , vcf]}
+     vqsr_tbi_for_join = SENTIEON_APPLYVARCAL_INDEL.out.tbi.ifEmpty([[:], []]).map{meta, tbi -> [[id: 'joint_variant_calling'] , tbi]}
+
+     // Join on metamap
+     // If both --> meta, vcf_merged, vcf_vqsr
+     // If not VQSR --> meta, vcf_merged, []
+     // if the second is empty, use the first
+     genotype_vcf = merged_vcf.join(vqsr_vcf_for_join, remainder: true).map{
+         meta, joint_vcf, recal_vcf ->
+
+         // 'def' keeps the variable local to the closure instead of leaking it into the script binding
+         def vcf_out = recal_vcf ?: joint_vcf
+
+         // Label with the caller passed to the subworkflow, consistent with gvcf_to_merge
+         [[id:"joint_variant_calling", patient:"all_samples", variantcaller:variant_caller], vcf_out]
+     }
+
+     genotype_index = merged_tbi.join(vqsr_tbi_for_join, remainder: true).map{
+         meta, joint_tbi, recal_tbi ->
+
+         def tbi_out = recal_tbi ?: joint_tbi
+         [[id:"joint_variant_calling", patient:"all_samples", variantcaller:variant_caller], tbi_out]
+     }
+
+     versions = versions.mix(SENTIEON_VARCAL_SNP.out.versions)
+     versions = versions.mix(SENTIEON_VARCAL_INDEL.out.versions)
+     versions = versions.mix(SENTIEON_APPLYVARCAL_INDEL.out.versions)
 }
versions = versions.mix(SENTIEON_GVCFTYPER.out.versions)
- versions = versions.mix(SENTIEON_VARCAL_SNP.out.versions)
- versions = versions.mix(SENTIEON_VARCAL_INDEL.out.versions)
- versions = versions.mix(SENTIEON_APPLYVARCAL_INDEL.out.versions)
emit:
genotype_index // channel: [ val(meta), [ tbi ] ]
diff --git a/subworkflows/local/bam_variant_calling_germline_all/main.nf b/subworkflows/local/bam_variant_calling_germline_all/main.nf
index 666c7c7b6b..5989023adf 100644
--- a/subworkflows/local/bam_variant_calling_germline_all/main.nf
+++ b/subworkflows/local/bam_variant_calling_germline_all/main.nf
@@ -9,12 +9,16 @@ include { BAM_VARIANT_CALLING_DEEPVARIANT } from '../bam_variant_calling
include { BAM_VARIANT_CALLING_FREEBAYES } from '../bam_variant_calling_freebayes/main'
include { BAM_VARIANT_CALLING_GERMLINE_MANTA } from '../bam_variant_calling_germline_manta/main'
include { BAM_VARIANT_CALLING_HAPLOTYPECALLER } from '../bam_variant_calling_haplotypecaller/main'
+include { BAM_VARIANT_CALLING_SENTIEON_DNASCOPE } from '../bam_variant_calling_sentieon_dnascope/main'
include { BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER } from '../bam_variant_calling_sentieon_haplotyper/main'
include { BAM_VARIANT_CALLING_MPILEUP } from '../bam_variant_calling_mpileup/main'
include { BAM_VARIANT_CALLING_SINGLE_STRELKA } from '../bam_variant_calling_single_strelka/main'
include { BAM_VARIANT_CALLING_SINGLE_TIDDIT } from '../bam_variant_calling_single_tiddit/main'
+include { SENTIEON_DNAMODELAPPLY } from '../../../modules/nf-core/sentieon/dnamodelapply/main'
include { VCF_VARIANT_FILTERING_GATK } from '../vcf_variant_filtering_gatk/main'
+
+
workflow BAM_VARIANT_CALLING_GERMLINE_ALL {
take:
tools // Mandatory, list of tools to apply
@@ -41,18 +45,24 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL {
joint_germline // boolean: [mandatory] [default: false] joint calling of germline variants
skip_haplotypecaller_filter // boolean: [mandatory] [default: false] whether to filter haplotypecaller single sample vcfs
sentieon_haplotyper_emit_mode // channel: [mandatory] value channel with string
+ sentieon_dnascope_emit_mode // channel: [mandatory] value channel with string
+ sentieon_dnascope_pcr_indel_model // channel: [mandatory] value channel with string
+ sentieon_dnascope_model // channel: [mandatory] value channel with string
main:
versions = Channel.empty()
//TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config
+ gvcf_sentieon_dnascope = Channel.empty()
+ gvcf_sentieon_haplotyper = Channel.empty()
+
vcf_deepvariant = Channel.empty()
vcf_freebayes = Channel.empty()
vcf_haplotypecaller = Channel.empty()
vcf_manta = Channel.empty()
vcf_mpileup = Channel.empty()
+ vcf_sentieon_dnascope = Channel.empty()
vcf_sentieon_haplotyper = Channel.empty()
- gvcf_sentieon_haplotyper = Channel.empty()
vcf_strelka = Channel.empty()
vcf_tiddit = Channel.empty()
@@ -180,6 +190,66 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL {
versions = versions.mix(BAM_VARIANT_CALLING_GERMLINE_MANTA.out.versions)
}
+ // SENTIEON DNASCOPE: only runs when 'sentieon_dnascope' is listed in tools
+ if (tools.split(',').contains('sentieon_dnascope')) {
+ BAM_VARIANT_CALLING_SENTIEON_DNASCOPE(
+ cram,
+ fasta,
+ fasta_fai,
+ dict,
+ dbsnp,
+ dbsnp_tbi,
+ dbsnp_vqsr,
+ intervals,
+ joint_germline,
+ sentieon_dnascope_emit_mode,
+ sentieon_dnascope_pcr_indel_model,
+ sentieon_dnascope_model)
+
+ versions = versions.mix(BAM_VARIANT_CALLING_SENTIEON_DNASCOPE.out.versions)
+ // Per-sample outputs; vcf_sentieon_dnascope may be replaced below by the joint-genotyped or model-applied VCF
+ vcf_sentieon_dnascope = BAM_VARIANT_CALLING_SENTIEON_DNASCOPE.out.vcf
+ vcf_tbi_sentieon_dnascope = BAM_VARIANT_CALLING_SENTIEON_DNASCOPE.out.vcf_tbi
+ gvcf_sentieon_dnascope = BAM_VARIANT_CALLING_SENTIEON_DNASCOPE.out.gvcf
+ gvcf_tbi_sentieon_dnascope = BAM_VARIANT_CALLING_SENTIEON_DNASCOPE.out.gvcf_tbi
+ // NOTE(review): BAM_JOINT_CALLING_GERMLINE_SENTIEON is also invoked in the SENTIEON HAPLOTYPER section; confirm that requesting both tools with joint_germline does not hit Nextflow's component-reuse restriction
+ if (joint_germline) {
+ BAM_JOINT_CALLING_GERMLINE_SENTIEON(
+ BAM_VARIANT_CALLING_SENTIEON_DNASCOPE.out.genotype_intervals,
+ fasta,
+ fasta_fai,
+ dict,
+ dbsnp,
+ dbsnp_tbi,
+ dbsnp_vqsr,
+ known_sites_indels,
+ known_sites_indels_tbi,
+ known_indels_vqsr,
+ known_sites_snps,
+ known_sites_snps_tbi,
+ known_snps_vqsr,
+ 'sentieon_dnascope')
+ // The joint-genotyped VCF replaces the per-sample DNAscope VCFs
+ vcf_sentieon_dnascope = BAM_JOINT_CALLING_GERMLINE_SENTIEON.out.genotype_vcf
+ versions = versions.mix(BAM_JOINT_CALLING_GERMLINE_SENTIEON.out.versions)
+ } else {
+ // If single sample track, check if filtering should be done
+ if (!(skip_tools && skip_tools.split(',').contains('dnascope_filter'))) {
+ // Run SENTIEON_DNAMODELAPPLY on the per-sample VCF (joined with its index) using the DNAscope model; its VCF then replaces the raw DNAscope VCF
+ SENTIEON_DNAMODELAPPLY(
+ vcf_sentieon_dnascope.join(vcf_tbi_sentieon_dnascope, failOnDuplicate: true, failOnMismatch: true),
+ fasta.map{ fasta -> [ [ id:fasta.baseName ], fasta ] },
+ fasta_fai.map{ fai -> [ [ id:fai.baseName ], fai ] },
+ sentieon_dnascope_model.map{ model -> [ [ id:model.baseName ], model ] })
+
+ vcf_sentieon_dnascope = SENTIEON_DNAMODELAPPLY.out.vcf
+ versions = versions.mix(SENTIEON_DNAMODELAPPLY.out.versions)
+
+ }
+
+ }
+ }
+
// SENTIEON HAPLOTYPER
if (tools.split(',').contains('sentieon_haplotyper')) {
BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER(
@@ -215,7 +285,8 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL {
known_indels_vqsr,
known_sites_snps,
known_sites_snps_tbi,
- known_snps_vqsr)
+ known_snps_vqsr,
+ 'sentieon_haplotyper')
vcf_sentieon_haplotyper = BAM_JOINT_CALLING_GERMLINE_SENTIEON.out.genotype_vcf
versions = versions.mix(BAM_JOINT_CALLING_GERMLINE_SENTIEON.out.versions)
@@ -270,6 +341,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL {
vcf_all = Channel.empty().mix(
vcf_deepvariant,
vcf_freebayes,
+ vcf_sentieon_dnascope,
vcf_haplotypecaller,
vcf_manta,
vcf_mpileup,
@@ -279,6 +351,8 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL {
)
emit:
+ gvcf_sentieon_dnascope
+ gvcf_sentieon_haplotyper
vcf_all
vcf_deepvariant
vcf_freebayes
@@ -286,8 +360,8 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL {
vcf_manta
vcf_mpileup
vcf_strelka
+ vcf_sentieon_dnascope
vcf_sentieon_haplotyper
- gvcf_sentieon_haplotyper
vcf_tiddit
versions
diff --git a/subworkflows/local/bam_variant_calling_sentieon_dnascope/main.nf b/subworkflows/local/bam_variant_calling_sentieon_dnascope/main.nf
new file mode 100644
index 0000000000..9eea9b2d61
--- /dev/null
+++ b/subworkflows/local/bam_variant_calling_sentieon_dnascope/main.nf
@@ -0,0 +1,157 @@
+//
+// SENTIEON DNASCOPE germline variant calling
+//
+// For all modules here:
+// A when clause condition is defined in the conf/modules.config to determine if the module should be run
+
+include { GATK4_MERGEVCFS as MERGE_SENTIEON_DNASCOPE_GVCFS } from '../../../modules/nf-core/gatk4/mergevcfs/main'
+include { GATK4_MERGEVCFS as MERGE_SENTIEON_DNASCOPE_VCFS } from '../../../modules/nf-core/gatk4/mergevcfs/main'
+include { SENTIEON_DNASCOPE } from '../../../modules/nf-core/sentieon/dnascope/main'
+
+workflow BAM_VARIANT_CALLING_SENTIEON_DNASCOPE {
+    // Scatter Sentieon DNAscope over the intervals, then gather the per-interval VCFs/gVCFs of each sample.
+    // With joint_germline, the un-merged per-interval gVCFs are additionally emitted for joint genotyping.
+    take:
+    cram                              // channel: [mandatory] [ meta, cram, crai ]
+    fasta                             // channel: [mandatory]
+    fasta_fai                         // channel: [mandatory]
+    dict                              // channel: [mandatory]
+    dbsnp                             // channel: [optional]
+    dbsnp_tbi                         // channel: [optional]
+    dbsnp_vqsr                        // channel: [optional]
+    intervals                         // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] if no intervals
+    joint_germline                    // boolean: [mandatory] [default: false] joint calling of germline variants
+    sentieon_dnascope_emit_mode       // string: comma-separated emit modes; 'gvcf' additionally requests a gVCF
+    sentieon_dnascope_pcr_indel_model // string
+    sentieon_dnascope_model           // channel
+
+    main:
+    versions = Channel.empty()
+
+    gvcf               = Channel.empty()
+    vcf                = Channel.empty()
+    genotype_intervals = Channel.empty()
+
+    // Combine cram and intervals for spread and gather strategy
+    cram_intervals_for_sentieon = cram.combine(intervals)
+        // Move num_intervals to meta map
+        .map{ meta, cram, crai, intervals, num_intervals -> [
+            meta + [
+                num_intervals:num_intervals,
+                intervals_name:intervals.simpleName,
+                variantcaller:'sentieon_dnascope'],
+            cram,
+            crai,
+            intervals
+            ]
+        }
+
+    // collect (not each, which returns the unmodified input) so that the lower-cased, trimmed items are actually used
+    emit_mode_items = sentieon_dnascope_emit_mode.split(',').collect{ it.toLowerCase().trim() }
+    lst = emit_mode_items - 'gvcf'
+    emit_vcf = lst.size() > 0 ? lst[0] : ''
+
+    SENTIEON_DNASCOPE(
+        cram_intervals_for_sentieon,
+        fasta.map{it -> [[:], it]},
+        fasta_fai.map{it -> [[:], it]},
+        dbsnp.map{it -> [[:], it]},
+        dbsnp_tbi.map{it -> [[:], it]},
+        sentieon_dnascope_model.map{it -> [[:], it]},
+        sentieon_dnascope_pcr_indel_model,
+        emit_vcf,
+        emit_mode_items.any{ it.equals('gvcf') })
+
+    if (joint_germline) {
+        genotype_intervals = SENTIEON_DNASCOPE.out.gvcf
+            .join(SENTIEON_DNASCOPE.out.gvcf_tbi, failOnMismatch: true)
+            .join(cram_intervals_for_sentieon, failOnMismatch: true)
+            .map{ meta, gvcf, tbi, cram, crai, intervals -> [ meta, gvcf, tbi, intervals ] }
+    }
+
+    // Figure out if using intervals or no_intervals; intervals_name (the key set above) is dropped from meta on the way
+    dnascope_vcf_branch = SENTIEON_DNASCOPE.out.vcf.map{
+            meta, vcf -> [ meta - meta.subMap('intervals_name'), vcf]
+        }
+        .branch{
+            intervals:    it[0].num_intervals > 1
+            no_intervals: it[0].num_intervals <= 1
+        }
+
+    dnascope_vcf_tbi_branch = SENTIEON_DNASCOPE.out.vcf_tbi.map{
+            meta, vcf_tbi -> [ meta - meta.subMap('intervals_name'), vcf_tbi]
+        }
+        .branch{
+            intervals:    it[0].num_intervals > 1
+            no_intervals: it[0].num_intervals <= 1
+        }
+
+    dnascope_gvcf_branch = SENTIEON_DNASCOPE.out.gvcf.map{
+            meta, gvcf -> [ meta - meta.subMap('intervals_name'), gvcf]
+        }
+        .branch{
+            intervals:    it[0].num_intervals > 1
+            no_intervals: it[0].num_intervals <= 1
+        }
+
+    dnascope_gvcf_tbi_branch = SENTIEON_DNASCOPE.out.gvcf_tbi.map{
+            meta, gvcf_tbi -> [ meta - meta.subMap('intervals_name'), gvcf_tbi]
+        }
+        .branch{
+            intervals:    it[0].num_intervals > 1
+            no_intervals: it[0].num_intervals <= 1
+        }
+
+    // VCFs
+    // Only when using intervals
+    // NOTE(review): intervals_name is already removed above, so the second map looks redundant;
+    // it is kept so the grouping key handed to groupTuple stays exactly as before - confirm before dropping it
+    vcfs_for_merging = dnascope_vcf_branch.intervals
+        .map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ] }
+        .map{ meta, vcf -> [ meta - meta.subMap('intervals_name'), vcf ] }
+        .groupTuple()
+
+    MERGE_SENTIEON_DNASCOPE_VCFS(vcfs_for_merging, dict)
+
+    dnascope_vcf = Channel.empty().mix(
+        MERGE_SENTIEON_DNASCOPE_VCFS.out.vcf,
+        dnascope_vcf_branch.no_intervals)
+
+    dnascope_vcf_tbi = Channel.empty().mix(
+        MERGE_SENTIEON_DNASCOPE_VCFS.out.tbi,
+        dnascope_vcf_tbi_branch.no_intervals)
+
+    // Remove no longer necessary field: num_intervals
+    vcf = dnascope_vcf.map{ meta, vcf -> [ meta - meta.subMap('num_intervals'), vcf ] }
+    vcf_tbi = dnascope_vcf_tbi.map{ meta, tbi -> [ meta - meta.subMap('num_intervals'), tbi ] }
+
+    // gVCFs
+    // Only when using intervals
+    gvcfs_for_merging = dnascope_gvcf_branch.intervals
+        .map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ] }
+        .map{ meta, vcf -> [ meta - meta.subMap('intervals_name'), vcf ] }
+        .groupTuple()
+
+    MERGE_SENTIEON_DNASCOPE_GVCFS(gvcfs_for_merging, dict)
+
+    gvcf = Channel.empty().mix(
+        MERGE_SENTIEON_DNASCOPE_GVCFS.out.vcf,
+        dnascope_gvcf_branch.no_intervals)
+
+    gvcf_tbi = Channel.empty().mix(
+        MERGE_SENTIEON_DNASCOPE_GVCFS.out.tbi,
+        dnascope_gvcf_tbi_branch.no_intervals)
+
+    versions = versions.mix(SENTIEON_DNASCOPE.out.versions)
+    versions = versions.mix(MERGE_SENTIEON_DNASCOPE_VCFS.out.versions)
+    versions = versions.mix(MERGE_SENTIEON_DNASCOPE_GVCFS.out.versions)
+
+    emit:
+    versions
+    vcf
+    vcf_tbi
+    gvcf
+    gvcf_tbi
+    genotype_intervals // For joint genotyping
+
+}
diff --git a/subworkflows/local/bam_variant_calling_sentieon_haplotyper/main.nf b/subworkflows/local/bam_variant_calling_sentieon_haplotyper/main.nf
index cf9bcf9e21..4b280d271c 100644
--- a/subworkflows/local/bam_variant_calling_sentieon_haplotyper/main.nf
+++ b/subworkflows/local/bam_variant_calling_sentieon_haplotyper/main.nf
@@ -42,7 +42,8 @@ workflow BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER {
]
}
- emit_mode_items = sentieon_haplotyper_emit_mode.split(',')
+
+ emit_mode_items = sentieon_haplotyper_emit_mode.split(',').collect{ it.toLowerCase().trim() } // collect, not each: each returns the unmodified input, discarding the normalised values
lst = emit_mode_items - 'gvcf'
emit_vcf = lst.size() > 0 ? lst[0] : ''
@@ -53,7 +54,7 @@ workflow BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER {
dbsnp,
dbsnp_tbi,
emit_vcf,
- emit_mode_items.contains('gvcf'))
+ emit_mode_items.any{ it.equals('gvcf') })
if (joint_germline) {
genotype_intervals = SENTIEON_HAPLOTYPER.out.gvcf
diff --git a/tests/config/tags.yml b/tests/config/tags.yml
index 362bc525b9..4fabc3a7d5 100644
--- a/tests/config/tags.yml
+++ b/tests/config/tags.yml
@@ -314,6 +314,31 @@ haplotypecaller_skip_filter:
- tests/csv/3.0/mapped_single_bam.csv
- tests/test_haplotypecaller_skip_filter.yml
+## sentieon/dnascope
+sentieon/dnascope:
+ - conf/modules/sentieon_dnascope.config
+ - modules/nf-core/sentieon/dnascope/main.nf
+ - modules/nf-core/gatk4/mergevcfs/main.nf
+ - modules/nf-core/samtools/index/main.nf
+ - modules/nf-core/samtools/merge/main.nf
+ - subworkflows/local/bam_merge_index_samtools/main.nf
+ - subworkflows/local/bam_variant_calling_germline_all/main.nf
+ - subworkflows/local/bam_variant_calling_sentieon_dnascope/main.nf
+ - tests/csv/3.0/mapped_single_bam.csv
+ - tests/test_sentieon_dnascope.yml
+
+sentieon_dnascope_skip_filter:
+ - conf/modules/sentieon_dnascope.config
+ - modules/nf-core/sentieon/dnascope/main.nf
+ - modules/nf-core/gatk4/mergevcfs/main.nf
+ - modules/nf-core/samtools/index/main.nf
+ - modules/nf-core/samtools/merge/main.nf
+ - subworkflows/local/bam_merge_index_samtools/main.nf
+ - subworkflows/local/bam_variant_calling_germline_all/main.nf
+ - subworkflows/local/bam_variant_calling_sentieon_dnascope/main.nf
+ - tests/csv/3.0/mapped_single_bam.csv
+ - tests/test_sentieon_dnascope_skip_filter.yml
+
## sentieon/haplotyper
sentieon/haplotyper:
- conf/modules/sentieon_haplotyper.config
@@ -364,16 +389,27 @@ joint_germline:
- tests/csv/3.0/mapped_joint_bam.csv
- tests/test_joint_germline.yml
-## sentieon_joint_germline
-sentieon_joint_germline:
+## sentieon_dnascope_joint_germline
+sentieon_dnascope_joint_germline:
+ - conf/modules/prepare_genome.config
+ - conf/modules/sentieon_dnascope.config
+ - conf/modules/sentieon_dnascope_joint_germline.config
+ - modules/nf-core/sentieon/dnascope/main.nf
+ - subworkflows/local/bam_variant_calling_germline_all/main.nf
+ - subworkflows/local/bam_variant_calling_sentieon_dnascope/main.nf
+ - tests/csv/3.0/mapped_joint_bam.csv
+ - tests/test_sentieon_dnascope_joint_germline.yml
+
+## sentieon_haplotyper_joint_germline
+sentieon_haplotyper_joint_germline:
- conf/modules/prepare_genome.config
- conf/modules/sentieon_haplotyper.config
- - conf/modules/sentieon_joint_germline.config
+ - conf/modules/sentieon_haplotyper_joint_germline.config
- modules/nf-core/sentieon/haplotyper/main.nf
- subworkflows/local/bam_variant_calling_germline_all/main.nf
- subworkflows/local/bam_variant_calling_sentieon_haplotyper/main.nf
- tests/csv/3.0/mapped_joint_bam.csv
- - tests/test_sentieon_joint_germline.yml
+ - tests/test_sentieon_haplotyper_joint_germline.yml
## manta
manta:
diff --git a/tests/test_gatk4_spark.yml b/tests/test_gatk4_spark.yml
index 32082446ec..8dbc8fb974 100644
--- a/tests/test_gatk4_spark.yml
+++ b/tests/test_gatk4_spark.yml
@@ -48,7 +48,7 @@
# conda changes md5sums for test
- path: results/preprocessing/mapped/
should_exist: false
-- name: Run default pipeline with gatk4_spark & skipping all QC steps
+- name: Run default pipeline with gatk4_spark and skipping all QC steps
command: nextflow run main.nf -profile test_cache,use_gatk_spark --skip_tools fastqc,markduplicates_report,mosdepth,multiqc,samtools --outdir results
tags:
- gatk4_spark
diff --git a/tests/test_sentieon_dnascope.yml b/tests/test_sentieon_dnascope.yml
new file mode 100644
index 0000000000..f51e0bca72
--- /dev/null
+++ b/tests/test_sentieon_dnascope.yml
@@ -0,0 +1,147 @@
+- name: Run variant calling on germline sample with sentieons dnascope
+ command: nextflow run main.nf -profile test_cache,targeted,software_license --sentieon_extension --input ./tests/csv/3.0/mapped_single_bam.csv --tools sentieon_dnascope --step variant_calling --outdir results
+ tags:
+ - germline
+ - sentieon/dnascope
+ - variant_calling
+ files:
+ - path: results/csv/variantcalled.csv
+ md5sum: b2144d21a0ebfd807a8646f1751d0ddc
+ - path: results/multiqc
+ - path: results/preprocessing/converted/test/test.converted.cram
+ # binary changes md5sums on reruns
+ - path: results/preprocessing/converted/test/test.converted.cram.crai
+ # binary changes md5sums on reruns
+ - path: results/preprocessing/recalibrated/test/test.recal.cram
+ should_exist: false
+ - path: results/preprocessing/recalibrated/test/test.recal.cram.crai
+ should_exist: false
+ - path: results/reports/bcftools/sentieon_dnascope/test/test.dnascope.filtered.bcftools_stats.txt
+ md5sum: 912c7d5b31784c50e0a75b4fcfa4997b
+ - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.filtered.FILTER.summary
+ md5sum: e67b24d296810a075378e5864bcea0fa
+ - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.filtered.TsTv.count
+ md5sum: b77c120ee5cc0423267200c67d60c663
+ - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.filtered.TsTv.qual
+ # changes md5sum on reruns. This is somewhat unexpected, but might be due to tiny variation in very small numbers in the qual-files.
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.filtered.vcf.gz
+ # binary changes md5sums on reruns
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.filtered.vcf.gz.tbi
+ # binary changes md5sums on reruns
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.unfiltered.vcf.gz
+ # binary changes md5sums on reruns
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.unfiltered.vcf.gz.tbi
+ # binary changes md5sums on reruns
+ - path: results/dnascope
+ should_exist: false
+- name: Run variant calling on germline sample with sentieons dnascope without intervals
+ command: nextflow run main.nf -profile test_cache,targeted,software_license --sentieon_extension --input ./tests/csv/3.0/mapped_single_bam.csv --tools sentieon_dnascope --step variant_calling --no_intervals --outdir results
+ tags:
+ - germline
+ - sentieon/dnascope
+ - no_intervals
+ - variant_calling
+ files:
+ - path: results/csv/variantcalled.csv
+ md5sum: b2144d21a0ebfd807a8646f1751d0ddc
+ - path: results/multiqc
+ - path: results/no_intervals.bed
+ md5sum: f3dac01ea66b95fe477446fde2d31489
+ - path: results/no_intervals.bed.gz
+ md5sum: f3dac01ea66b95fe477446fde2d31489
+ - path: results/no_intervals.bed.gz.tbi
+ md5sum: f3dac01ea66b95fe477446fde2d31489
+ - path: results/preprocessing/converted/test/test.converted.cram
+ # binary changes md5sums on reruns
+ - path: results/preprocessing/converted/test/test.converted.cram.crai
+ # binary changes md5sums on reruns
+ - path: results/preprocessing/recalibrated/test/test.recal.cram
+ should_exist: false
+ - path: results/preprocessing/recalibrated/test/test.recal.cram.crai
+ should_exist: false
+ - path: results/reports/bcftools/sentieon_dnascope/test/test.dnascope.filtered.bcftools_stats.txt
+ md5sum: 912c7d5b31784c50e0a75b4fcfa4997b
+ - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.filtered.FILTER.summary
+ md5sum: e67b24d296810a075378e5864bcea0fa
+ - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.filtered.TsTv.count
+ md5sum: b77c120ee5cc0423267200c67d60c663
+ - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.filtered.TsTv.qual
+ # changes md5sum on reruns. This is somewhat unexpected, but might be due to tiny variation in very small numbers in the qual-files.
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.filtered.vcf.gz
+ # binary changes md5sums on reruns
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.filtered.vcf.gz.tbi
+ # binary changes md5sums on reruns
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.unfiltered.vcf.gz
+ # binary changes md5sums on reruns
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.unfiltered.vcf.gz.tbi
+ # binary changes md5sums on reruns
+ - path: results/sentieon_dnascope
+ should_exist: false
+- name: Run variant calling on germline sample with sentieons dnascope output gvcf
+ command: nextflow run main.nf -profile test_cache,targeted,software_license --sentieon_extension --input ./tests/csv/3.0/mapped_single_bam.csv --tools sentieon_dnascope --step variant_calling --outdir results --sentieon_dnascope_emit_mode gvcf
+ tags:
+ - germline
+ - sentieon/dnascope
+ - variant_calling
+ files:
+ - path: results/csv/variantcalled.csv
+ should_exist: false
+ - path: results/multiqc
+ - path: results/preprocessing/converted/test/test.converted.cram
+ # binary changes md5sums on reruns
+ - path: results/preprocessing/converted/test/test.converted.cram.crai
+ # binary changes md5sums on reruns
+ - path: results/preprocessing/recalibrated/test/test.recal.cram
+ should_exist: false
+ - path: results/preprocessing/recalibrated/test/test.recal.cram.crai
+ should_exist: false
+ - path: results/reports/bcftools/sentieon_dnascope/test/test.dnascope.filtered.bcftools_stats.txt
+ should_exist: false
+ - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.filtered.FILTER.summary
+ should_exist: false
+ - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.filtered.TsTv.count
+ should_exist: false
+ - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.filtered.TsTv.qual
+ should_exist: false
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.g.vcf.gz
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.g.vcf.gz.tbi
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.filtered.vcf.gz
+ should_exist: false
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.filtered.vcf.gz.tbi
+ should_exist: false
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.unfiltered.vcf.gz
+ should_exist: false
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.unfiltered.vcf.gz.tbi
+ should_exist: false
+ - path: results/dnascope
+ should_exist: false
+- name: Run variant calling on germline sample with sentieons dnascope output both gvcf and vcf
+ command: nextflow run main.nf -profile test_cache,targeted,software_license --sentieon_extension --input ./tests/csv/3.0/mapped_single_bam.csv --tools sentieon_dnascope --step variant_calling --outdir results --sentieon_dnascope_emit_mode variant,gvcf
+ tags:
+ - germline
+ - sentieon/dnascope
+ - variant_calling
+ files:
+ - path: results/csv/variantcalled.csv
+ md5sum: b2144d21a0ebfd807a8646f1751d0ddc
+ - path: results/multiqc
+ - path: results/preprocessing/converted/test/test.converted.cram
+ # binary changes md5sums on reruns
+ - path: results/preprocessing/converted/test/test.converted.cram.crai
+ # binary changes md5sums on reruns
+ - path: results/preprocessing/recalibrated/test/test.recal.cram
+ should_exist: false
+ - path: results/preprocessing/recalibrated/test/test.recal.cram.crai
+ should_exist: false
+ - path: results/reports/bcftools/sentieon_dnascope/test/test.dnascope.filtered.bcftools_stats.txt
+ - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.filtered.FILTER.summary
+ - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.filtered.TsTv.count
+ - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.filtered.TsTv.qual
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.g.vcf.gz
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.g.vcf.gz.tbi
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.filtered.vcf.gz
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.filtered.vcf.gz.tbi
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.unfiltered.vcf.gz
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.unfiltered.vcf.gz.tbi
+ - path: results/dnascope
+ should_exist: false
diff --git a/tests/test_sentieon_dnascope_joint_germline.yml b/tests/test_sentieon_dnascope_joint_germline.yml
new file mode 100644
index 0000000000..e905b9cd53
--- /dev/null
+++ b/tests/test_sentieon_dnascope_joint_germline.yml
@@ -0,0 +1,68 @@
+- name: Run joint germline variant calling with sentieon dnascope
+ command: nextflow run main.nf -profile test_cache,targeted,software_license --sentieon_extension --input ./tests/csv/3.0/mapped_joint_bam.csv --tools sentieon_dnascope --step variant_calling --joint_germline --outdir results --sentieon_dnascope_emit_mode gvcf
+ tags:
+ - germline
+ - sentieon_dnascope_joint_germline
+ - variant_calling
+ - sentieon/dnascope
+ files:
+ - path: results/csv/variantcalled.csv
+ md5sum: 62d70060aad96337254efe2d7a1df170
+ - path: results/multiqc
+ - path: results/reports/bcftools/sentieon_dnascope/joint_variant_calling/joint_germline.bcftools_stats.txt
+ - path: results/reports/vcftools/sentieon_dnascope/joint_variant_calling/joint_germline.FILTER.summary
+ - path: results/reports/vcftools/sentieon_dnascope/joint_variant_calling/joint_germline.TsTv.count
+ - path: results/reports/vcftools/sentieon_dnascope/joint_variant_calling/joint_germline.TsTv.qual
+ - path: results/variant_calling/sentieon_dnascope/joint_variant_calling/joint_germline.vcf.gz
+ # binary changes md5sums on reruns
+ - path: results/variant_calling/sentieon_dnascope/joint_variant_calling/joint_germline.vcf.gz.tbi
+ # binary changes md5sums on reruns
+ - path: results/variant_calling/sentieon_dnascope/testN/testN.dnascope.g.vcf.gz
+ - path: results/variant_calling/sentieon_dnascope/testN/testN.dnascope.g.vcf.gz.tbi
+ - path: results/variant_calling/sentieon_dnascope/testT/testT.dnascope.g.vcf.gz
+ - path: results/variant_calling/sentieon_dnascope/testT/testT.dnascope.g.vcf.gz.tbi
+ - path: results/dnascope
+ should_exist: false
+ - path: results/preprocessing/recalibrated/test/test.recal.cram
+ should_exist: false
+ - path: results/preprocessing/recalibrated/test/test.recal.cram.crai
+ should_exist: false
+ - path: results/reports/bcftools/sentieon_dnascope/joint_variant_calling/joint_germline_recalibrated.bcftools_stats.txt
+ should_exist: false
+ - path: results/reports/vcftools/sentieon_dnascope/joint_variant_calling/joint_germline_recalibrated.FILTER.summary
+ should_exist: false
+ - path: results/reports/vcftools/sentieon_dnascope/joint_variant_calling/joint_germline_recalibrated.TsTv.count
+ should_exist: false
+ - path: results/reports/vcftools/sentieon_dnascope/joint_variant_calling/joint_germline_recalibrated.TsTv.qual
+ should_exist: false
+ - path: results/variant_calling/sentieon_dnascope/joint_variant_calling/joint_germline_recalibrated.vcf.gz
+ should_exist: false
+ - path: results/variant_calling/sentieon_dnascope/joint_variant_calling/joint_germline_recalibrated.vcf.gz.tbi
+ should_exist: false
+- name: Run joint germline variant calling with sentieon dnascope all intervals at once
+ command: nextflow run main.nf -profile test_cache,targeted,software_license --sentieon_extension --input ./tests/csv/3.0/mapped_joint_bam.csv --tools sentieon_dnascope --step variant_calling --joint_germline --outdir results --sentieon_dnascope_emit_mode gvcf --nucleotides_per_second 100
+ tags:
+ - germline
+ - sentieon_dnascope_joint_germline
+ - variant_calling
+ - sentieon/dnascope
+ files:
+ - path: results/csv/variantcalled.csv
+ md5sum: 62d70060aad96337254efe2d7a1df170
+ - path: results/multiqc
+ - path: results/preprocessing/recalibrated/test/test.recal.cram
+ should_exist: false
+ - path: results/preprocessing/recalibrated/test/test.recal.cram.crai
+ should_exist: false
+ - path: results/reports/bcftools/sentieon_dnascope/joint_variant_calling/joint_germline.bcftools_stats.txt
+ - path: results/reports/vcftools/sentieon_dnascope/joint_variant_calling/joint_germline.FILTER.summary
+ - path: results/reports/vcftools/sentieon_dnascope/joint_variant_calling/joint_germline.TsTv.count
+ - path: results/reports/vcftools/sentieon_dnascope/joint_variant_calling/joint_germline.TsTv.qual
+ - path: results/variant_calling/sentieon_dnascope/joint_variant_calling/joint_germline.vcf.gz
+ - path: results/variant_calling/sentieon_dnascope/joint_variant_calling/joint_germline.vcf.gz.tbi
+ - path: results/variant_calling/sentieon_dnascope/testN/testN.dnascope.g.vcf.gz
+ - path: results/variant_calling/sentieon_dnascope/testN/testN.dnascope.g.vcf.gz.tbi
+ - path: results/variant_calling/sentieon_dnascope/testT/testT.dnascope.g.vcf.gz
+ - path: results/variant_calling/sentieon_dnascope/testT/testT.dnascope.g.vcf.gz.tbi
+ - path: results/dnascope
+ should_exist: false
diff --git a/tests/test_sentieon_dnascope_skip_filter.yml b/tests/test_sentieon_dnascope_skip_filter.yml
new file mode 100644
index 0000000000..16bbca9e7c
--- /dev/null
+++ b/tests/test_sentieon_dnascope_skip_filter.yml
@@ -0,0 +1,81 @@
+- name: Run variant calling on germline sample with sentieon dnascope and skip filter
+ command: nextflow run main.nf -profile test_cache,targeted,software_license --sentieon_extension --input ./tests/csv/3.0/mapped_single_bam.csv --tools sentieon_dnascope --step variant_calling --skip_tools dnascope_filter --outdir results
+ tags:
+ - germline
+ - sentieon_dnascope_skip_filter
+ - variant_calling
+ - sentieon/dnascope
+ files:
+ - path: results/csv/variantcalled.csv
+ md5sum: 10254414c0679ba1fb25e41b9ff548cc
+ - path: results/multiqc
+ - path: results/preprocessing/converted/test/test.converted.cram
+ # binary changes md5sums on reruns
+ - path: results/preprocessing/converted/test/test.converted.cram.crai
+ # binary changes md5sums on reruns
+ - path: results/preprocessing/recalibrated/test/test.recal.cram
+ should_exist: false
+ - path: results/preprocessing/recalibrated/test/test.recal.cram.crai
+ should_exist: false
+ - path: results/reports/bcftools/sentieon_dnascope/test/test.dnascope.unfiltered.bcftools_stats.txt
+ md5sum: f915fe1591ababb0da5e7b43dfc35092
+ - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.unfiltered.FILTER.summary
+ md5sum: 87a84b5f8ac3d3cbeeef7d60afcdbfe7
+ - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.unfiltered.TsTv.count
+ md5sum: b77c120ee5cc0423267200c67d60c663
+ - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.unfiltered.TsTv.qual
+ # changes md5sum on reruns. This is somewhat unexpected, but might be due to tiny variation in very small numbers in the qual-files.
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.filtered.vcf.gz
+ should_exist: false
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.filtered.vcf.gz.tbi
+ should_exist: false
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.unfiltered.vcf.gz
+ # binary changes md5sums on reruns
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.unfiltered.vcf.gz.tbi
+ # binary changes md5sums on reruns
+ - path: results/sentieon_dnascope
+ should_exist: false
+- name: Run variant calling on germline sample with sentieon dnascope without intervals and skip filter
+ command: nextflow run main.nf -profile test_cache,targeted,software_license --sentieon_extension --input ./tests/csv/3.0/mapped_single_bam.csv --tools sentieon_dnascope --step variant_calling --skip_tools dnascope_filter --no_intervals --outdir results
+ tags:
+ - germline
+ - sentieon_dnascope_skip_filter
+ - no_intervals
+ - variant_calling
+ - sentieon/dnascope
+ files:
+ - path: results/csv/variantcalled.csv
+ md5sum: 10254414c0679ba1fb25e41b9ff548cc
+ - path: results/multiqc
+ - path: results/no_intervals.bed
+ md5sum: f3dac01ea66b95fe477446fde2d31489
+ - path: results/no_intervals.bed.gz
+ md5sum: f3dac01ea66b95fe477446fde2d31489
+ - path: results/no_intervals.bed.gz.tbi
+ md5sum: f3dac01ea66b95fe477446fde2d31489
+ - path: results/preprocessing/converted/test/test.converted.cram
+ # binary changes md5sums on reruns
+ - path: results/preprocessing/converted/test/test.converted.cram.crai
+ # binary changes md5sums on reruns
+ - path: results/preprocessing/recalibrated/test/test.recal.cram
+ should_exist: false
+ - path: results/preprocessing/recalibrated/test/test.recal.cram.crai
+ should_exist: false
+ - path: results/reports/bcftools/sentieon_dnascope/test/test.dnascope.unfiltered.bcftools_stats.txt
+ md5sum: f915fe1591ababb0da5e7b43dfc35092
+ - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.unfiltered.FILTER.summary
+ md5sum: 87a84b5f8ac3d3cbeeef7d60afcdbfe7
+ - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.unfiltered.TsTv.count
+ md5sum: b77c120ee5cc0423267200c67d60c663
+ - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.unfiltered.TsTv.qual
+ # changes md5sum on reruns. This is somewhat unexpected, but might be due to tiny variation in very small numbers in the qual-files.
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.filtered.vcf.gz
+ should_exist: false
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.filtered.vcf.gz.tbi
+ should_exist: false
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.unfiltered.vcf.gz
+ # binary changes md5sums on reruns
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.unfiltered.vcf.gz.tbi
+ # binary changes md5sums on reruns
+ - path: results/sentieon_dnascope
+ should_exist: false
diff --git a/tests/test_sentieon_joint_germline.yml b/tests/test_sentieon_haplotyper_joint_germline.yml
similarity index 97%
rename from tests/test_sentieon_joint_germline.yml
rename to tests/test_sentieon_haplotyper_joint_germline.yml
index 4637571aec..1c12f101db 100644
--- a/tests/test_sentieon_joint_germline.yml
+++ b/tests/test_sentieon_haplotyper_joint_germline.yml
@@ -2,7 +2,7 @@
command: nextflow run main.nf -profile test_cache,software_license,targeted --sentieon_extension --input ./tests/csv/3.0/mapped_joint_bam.csv --tools sentieon_haplotyper --step variant_calling --joint_germline --outdir results --sentieon_haplotyper_emit_mode gvcf
tags:
- germline
- - sentieon_joint_germline
+ - sentieon_haplotyper_joint_germline
- variant_calling
- sentieon/haplotyper
files:
@@ -31,7 +31,7 @@
command: nextflow run main.nf -profile test_cache,software_license,targeted --sentieon_extension --input ./tests/csv/3.0/mapped_joint_bam.csv --tools sentieon_haplotyper --step variant_calling --joint_germline --outdir results --sentieon_haplotyper_emit_mode gvcf --nucleotides_per_second 100
tags:
- germline
- - sentieon_joint_germline
+ - sentieon_haplotyper_joint_germline
- variant_calling
- sentieon/haplotyper
files:
@@ -58,7 +58,7 @@
command: nextflow run main.nf -profile test_cache,software_license,tools_germline --sentieon_extension --input ./tests/csv/3.0/mapped_joint_bam.csv --tools sentieon_haplotyper --step variant_calling --joint_germline --outdir results --sentieon_haplotyper_emit_mode gvcf -stub-run
tags:
- germline
- - sentieon_joint_germline
+ - sentieon_haplotyper_joint_germline
- variant_calling
- vqsr
files:
diff --git a/workflows/sarek.nf b/workflows/sarek.nf
index 5ae80fbae1..a6ed5b2e50 100644
--- a/workflows/sarek.nf
+++ b/workflows/sarek.nf
@@ -50,6 +50,8 @@ def checkPathParamList = [
params.multiqc_config,
params.pon,
params.pon_tbi,
+ params.sentieon_dnascope_model,
+ params.snpeff_cache,
params.spliceai_indel,
params.spliceai_indel_tbi,
params.spliceai_snv,
@@ -255,18 +257,60 @@ if (!params.dbsnp && !params.known_indels) {
if (params.step in ['mapping', 'markduplicates', 'prepare_recalibration', 'recalibrate'] && (!params.skip_tools || (params.skip_tools && !params.skip_tools.split(',').contains('baserecalibrator')))) {
error("Base quality score recalibration requires at least one resource file. Please provide at least one of `--dbsnp` or `--known_indels`\nYou can skip this step in the workflow by adding `--skip_tools baserecalibrator` to the command.")
}
- if (params.tools && (params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper'))) {
- log.warn "If GATK's Haplotypecaller or Sentieon's Haplotyper is specified, without `--dbsnp` or `--known_indels no filtering will be done. For filtering, please provide at least one of `--dbsnp` or `--known_indels`.\nFor more information see FilterVariantTranches (single-sample, default): https://gatk.broadinstitute.org/hc/en-us/articles/5358928898971-FilterVariantTranches\nFor more information see VariantRecalibration (--joint_germline): https://gatk.broadinstitute.org/hc/en-us/articles/5358906115227-VariantRecalibrator\nFor more information on GATK Best practice germline variant calling: https://gatk.broadinstitute.org/hc/en-us/articles/360035535932-Germline-short-variant-discovery-SNPs-Indels-"
+ if (params.tools && (params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper') || params.tools.split(',').contains('sentieon_dnascope'))) {
+ log.warn "If GATK's Haplotypecaller, Sentieon's Dnascope or Sentieon's Haplotyper is specified, without `--dbsnp` or `--known_indels` no filtering will be done. For filtering, please provide at least one of `--dbsnp` or `--known_indels`.\nFor more information see FilterVariantTranches (single-sample, default): https://gatk.broadinstitute.org/hc/en-us/articles/5358928898971-FilterVariantTranches\nFor more information see VariantRecalibration (--joint_germline): https://gatk.broadinstitute.org/hc/en-us/articles/5358906115227-VariantRecalibrator\nFor more information on GATK Best practice germline variant calling: https://gatk.broadinstitute.org/hc/en-us/articles/360035535932-Germline-short-variant-discovery-SNPs-Indels-"
}
}
-if (params.joint_germline && (!params.tools || !(params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper')))) {
- error("The GATK's Haplotypecaller or Sentieon's Haplotyper should be specified as one of the tools when doing joint germline variant calling.) ")
+if (params.joint_germline && (!params.tools || !(params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper') || params.tools.split(',').contains('sentieon_dnascope')))) {
+ error("The GATK's Haplotypecaller, Sentieon's Dnascope or Sentieon's Haplotyper should be specified as one of the tools when doing joint germline variant calling.")
}
-if (params.joint_germline && (!params.dbsnp || !params.known_indels || !params.known_snps || params.no_intervals)) {
- log.warn "If GATK's Haplotypecaller or Sentieon's Haplotyper is specified, without `--dbsnp`, `--known_snps`, `--known_indels` or the associated resource labels (ie `known_snps_vqsr`), no variant recalibration will be done. For recalibration you must provide all of these resources.\nFor more information see VariantRecalibration: https://gatk.broadinstitute.org/hc/en-us/articles/5358906115227-VariantRecalibrator \nJoint germline variant calling also requires intervals in order to genotype the samples. As a result, if `--no_intervals` is set to `true` the joint germline variant calling will not be performed."
+if (
+ params.tools &&
+ (
+ params.tools.split(',').contains('haplotypecaller') ||
+ params.tools.split(',').contains('sentieon_haplotyper') ||
+ params.tools.split(',').contains('sentieon_dnascope')
+ ) &&
+ params.joint_germline &&
+ (
+ !params.dbsnp ||
+ !params.known_indels ||
+ !params.known_snps ||
+ params.no_intervals
+ )
+ ) {
+ log.warn("""If GATK's Haplotypecaller, Sentieon's Dnascope and/or Sentieon's Haplotyper is specified, \
+but without `--dbsnp`, `--known_snps`, `--known_indels` or the associated resource labels (ie `known_snps_vqsr`), \
+no variant recalibration will be done. For recalibration you must provide all of these resources.\nFor more information \
+see VariantRecalibration: https://gatk.broadinstitute.org/hc/en-us/articles/5358906115227-VariantRecalibrator \n\
+Joint germline variant calling also requires intervals in order to genotype the samples. \
+As a result, if `--no_intervals` is set to `true` the joint germline variant calling will not be performed.""")
}
+if (params.tools &&
+ params.tools.split(',').contains('sentieon_dnascope') &&
+ params.joint_germline &&
+ (
+ !params.sentieon_dnascope_emit_mode ||
+ !params.sentieon_dnascope_emit_mode.split(',').contains('gvcf')
+ )
+ ) {
+ error("When using Sentieon Dnascope for joint-germline variant-calling the option `--sentieon_dnascope_emit_mode` has to include `gvcf`.")
+}
+
+if (params.tools &&
+ params.tools.split(',').contains('sentieon_haplotyper') &&
+ params.joint_germline &&
+ (
+ !params.sentieon_haplotyper_emit_mode ||
+ !params.sentieon_haplotyper_emit_mode.split(',').contains('gvcf')
+ )
+ ) {
+ error("When using Sentieon Haplotyper for joint-germline variant-calling the option `--sentieon_haplotyper_emit_mode` has to include `gvcf`.")
+}
+
+
// Fails when --joint_mutect2 is used without enabling mutect2
if (params.joint_mutect2 && (!params.tools || !params.tools.split(',').contains('mutect2'))) {
error("The mutect2 should be specified as one of the tools when doing joint somatic variant calling with Mutect2. (The mutect2 could be specified by adding `--tools mutect2` to the nextflow command.)")
@@ -297,20 +341,21 @@ if ((params.download_cache) && (params.snpeff_cache || params.vep_cache)) {
*/
// Initialize file channels based on params, defined in the params.genomes[params.genome] scope
-ascat_alleles = params.ascat_alleles ? Channel.fromPath(params.ascat_alleles).collect() : Channel.empty()
-ascat_loci = params.ascat_loci ? Channel.fromPath(params.ascat_loci).collect() : Channel.empty()
-ascat_loci_gc = params.ascat_loci_gc ? Channel.fromPath(params.ascat_loci_gc).collect() : Channel.value([])
-ascat_loci_rt = params.ascat_loci_rt ? Channel.fromPath(params.ascat_loci_rt).collect() : Channel.value([])
-cf_chrom_len = params.cf_chrom_len ? Channel.fromPath(params.cf_chrom_len).collect() : []
-chr_dir = params.chr_dir ? Channel.fromPath(params.chr_dir).collect() : Channel.value([])
-dbsnp = params.dbsnp ? Channel.fromPath(params.dbsnp).collect() : Channel.value([])
-fasta = params.fasta ? Channel.fromPath(params.fasta).first() : Channel.empty()
-fasta_fai = params.fasta_fai ? Channel.fromPath(params.fasta_fai).collect() : Channel.empty()
-germline_resource = params.germline_resource ? Channel.fromPath(params.germline_resource).collect() : Channel.value([]) // Mutect2 does not require a germline resource, so set to optional input
-known_indels = params.known_indels ? Channel.fromPath(params.known_indels).collect() : Channel.value([])
-known_snps = params.known_snps ? Channel.fromPath(params.known_snps).collect() : Channel.value([])
-mappability = params.mappability ? Channel.fromPath(params.mappability).collect() : Channel.value([])
-pon = params.pon ? Channel.fromPath(params.pon).collect() : Channel.value([]) // PON is optional for Mutect2 (but highly recommended)
+ascat_alleles = params.ascat_alleles ? Channel.fromPath(params.ascat_alleles).collect() : Channel.empty()
+ascat_loci = params.ascat_loci ? Channel.fromPath(params.ascat_loci).collect() : Channel.empty()
+ascat_loci_gc = params.ascat_loci_gc ? Channel.fromPath(params.ascat_loci_gc).collect() : Channel.value([])
+ascat_loci_rt = params.ascat_loci_rt ? Channel.fromPath(params.ascat_loci_rt).collect() : Channel.value([])
+cf_chrom_len = params.cf_chrom_len ? Channel.fromPath(params.cf_chrom_len).collect() : []
+chr_dir = params.chr_dir ? Channel.fromPath(params.chr_dir).collect() : Channel.value([])
+dbsnp = params.dbsnp ? Channel.fromPath(params.dbsnp).collect() : Channel.value([])
+fasta = params.fasta ? Channel.fromPath(params.fasta).first() : Channel.empty()
+fasta_fai = params.fasta_fai ? Channel.fromPath(params.fasta_fai).collect() : Channel.empty()
+germline_resource = params.germline_resource ? Channel.fromPath(params.germline_resource).collect() : Channel.value([]) // Mutect2 does not require a germline resource, so set to optional input
+known_indels = params.known_indels ? Channel.fromPath(params.known_indels).collect() : Channel.value([])
+known_snps = params.known_snps ? Channel.fromPath(params.known_snps).collect() : Channel.value([])
+mappability = params.mappability ? Channel.fromPath(params.mappability).collect() : Channel.value([])
+pon = params.pon ? Channel.fromPath(params.pon).collect() : Channel.value([]) // PON is optional for Mutect2 (but highly recommended)
+sentieon_dnascope_model = params.sentieon_dnascope_model ? Channel.fromPath(params.sentieon_dnascope_model).collect() : Channel.value([])
// Initialize value channels based on params, defined in the params.genomes[params.genome] scope
ascat_genome = params.ascat_genome ?: Channel.empty()
@@ -1164,7 +1209,10 @@ workflow SAREK {
known_snps_vqsr,
params.joint_germline,
params.skip_tools && params.skip_tools.split(',').contains('haplotypecaller_filter'), // true if filtering should be skipped
- params.sentieon_haplotyper_emit_mode)
+ params.sentieon_haplotyper_emit_mode,
+ params.sentieon_dnascope_emit_mode,
+ params.sentieon_dnascope_pcr_indel_model,
+ sentieon_dnascope_model)
// TUMOR ONLY VARIANT CALLING
BAM_VARIANT_CALLING_TUMOR_ONLY_ALL(
@@ -1232,6 +1280,7 @@ workflow SAREK {
vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_freebayes)
vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_haplotypecaller)
vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_manta)
+ vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_sentieon_dnascope)
vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_sentieon_haplotyper)
vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_strelka)
vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_tiddit)