Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sentieon haplotyper refactored #1074

Merged
merged 11 commits into from
Jun 7, 2023
2 changes: 1 addition & 1 deletion .github/workflows/pytest-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: pytest-workflow
# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors
on:
pull_request:
branches: [dev, master]
branches: [dev, master, sentieon]

# Cancel if a newer run is started
concurrency:
Expand Down
4 changes: 1 addition & 3 deletions conf/modules/sentieon_haplotyper.config
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
process {

withName: 'SENTIEON_HAPLOTYPER' {
// ext.args = { params.joint_germline ? "-ERC GVCF" : "" } // TO-DO: Change that
// ext.prefix = { meta.num_intervals <= 1 ? ( params.joint_germline ? "${meta.id}.haplotyper.g" : "${meta.id}.haplotyper" ) : ( params.joint_germline ? "${meta.id}.haplotyper.${intervals.simpleName}.g" :"${meta.id}.haplotyper.${intervals.simpleName}" ) } // old value
ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.haplotyper" : "${meta.id}.haplotyper.${intervals.simpleName}" }
ext.when = { params.tools && params.tools.split(',').contains('sentieon_haplotyper') }
publishDir = [
Expand All @@ -30,7 +28,7 @@ process {
}

withName: 'MERGE_SENTIEON_HAPLOTYPER_VCFS' {
ext.prefix = { params.joint_germline ? "${meta.id}.haplotyper.g" : "${meta.id}.haplotyper.unfiltered" } // TO-DO: This shouldn't be relevant for gvcf. But check with the joint-germline flow.
ext.prefix = { params.joint_germline ? "${meta.id}.haplotyper.g" : "${meta.id}.haplotyper.unfiltered" }
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/variant_calling/sentieon_haplotyper/${meta.id}/" },
Expand Down
17 changes: 9 additions & 8 deletions modules/nf-core/sentieon/haplotyper/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 10 additions & 7 deletions modules/nf-core/sentieon/haplotyper/meta.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ params {
ignore_soft_clipped_bases = false // no --dont-use-soft-clipped-bases for GATK Mutect2
wes = false // Set to true, if data is exome/targeted sequencing data. Used to use correct models in various variant callers
joint_germline = false // g.vcf & joint germline calling are not run by default if HaplotypeCaller is selected
sentieon_haplotyper_out_format = "vcf"
sentieon_haplotyper_emit_mode = "variant"

// Annotation
vep_out_format = 'vcf'
Expand Down
13 changes: 6 additions & 7 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -372,14 +372,13 @@
"help_text": "use the `--dont-use-soft-clipped-bases` params with GATK Mutect2.",
"hidden": true
},
"sentieon_haplotyper_out_format": {
"sentieon_haplotyper_emit_mode": {
"type": "string",
"default": "vcf",
"description": "Option for selecting Sentieon Haplotyper output format.",
"enum": ["vcf", "gvcf", "both"],
"help_text": "Sets the format of the output file or files from Sentieon's Haplotyper. Available formats: vcf, gvcf, or both",
"fa_icon": "fas fa-table",
"hidden": true
"default": "variant",
"fa_icon": "fas fa-toolbox",
"description": "Option for selecting output and emit-mode of Sentieon's Haplotyper.",
"help_text": "The option `--sentieon_haplotyper_emit_mode` can be set to the same string values as the Haplotyper's `--emit_mode`. To output both a vcf and a gvcf, specify both a vcf-option (currently, `all`, `confident` and `variant`) and `gvcf`, separated by a comma without spaces. For example, to obtain a vcf and gvcf one could set `--sentieon_haplotyper_emit_mode` to `variant,gvcf`.",
"pattern": "^(all|confident|gvcf|variant|gvcf,all|gvcf,confident|gvcf,variant|all,gvcf|confident,gvcf|variant,gvcf)(?<!,)$"
}
}
},
Expand Down
4 changes: 2 additions & 2 deletions subworkflows/local/bam_variant_calling_germline_all/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL {
known_sites_snps_tbi
known_snps_vqsr
joint_germline // boolean: [mandatory] [default: false] joint calling of germline variants
sentieon_haplotyper_out_format // channel: [mandatory] value channel with string
sentieon_haplotyper_emit_mode // string: [mandatory] comma-separated Haplotyper emit mode(s), e.g. 'variant,gvcf'

main:
versions = Channel.empty()
Expand Down Expand Up @@ -174,7 +174,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL {
intervals_bed_combined_haplotypec,
(skip_tools && skip_tools.split(',').contains('haplotyper_filter')),
joint_germline,
sentieon_haplotyper_out_format)
sentieon_haplotyper_emit_mode)

versions = versions.mix(BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER.out.versions)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ workflow BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER {
intervals_bed_combined // channel: [mandatory] intervals/target regions in one file unzipped, no_intervals.bed if no_intervals
skip_haplotyper_filter // boolean: [mandatory] [default: false] skip haplotyper filter
joint_germline // boolean: [mandatory] [default: false] joint calling of germline variants
sentieon_haplotyper_out_format // channel: [mandatory] value channel with string
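sentieon_haplotyper_emit_mode // string: [mandatory] comma-separated Haplotyper emit mode(s), e.g. 'variant,gvcf'; 'gvcf' toggles gvcf output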

main:
versions = Channel.empty()
Expand All @@ -45,13 +45,19 @@ workflow BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER {
]
}


emit_mode_items = sentieon_haplotyper_emit_mode.split(',')
lst = emit_mode_items - 'gvcf'
emit_vcf = lst.size() > 0 ? lst[0] : ''

SENTIEON_HAPLOTYPER(
cram_intervals_for_sentieon,
fasta,
fasta_fai,
dbsnp,
dbsnp_tbi,
sentieon_haplotyper_out_format)
emit_vcf,
emit_mode_items.contains('gvcf'))

versions = versions.mix(SENTIEON_HAPLOTYPER.out.versions)

Expand Down
6 changes: 3 additions & 3 deletions tests/test_sentieon_haplotyper.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@
- path: results/sentieon_haplotyper
should_exist: false
- name: Run variant calling on germline sample with sentieons haplotyper output gvcf
command: nextflow run main.nf -profile test,targeted --input ./tests/csv/3.0/mapped_single_bam.csv --tools sentieon_haplotyper --step variant_calling --outdir results --sentieon_haplotyper_out_format gvcf
command: nextflow run main.nf -profile test,targeted --input ./tests/csv/3.0/mapped_single_bam.csv --tools sentieon_haplotyper --step variant_calling --outdir results --sentieon_haplotyper_emit_mode gvcf
tags:
- germline
- sentieon/haplotyper
Expand Down Expand Up @@ -115,8 +115,8 @@
should_exist: false
- path: results/haplotyper
should_exist: false
- name: Run variant calling on germline sample with sentieons haplotyper output both vcf and gvcf
command: nextflow run main.nf -profile test,targeted --input ./tests/csv/3.0/mapped_single_bam.csv --tools sentieon_haplotyper --step variant_calling --outdir results --sentieon_haplotyper_out_format both
- name: Run variant calling on germline sample with sentieons haplotyper output both gvcf and vcf
command: nextflow run main.nf -profile test,targeted --input ./tests/csv/3.0/mapped_single_bam.csv --tools sentieon_haplotyper --step variant_calling --outdir results --sentieon_haplotyper_emit_mode variant,gvcf
tags:
- germline
- sentieon/haplotyper
Expand Down
2 changes: 1 addition & 1 deletion tests/test_sentieon_joint_germline.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
- name: Run joint germline variant calling with sentieon haplotyper
command: nextflow run main.nf -profile test,targeted --input ./tests/csv/3.0/mapped_joint_bam.csv --tools sentieon_haplotyper --step variant_calling --joint_germline --skip_tools haplotyper_filter --outdir results --sentieon_haplotyper_out_format gvcf
command: nextflow run main.nf -profile test,targeted --input ./tests/csv/3.0/mapped_joint_bam.csv --tools sentieon_haplotyper --step variant_calling --joint_germline --skip_tools haplotyper_filter --outdir results --sentieon_haplotyper_emit_mode gvcf
tags:
- germline
- sentieon_joint_germline
Expand Down
6 changes: 3 additions & 3 deletions workflows/sarek.nf
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ if (params.step == 'mapping' && params.aligner.contains("dragmap") && !(params.s
log.warn("DragMap was specified as aligner. Base recalibration is not contained in --skip_tools. It is recommended to skip baserecalibration when using DragMap\nhttps://gatk.broadinstitute.org/hc/en-us/articles/4407897446939--How-to-Run-germline-single-sample-short-variant-discovery-in-DRAGEN-mode")
}

if (params.tools && params.tools.contains("sentieon_haplotyper") && params.joint_germline && (!params.sentieon_haplotyper_out_format || !(params.sentieon_haplotyper_out_format.contains('gvcf') || params.sentieon_haplotyper_out_format.contains('both')))) {
error("When setting the option `--joint_germline` and including `sentieon_haplotyper` among the requested tools, please set `--sentieon_haplotyper_out_format` to `gvcf` or `both`.")
if (params.tools && params.tools.contains("sentieon_haplotyper") && params.joint_germline && (!params.sentieon_haplotyper_emit_mode || !(params.sentieon_haplotyper_emit_mode.contains('gvcf')))) {
error("When setting the option `--joint_germline` and including `sentieon_haplotyper` among the requested tools, please set `--sentieon_haplotyper_emit_mode` to include `gvcf`.")
}

// Fails or warns when missing files or params for ascat
Expand Down Expand Up @@ -962,7 +962,7 @@ workflow SAREK {
known_sites_snps_tbi,
known_snps_vqsr,
params.joint_germline,
params.sentieon_haplotyper_out_format)
params.sentieon_haplotyper_emit_mode)

// TUMOR ONLY VARIANT CALLING
BAM_VARIANT_CALLING_TUMOR_ONLY_ALL(
Expand Down