forked from nf-core/modules
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* commit before git merge * complete the vep update * update modules in meta * fix tests * update bcftools query test * remove merge + update scatter * update comments + fix tests * ssssh eclint * updated subwf description * add contains to tests * Update subworkflows/nf-core/vcf_annotate_ensemblvep/main.nf Co-authored-by: Maxime U Garcia <maxime.garcia@seqera.io> * added snpeff * align includes * Update subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/meta.yml Co-authored-by: Adam Talbot <12817534+adamrtalbot@users.noreply.github.com> * changes suggested by review * add snpeff reports * fix tests * made scatter optional * re-added the original subwf * update meta.yml --------- Co-authored-by: Maxime U Garcia <maxime.garcia@seqera.io> Co-authored-by: Adam Talbot <12817534+adamrtalbot@users.noreply.github.com>
- Loading branch information
1 parent
4c1acbc
commit 4512120
Showing
9 changed files
with
854 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
191 changes: 191 additions & 0 deletions
191
subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/main.nf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,191 @@ | ||
// | ||
// Run VEP and/or SNPEFF to annotate VCF files | ||
// | ||
|
||
include { ENSEMBLVEP_VEP } from '../../../modules/nf-core/ensemblvep/vep/main' | ||
include { SNPEFF_SNPEFF } from '../../../modules/nf-core/snpeff/snpeff/main' | ||
include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix/main' | ||
include { BCFTOOLS_PLUGINSCATTER } from '../../../modules/nf-core/bcftools/pluginscatter/main' | ||
include { BCFTOOLS_CONCAT } from '../../../modules/nf-core/bcftools/concat/main' | ||
include { BCFTOOLS_SORT } from '../../../modules/nf-core/bcftools/sort/main' | ||
|
||
workflow VCF_ANNOTATE_ENSEMBLVEP_SNPEFF {
    // Annotate VCF files with EnsemblVEP and/or snpEff (selected through `val_tools_to_use`).
    // When `val_sites_per_chunk` is set, every input VCF is first scattered into chunks that are
    // annotated separately and afterwards concatenated and sorted back into one VCF per sample.
    // Bgzipped results are tabix-indexed; other results are emitted with an empty index slot.

    take:
    ch_vcf                      // channel: [ val(meta), path(vcf), path(tbi), [path(file1), path(file2)...] ]
    ch_fasta                    // channel: [ val(meta2), path(fasta) ] (optional)
    val_vep_genome              // value: genome to use
    val_vep_species             // value: species to use
    val_vep_cache_version       // value: cache version to use
    ch_vep_cache                // channel: [ path(cache) ] (optional)
    ch_vep_extra_files          // channel: [ path(file1), path(file2)... ] (optional)
    val_snpeff_db               // value: the db version to use for snpEff
    ch_snpeff_cache             // channel: [ path(cache) ] (optional)
    val_tools_to_use            // value: a list of tools to use options are: ["ensemblvep", "snpeff"]
    val_sites_per_chunk         // value: the amount of variants per scattered VCF

    main:
    ch_versions = Channel.empty()

    // Check if val_sites_per_chunk is set and scatter if it is
    if(val_sites_per_chunk) {
        //
        // Prepare the input VCF channel for scattering (split VCFs from custom files)
        //

        ch_input = ch_vcf
            .multiMap { meta, vcf, tbi, custom_files ->
                vcf:    [ meta, vcf, tbi ]
                custom: [ meta, custom_files ]
            }

        //
        // Scatter the input VCFs into multiple VCFs. These VCFs contain the amount of variants
        // specified by `val_sites_per_chunk`. The lower this value is, the more files will be created
        //

        BCFTOOLS_PLUGINSCATTER(
            ch_input.vcf,
            val_sites_per_chunk,
            [],
            [],
            [],
            []
        )
        ch_versions = ch_versions.mix(BCFTOOLS_PLUGINSCATTER.out.versions.first())

        //
        // Prepare the scattered VCFs for annotation: one channel entry per scattered VCF
        //

        ch_scatter = BCFTOOLS_PLUGINSCATTER.out.scatter
            .map { meta, vcfs ->
                // This checks if multiple files were created using the scatter process
                // If multiple files are created, a list will be made as output of the process
                // So if the output isn't a list, there is always one file and if there is a list,
                // the amount of files in the list gets counted by .size()
                // NOTE: `def` keeps these variables local to the closure. Without it they are
                // script-global and can be overwritten by a concurrent invocation of the closure.
                def is_list = vcfs instanceof List
                def count = is_list ? vcfs.size() : 1
                [ meta, is_list ? vcfs : [vcfs], count ]
                // Channel containing the list of VCFs and the size of this list
            }
            .transpose(by:1) // Transpose on the VCFs => Creates an entry for each VCF in the list
            .combine(ch_input.custom, by: 0) // Re-add the sample specific custom files
            .multiMap { meta, vcf, count, custom_files ->
                // Define the new ID. The `_annotated` is to disambiguate the VEP output with its input
                def new_id = "${meta.id}${vcf.name.replace(meta.id,"").tokenize(".")[0]}_annotated" as String
                def new_meta = meta + [id:new_id]

                // Create channels: one with the VEP input and one with the original ID and count of scattered VCFs
                input: [ new_meta, vcf, custom_files ]
                count: [ new_meta, meta.id, count ]
            }

        ch_vep_input = ch_scatter.input
    } else {
        // Use the normal input when no scattering has to be performed
        ch_vep_input = ch_vcf.map { meta, vcf, tbi, files -> [ meta, vcf, files ] }
    }

    // Annotate with ensemblvep if it's part of the requested tools
    if("ensemblvep" in val_tools_to_use){
        ENSEMBLVEP_VEP(
            ch_vep_input,
            val_vep_genome,
            val_vep_species,
            val_vep_cache_version,
            ch_vep_cache,
            ch_fasta,
            ch_vep_extra_files
        )
        ch_versions = ch_versions.mix(ENSEMBLVEP_VEP.out.versions.first())

        ch_vep_output  = ENSEMBLVEP_VEP.out.vcf
        ch_vep_reports = ENSEMBLVEP_VEP.out.report
    } else {
        // Pass the input VCFs through untouched (the custom files are only needed by VEP)
        ch_vep_output  = ch_vep_input.map { meta, vcf, files -> [ meta, vcf ] }
        ch_vep_reports = Channel.empty()
    }

    // Annotate with snpeff if it's part of the requested tools
    if("snpeff" in val_tools_to_use){
        SNPEFF_SNPEFF(
            ch_vep_output,
            val_snpeff_db,
            ch_snpeff_cache
        )
        ch_versions = ch_versions.mix(SNPEFF_SNPEFF.out.versions.first())

        ch_snpeff_output  = SNPEFF_SNPEFF.out.vcf
        ch_snpeff_reports = SNPEFF_SNPEFF.out.report
        ch_snpeff_html    = SNPEFF_SNPEFF.out.summary_html
        ch_snpeff_genes   = SNPEFF_SNPEFF.out.genes_txt
    } else {
        ch_snpeff_output  = ch_vep_output
        ch_snpeff_reports = Channel.empty()
        ch_snpeff_html    = Channel.empty()
        ch_snpeff_genes   = Channel.empty()
    }

    // Gather the files back together if they were scattered
    if(val_sites_per_chunk) {
        //
        // Concatenate the VCFs back together with bcftools concat
        //

        ch_concat_input = ch_snpeff_output
            .join(ch_scatter.count, failOnDuplicate:true, failOnMismatch:true)
            .map { meta, vcf, id, count ->
                // Restore the original sample ID so all chunks of one sample share a grouping key
                def new_meta = meta + [id:id]
                // groupKey with the chunk count lets groupTuple emit as soon as all chunks arrived
                [ groupKey(new_meta, count), vcf ]
            }
            .groupTuple() // Group the VCFs which need to be concatenated
            .map { it + [[]] } // Append an empty list as third tuple element for BCFTOOLS_CONCAT

        BCFTOOLS_CONCAT(
            ch_concat_input
        )
        ch_versions = ch_versions.mix(BCFTOOLS_CONCAT.out.versions.first())

        //
        // Sort the concatenated output (bcftools concat is unable to do this on its own)
        //

        BCFTOOLS_SORT(
            BCFTOOLS_CONCAT.out.vcf
        )
        ch_versions = ch_versions.mix(BCFTOOLS_SORT.out.versions.first())

        ch_ready_vcfs = BCFTOOLS_SORT.out.vcf
    } else {
        ch_ready_vcfs = ch_snpeff_output
    }

    //
    // Index the resulting bgzipped VCFs
    //

    ch_tabix_input = ch_ready_vcfs
        .branch { meta, vcf ->
            // Split the bgzipped VCFs from the unzipped VCFs (only bgzipped VCFs should be indexed)
            bgzip: vcf.extension == "gz"
            unzip: true
                // Unzipped VCFs get an empty list in place of the index
                return [ meta, vcf, [] ]
        }

    TABIX_TABIX(
        ch_tabix_input.bgzip
    )
    ch_versions = ch_versions.mix(TABIX_TABIX.out.versions.first())

    ch_vcf_tbi = ch_tabix_input.bgzip
        .join(TABIX_TABIX.out.tbi, failOnDuplicate: true, failOnMismatch: true)
        .mix(ch_tabix_input.unzip)

    emit:
    vcf_tbi        = ch_vcf_tbi         // channel: [ val(meta), path(vcf), path(tbi) ]
    vep_reports    = ch_vep_reports     // channel: [ path(html) ]
    snpeff_reports = ch_snpeff_reports  // channel: [ path(csv) ]
    snpeff_html    = ch_snpeff_html     // channel: [ path(html) ]
    snpeff_genes   = ch_snpeff_genes    // channel: [ path(genes) ]
    versions       = ch_versions        // channel: [ versions.yml ]
}
73 changes: 73 additions & 0 deletions
73
subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/meta.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
name: vcf_annotate_ensemblvep_snpeff
description: |
  Perform annotation with ensemblvep and/or snpeff and bgzip + tabix index the resulting VCF file. This subworkflow can optionally use the scatter-gather method to run VEP/snpEff in parallel to increase throughput. When scattering is enabled, the input VCF is split into multiple smaller VCFs of fixed size, which are annotated separately and concatenated back together to a single output file per sample. Only VCF/BCF outputs are currently supported.
keywords:
  - vcf
  - annotation
  - ensemblvep
  - snpeff
# Keep this list in sync with the `include` statements in main.nf
modules:
  - ensemblvep/vep
  - snpeff/snpeff
  - tabix/tabix
  - bcftools/pluginscatter
  - bcftools/concat
  - bcftools/sort
input:
  - ch_vcf:
      description: |
        vcf file to annotate, its index and the sample specific custom files (can be an empty list)
        Structure: [ val(meta), path(vcf), path(tbi), [path(file1), path(file2)...] ]
  - ch_fasta:
      description: |
        Reference genome fasta file (optional)
        Structure: [ val(meta2), path(fasta) ]
  - val_vep_genome:
      type: string
      description: genome to use for ensemblvep
  - val_vep_species:
      type: string
      description: species to use for ensemblvep
  - val_vep_cache_version:
      type: integer
      description: cache version to use for ensemblvep
  - ch_vep_cache:
      description: |
        the root cache folder for ensemblvep (optional)
        Structure: [ path(cache) ]
  - ch_vep_extra_files:
      description: |
        any extra files needed by plugins for ensemblvep (optional)
        Structure: [ path(file1), path(file2)... ]
  - val_snpeff_db:
      type: string
      description: database to use for snpeff
  - ch_snpeff_cache:
      description: |
        the root cache folder for snpeff (optional)
        Structure: [ path(cache) ]
  - val_tools_to_use:
      type: list
      description: The tools to use. Options => '["ensemblvep", "snpeff"]'
  - val_sites_per_chunk:
      type: integer
      description: |
        The amount of variants per scattered VCF.
        Set this value to `null`, `[]` or `false` to disable scattering.
# The output names below mirror the `emit:` section of main.nf
output:
  - vcf_tbi:
      description: |
        Compressed vcf file + tabix index (the index is an empty list when the VCF is not bgzipped)
        Structure: [ val(meta), path(vcf), path(tbi) ]
  - vep_reports:
      type: file
      description: html reports created by ensemblvep
      pattern: "*.html"
  - snpeff_reports:
      type: file
      description: csv reports created by snpeff
      pattern: "*.csv"
  - snpeff_html:
      type: file
      description: html reports created by snpeff
      pattern: "*.html"
  - snpeff_genes:
      type: file
      description: genes files created by snpeff
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@maxulysse"
  - "@matthdsm"
  - "@nvnieuwk"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,19 @@ | ||
# pytest-workflow style test entries for the bcftools/query module: each entry runs one
# `-entry` workflow through `nextflow run` and lists the expected output files.
# NOTE(review): this is a rendered diff hunk without +/- markers, so consecutive `- name:` and
# `md5sum:` lines are presumably the old and new version of the same line; which one is
# current cannot be determined from here. Confirm against the real file.
- name: bcftools query | ||
- name: bcftools query test_bcftools_query | ||
command: nextflow run ./tests/modules/nf-core/bcftools/query -entry test_bcftools_query -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/bcftools/query/nextflow.config | ||
tags: | ||
- bcftools/query | ||
- bcftools | ||
files: | ||
- path: output/bcftools/out.txt | ||
md5sum: c32a6d28f185822d8fe1eeb7e42ec155 | ||
md5sum: 51d135de052f3bcef50dcd6b74806094 | ||
- path: output/bcftools/versions.yml | ||
|
||
- name: bcftools query with optional files | ||
- name: bcftools query test_bcftools_query_with_optional_files | ||
command: nextflow run ./tests/modules/nf-core/bcftools/query -entry test_bcftools_query_with_optional_files -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/bcftools/query/nextflow.config | ||
tags: | ||
- bcftools/query | ||
- bcftools | ||
files: | ||
- path: output/bcftools/out.txt | ||
md5sum: 5a87e0865df2f0ab2884fc113ec2a70d | ||
md5sum: 1785d1957ba7206df852d0689b91753f | ||
- path: output/bcftools/versions.yml | ||
Oops, something went wrong.