diff --git a/CHANGELOG.md b/CHANGELOG.md index 33cd4faf95..0a74cd27a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#597](https://github.com/nf-core/sarek/pull/597) - Added tiddit for tumor variant calling - [#600](https://github.com/nf-core/sarek/pull/600) - Added description for UMI related params in schema - [#604](https://github.com/nf-core/sarek/pull/604), [#617](https://github.com/nf-core/sarek/pull/617) - Added full size tests WGS 30x NA12878 +- [#613](https://github.com/nf-core/sarek/pull/613) - Added params `--dbnsfp_fields` to allow configuration of fields for the `dbnsfp` `VEP` plugin +- [#613](https://github.com/nf-core/sarek/pull/613) - Added params `--dbnsfp_consequence` to allow configuration of consequence for the `dbnsfp` `VEP` plugin +- [#613](https://github.com/nf-core/sarek/pull/613) - Added params `--vep_version` to allow more configuration on the vep container definition - [#620](https://github.com/nf-core/sarek/pull/620) - Added checks for sex information when running a CNV tools ### Changed @@ -111,6 +114,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#600](https://github.com/nf-core/sarek/pull/600) - Remove `nf-core lint` warnings - [#602](https://github.com/nf-core/sarek/pull/602) - Fixed bug in `alignment_to_fastq` and added tests - [#609](https://github.com/nf-core/sarek/pull/609) - Remove unused intervals code, reorganize combined intervals file +- [#613](https://github.com/nf-core/sarek/pull/613) - Fixed filenames for `dbnsfp` and `SpliceAI` `VEP` plugins - [#615](https://github.com/nf-core/sarek/pull/615) - Fix ASCAT igenomes file paths - [#619](https://github.com/nf-core/sarek/pull/619) - Fix issue with checking samplesheet content with AWS diff --git a/conf/igenomes.config b/conf/igenomes.config index 24665185a8..9273c9090b 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -35,6 
+35,7 @@ params { vep_cache_version = 105 vep_genome = 'GRCh37' vep_species = 'homo_sapiens' + vep_version = '104.3' } 'GATK.GRCh38' { ascat_alleles = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/G1000_alleles_hg38.zip" @@ -64,6 +65,7 @@ params { vep_cache_version = 105 vep_genome = 'GRCh38' vep_species = 'homo_sapiens' + vep_version = '104.3' } 'Ensembl.GRCh37' { bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" @@ -92,6 +94,7 @@ params { vep_cache_version = 102 vep_genome = 'GRCm38' vep_species = 'mus_musculus' + vep_version = '104.3' } 'TAIR10' { bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" @@ -116,6 +119,7 @@ params { vep_cache_version = 105 vep_genome = 'WBcel235' vep_species = 'caenorhabditis_elegans' + vep_version = '104.3' } 'CanFam3.1' { bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" diff --git a/conf/modules.config b/conf/modules.config index 5615d5d618..5f1b330cba 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1142,17 +1142,19 @@ process{ // VEP if (params.tools && (params.tools.contains('vep') || params.tools.contains('merge'))) { withName: 'ENSEMBLVEP' { - // If just VEP: _VEP.ann.vcf - ext.prefix = { "${vcf.baseName.minus(".vcf")}_VEP" } ext.args = [ '--everything --filter_common --per_gene --total_length --offline --format vcf', - (params.vep_dbnsfp && params.dbnsfp) ? '--plugin dbNSFP,dbNSFP.gz,rs_dbSNP,HGVSc_VEP,HGVSp_VEP,1000Gp3_EAS_AF,1000Gp3_AMR_AF,LRT_score,GERP++_RS,gnomAD_exomes_AF' : '', - (params.vep_loftee) ? '--plugin LoF,loftee_path:/opt/conda/envs/nf-core-vep-104.3/share/ensembl-vep-104.3-0' : '', - (params.vep_spliceai && params.spliceai_snv && params.spliceai_indel) ? '--plugin SpliceAI,snv=spliceai_scores.raw.snv.hg38.vcf.gz,indel=spliceai_scores.raw.indel.hg38.vcf.gz' : '', - (params.vep_spliceregion) ? 
'--plugin SpliceRegion' : '', - (params.vep_out_format) ? "--${params.vep_out_format}" : '--vcf' + (params.vep_dbnsfp && params.dbnsfp && !params.dbnsfp_consequence) ? "--plugin dbNSFP,${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '', + (params.vep_dbnsfp && params.dbnsfp && params.dbnsfp_consequence) ? "--plugin dbNSFP,'${params.dbnsfp_consequence}',${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '', + (params.vep_loftee) ? "--plugin LoF,loftee_path:/opt/conda/envs/nf-core-vep-${params.vep_version}/share/ensembl-vep-${params.vep_version}-0" : '', + // SpliceAI plugin options are key=value pairs; both snv= and indel= must be named + (params.vep_spliceai && params.spliceai_snv && params.spliceai_indel) ? "--plugin SpliceAI,snv=${params.spliceai_snv.split("/")[-1]},indel=${params.spliceai_indel.split("/")[-1]}" : '', + (params.vep_spliceregion) ? '--plugin SpliceRegion' : '', + (params.vep_out_format) ? "--${params.vep_out_format}" : '--vcf' ].join(' ').trim() - if (!params.vep_cache) container = { params.vep_genome ? "nfcore/vep:104.3.${params.vep_genome}" : "nfcore/vep:104.3.${params.genome}" } + // If just VEP: _VEP.ann.vcf + ext.prefix = { "${vcf.baseName.minus(".vcf")}_VEP" } + if (!params.vep_cache) container = { params.vep_genome ? 
"nfcore/vep:${params.vep_version}.${params.vep_genome}" : "nfcore/vep:${params.vep_version}.${params.genome}" } publishDir = [ [ mode: params.publish_dir_mode, @@ -1171,7 +1173,7 @@ process{ // SNPEFF THEN VEP if (params.tools && params.tools.contains('merge')) { withName: ".*:ANNOTATION_MERGE:ENSEMBLVEP" { - // If megre: Output file will have format *_snpEff_VEP.ann.vcf, *_snpEff_VEP.ann.json or *_snpEff_VEP.ann.tab + // If merge: Output file will have format *_snpEff_VEP.ann.vcf, *_snpEff_VEP.ann.json or *_snpEff_VEP.ann.tab ext.prefix = { "${vcf.baseName.minus(".ann.vcf")}_VEP" } } } diff --git a/conf/test.config b/conf/test.config index b63be13f46..f2cd22c0c6 100644 --- a/conf/test.config +++ b/conf/test.config @@ -41,6 +41,7 @@ params { vep_cache_version = 104 vep_genome = 'WBcel235' vep_species = 'caenorhabditis_elegans' + vep_version = '104.3' // Ignore params that will throw warning through params validation schema_ignore_params = "genomes,test_data" diff --git a/main.nf b/main.nf index 70a4c156f8..445e7d66ad 100644 --- a/main.nf +++ b/main.nf @@ -52,6 +52,7 @@ params.pon_tbi = WorkflowMain.getGenomeAttribute(params, 'pon_tbi' params.snpeff_db = WorkflowMain.getGenomeAttribute(params, 'snpeff_db') params.snpeff_genome = WorkflowMain.getGenomeAttribute(params, 'snpeff_genome') params.vep_cache_version = WorkflowMain.getGenomeAttribute(params, 'vep_cache_version') +params.vep_version = WorkflowMain.getGenomeAttribute(params, 'vep_version') params.vep_genome = WorkflowMain.getGenomeAttribute(params, 'vep_genome') params.vep_species = WorkflowMain.getGenomeAttribute(params, 'vep_species') diff --git a/nextflow.config b/nextflow.config index 14c1c8c64d..ea94858fb8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -76,6 +76,8 @@ params { vep_dbnsfp = null // dbnsfp plugin disabled within VEP dbnsfp = null // No dbnsfp processed file dbnsfp_tbi = null // No dbnsfp processed file index + dbnsfp_consequence = null // No default consequence for dbnsfp plugin + 
dbnsfp_fields = "rs_dbSNP,HGVSc_VEP,HGVSp_VEP,1000Gp3_EAS_AF,1000Gp3_AMR_AF,LRT_score,GERP++_RS,gnomAD_exomes_AF" // Default fields for dbnsfp plugin vep_loftee = null // loftee plugin disabled within VEP vep_spliceai = null // spliceai plugin disabled within VEP spliceai_snv = null // No spliceai_snv file diff --git a/nextflow_schema.json b/nextflow_schema.json index 4fbade381a..4c1b841bb2 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -325,7 +325,7 @@ "type": "string", "fa_icon": "fas fa-database", "description": "Path to dbNSFP processed file.", - "help_text": "To be used with `--vep_dbnsfp`.", + "help_text": "To be used with `--vep_dbnsfp`.\ndbNSFP files and more information are available at https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html#dbnsfp and https://sites.google.com/site/jpopgen/dbNSFP/", "hidden": true }, "dbnsfp_tbi": { @@ -335,6 +335,21 @@ "help_text": "To be used with `--vep_dbnsfp`.", "hidden": true }, + "dbnsfp_consequence": { + "type": "string", + "fa_icon": "fas fa-database", + "description": "Consequence to annotate with", + "help_text": "To be used with `--vep_dbnsfp`.\nThis parameter is used to filter/limit outputs to a specific effect of the variant.\nThe set of consequence terms is defined by the Sequence Ontology and an overview of those used in VEP can be found here: https://www.ensembl.org/info/genome/variation/prediction/predicted_data.html\nIf one wants to filter using several consequences, then separate those by using '&' (e.g. 'consequence=3_prime_UTR_variant&intron_variant').", + "hidden": true + }, + "dbnsfp_fields": { + "type": "string", + "fa_icon": "fas fa-database", + "description": "Fields to annotate with", + "default": "rs_dbSNP,HGVSc_VEP,HGVSp_VEP,1000Gp3_EAS_AF,1000Gp3_AMR_AF,LRT_score,GERP++_RS,gnomAD_exomes_AF", + "help_text": "To be used with `--vep_dbnsfp`.\nThis parameter can be used to retrieve individual values from the dbNSFP file. 
The values correspond to the names of the columns in the dbNSFP file and are separated by commas.\nThe column names might differ between the different dbNSFP versions. Please check the Readme.txt file, which is provided with the dbNSFP file, to obtain the correct column names. The Readme file also contains a short description of the provided values and the version of the tools used to generate them.\nDefault values are explained below:\nrs_dbSNP - rs number from dbSNP\nHGVSc_VEP - HGVS coding variant presentation from VEP. Multiple entries separated by ';', corresponds to Ensembl_transcriptid\nHGVSp_VEP - HGVS protein variant presentation from VEP. Multiple entries separated by ';', corresponds to Ensembl_proteinid\n1000Gp3_EAS_AF - Alternative allele frequency in the 1000Gp3 East Asian descendent samples\n1000Gp3_AMR_AF - Alternative allele frequency in the 1000Gp3 American descendent samples\nLRT_score - Original LRT two-sided p-value (LRTori), ranges from 0 to 1\nGERP++_RS - Conservation score. The larger the score, the more conserved the site, ranges from -12.3 to 6.17\ngnomAD_exomes_AF - Alternative allele frequency in the whole gnomAD exome samples.", + "hidden": true + }, "vep_loftee": { "type": "boolean", "fa_icon": "fas fa-database", @@ -401,6 +416,14 @@ "description": "Path to VEP cache.", "help_text": "To be used with `--annotation_cache`.", "hidden": true + }, + "vep_out_format": { + "type": "string", + "default": "vcf", + "description": "VEP output-file format.", + "enum": ["json", "tab", "vcf"], + "help_text": "Sets the format of the output-file from VEP. Available formats: json, tab and vcf.", + "fa_icon": "fas fa-table" } } }, @@ -425,13 +448,12 @@ "ascat_chromosomes": { "type": "string", "fa_icon": "fa-solid fa-text", - "default": "'c(1:22, 'X')'", + "default": "c(1:22, 'X')", "help_text": "Specify specific chromosomes to run ASCAT on, i.e 'c('21', '22')'." 
}, "ascat_genome": { "type": "string", "fa_icon": "fa-solid fa-text", - "default": "hg38", "description": "ASCAT genome.", "help_text": "Must be set to run ASCAT, either hg19 or hg38. If you use AWS iGenomes, this has already been set for you appropriately." }, @@ -589,13 +611,11 @@ "description": "VEP cache version.", "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." }, - "vep_out_format": { + "vep_version": { "type": "string", - "default": "vcf", - "description": "VEP output-file format.", - "enum": ["json", "tab", "vcf"], - "help_text": "Sets the format of the output-file from VEP. Available formats: json, tab and vcf.", - "fa_icon": "fas fa-table" + "fa_icon": "fas fa-tag", + "description": "VEP version.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." }, "save_reference": { "type": "boolean",