diff --git a/config/nxf_vcf.config b/config/nxf_vcf.config index 8de7643c..4675298b 100644 --- a/config/nxf_vcf.config +++ b/config/nxf_vcf.config @@ -7,9 +7,9 @@ env { CMD_VEP = "apptainer exec --no-mount home --bind \${TMPDIR} ${APPTAINER_CACHEDIR}/vep-111.0.sif vep" CMD_FILTERVEP = "apptainer exec --no-mount home --bind \${TMPDIR} ${APPTAINER_CACHEDIR}/vep-111.0.sif filter_vep" CMD_STRANGER = "apptainer exec --no-mount home --bind \${TMPDIR} ${APPTAINER_CACHEDIR}/stranger-0.8.1_v2.sif stranger" - CMD_VCFREPORT="apptainer exec --no-mount home --bind \${TMPDIR} ${APPTAINER_CACHEDIR}/vcf-report-7.0.3.sif" - CMD_VCFDECISIONTREE = "apptainer exec --no-mount home --bind \${TMPDIR} ${APPTAINER_CACHEDIR}/vcf-decision-tree-5.0.0.sif" - CMD_VCFINHERITANCEMATCHER = "apptainer exec --no-mount home --bind \${TMPDIR} ${APPTAINER_CACHEDIR}/vcf-inheritance-matcher-3.2.1.sif" + CMD_VCFREPORT="apptainer exec --no-mount home --bind \${TMPDIR} ${APPTAINER_CACHEDIR}/vcf-report-7.2.0.sif" + CMD_VCFDECISIONTREE = "apptainer exec --no-mount home --bind \${TMPDIR} ${APPTAINER_CACHEDIR}/vcf-decision-tree-5.1.1.sif" + CMD_VCFINHERITANCEMATCHER = "apptainer exec --no-mount home --bind \${TMPDIR} ${APPTAINER_CACHEDIR}/vcf-inheritance-matcher-3.3.2.sif" // workaround for SAMtools https://github.com/samtools/samtools/issues/1366#issuecomment-769170935 REF_PATH = ":" @@ -66,7 +66,7 @@ params { expansionhunter_variant_catalog = "${projectDir}/resources/GRCh38/expansionhunter_variant_catalog.json" stranger_catalog = "${projectDir}/resources/GRCh38/variant_catalog_grch38_fixed.json" vep_custom_phylop = "${projectDir}/resources/GRCh38/hg38.phyloP100way.bw" - vep_plugin_clinvar = "${projectDir}/resources/GRCh38/clinvar_20241001-stripped.tsv.gz" + vep_plugin_clinvar = "${projectDir}/resources/GRCh38/clinvar_20241126-stripped.tsv.gz" vep_plugin_gnomad = "${projectDir}/resources/GRCh38/gnomad.total.v4.1.sites.stripped.tsv.gz" vep_plugin_spliceai_indel = 
"${projectDir}/resources/GRCh38/spliceai_scores.masked.indel.hg38.vcf.gz" vep_plugin_spliceai_snv = "${projectDir}/resources/GRCh38/spliceai_scores.masked.snv.hg38.vcf.gz" @@ -111,7 +111,8 @@ params { include_crams = true max_records = "" max_samples = "" - template = "${projectDir}/resources/vip-report-template-v6.2.0.html" + template = "${projectDir}/resources/vip-report-template-v7.0.2.html" + config = "${projectDir}/resources/vcf_report_config.json" metadata = "${projectDir}/resources/field_metadata.json" GRCh38 { diff --git a/docs/advanced/annotations.md b/docs/advanced/annotations.md index b81d4fa2..94a6cde5 100644 --- a/docs/advanced/annotations.md +++ b/docs/advanced/annotations.md @@ -1,9 +1,15 @@ # Annotations -VIP annotates variant effects and genotype data for samples using a rich set of tools. Annotions can be used to [classify variants using classification trees](classification_trees.md) and displayed in [reports](report_templates.md). + +VIP annotates variant effects and genotype data for samples using a rich set of tools. Annotions can be used +to [classify variants using classification trees](classification_trees.md) and displayed +in [reports](report_templates.md). ## Overview -The table contains annotations available in most output files. Depending on the workflow and the configuration used additional annotations might be available, -check the output file headers for the complete overview. Similarly, some annotations listed below might be missing from your output file depending on the sample sheet content and configuration. + +The table contains annotations available in most output files. Depending on the workflow and the configuration used +additional annotations might be available, +check the output file headers for the complete overview. Similarly, some annotations listed below might be missing from +your output file depending on the sample sheet content and configuration. 
| annotation | type | source | description | |---------------------------------------------|------------------|---------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| @@ -30,7 +36,7 @@ check the output file headers for the complete overview. Similarly, some annotat | INFO/CSQ/CLIN_SIG | string list | VEP | ClinVar classification(s) (do not use, see [here](https://github.com/Ensembl/ensembl-vep/issues/1213)) | | INFO/CSQ/clinVar_CLNID | integer list | VEP plugin | ClinVar variation identifier | | INFO/CSQ/clinVar_CLNREVSTAT | categorical list | VEP plugin | ClinVar review status for the Variation ID. Categories: practice_guideline, reviewed_by_expert_panel, criteria_provided, _multiple_submitters, _no_conflicts, _single_submitter, _conflicting_interpretations, no_assertion_criteria_provided, no_assertion_provided | -| INFO/CSQ/clinVar_CLNSIG | string | VEP plugin | Clinical significance for this single variant; multiple values are separated by a vertical bar. Categories: Benign, Likely_benign, Uncertain_significance, Likely_pathogenic, Pathogenic, Conflicting_interpretations_of_pathogenicity | +| INFO/CSQ/clinVar_CLNSIG | string | VEP plugin | Clinical significance for this single variant; multiple values are separated by a vertical bar. Categories: Benign, Likely_benign, Uncertain_significance, Likely_pathogenic, Pathogenic, Conflicting_classifications_of_pathogenicity, Other | | INFO/CSQ/clinVar_CLNSIGINCL | string | VEP plugin | Clinical significance for a haplotype or genotype that includes this variant. Reported as pairs of VariationID:clinical significance; multiple values are separated by a vertical bar. 
Categories: Benign, Likely_benign, Uncertain_significance, Likely_pathogenic, Pathogenic, Conflicting_interpretations_of_pathogenicity | | INFO/CSQ/Codons | string | VEP | Reference and variant codon sequence | | INFO/CSQ/Consequence | string list | VEP | Effect(s) described as Sequence Ontology term(s) | @@ -102,64 +108,114 @@ check the output file headers for the complete overview. Similarly, some annotat | INFO/CSQ/VKGL_CL | string | VEP plugin | VKGL consensus variant classification | ## Details -VIP uses the [Ensemble Effect Predictor](https://github.com/Ensembl/ensembl-vep) to annotate all variants with their consequences. We use VEP with the `refseq` option for the transcripts, and with the flags for `sift` and `polyphen` annotations enabled. + +VIP uses the [Ensembl Variant Effect Predictor](https://github.com/Ensembl/ensembl-vep) to annotate all variants with their +consequences. We use VEP with the `refseq` option for the transcripts, and with the flags for `sift` and `polyphen` +annotations enabled. ### Plugins + Below we describe the other sources which we annotate using the VEP plugin framework. #### CAPICE -[CAPICE](https://github.com/molgenis/capice) is a computational method for predicting the pathogenicity of SNVs and InDels. It is a gradient boosting tree model trained using a variety of genomic annotations used by CADD score and trained on the clinical significance. CAPICE performs consistently across diverse independent synthetic, and real clinical data sets. It ourperforms the current best method in pathogenicity estimation for variants of different molecular consequences and allele frequency. -We run the CAPICE application in the VIP pipeline and use a VEP plugin to annotate the VEP output with the scores from the CAPICE output file. +[CAPICE](https://github.com/molgenis/capice) is a computational method for predicting the pathogenicity of SNVs and +InDels.
It is a gradient boosting tree model trained using a variety of genomic annotations used by CADD score and +trained on the clinical significance. CAPICE performs consistently across diverse independent synthetic, and real +clinical data sets. It outperforms the current best method in pathogenicity estimation for variants of different +molecular consequences and allele frequency. + +We run the CAPICE application in the VIP pipeline and use a VEP plugin to annotate the VEP output with the scores from +the CAPICE output file. #### VKGL -The datashare workgroup of VKGL has set up a [central database](https://www.vkgl.nl/nl/diagnostiek/vkgl-datashare-database) to enable mutual sharing of variant classifications through a partly automatic process. An additional goal is the public sharing of these data. The currently publicly available part of the database consists of DNA variant classifications established based on (former) diagnostic questions. -We add the classifications from an export of the database and use a VEP plugin to annotate the VEP output with the classifications from the this file. +The datashare workgroup of VKGL has set up +a [central database](https://www.vkgl.nl/nl/diagnostiek/vkgl-datashare-database) to enable mutual sharing of variant +classifications through a partly automatic process. An additional goal is the public sharing of these data. The +currently publicly available part of the database consists of DNA variant classifications established based on (former) +diagnostic questions. + +We add the classifications from an export of the database and use a VEP plugin to annotate the VEP output with the +classifications from this file. #### SpliceAI -SpliceAI is an open-source deep learning splicing prediction algorithm that has demonstrated in the past few years its high ability to predict splicing defects caused by DNA variations.
-We add the scores from the available precomputed scores of SpliceAI and use a copy of the available [VEP plugin](https://github.com/Ensembl/VEP_plugins/blob/release/109/SpliceAI.pm) to annotate the VEP output with the classifications from the this file. +SpliceAI is an open-source deep learning splicing prediction algorithm that has demonstrated in the past few years its +high ability to predict splicing defects caused by DNA variations. + +We add the scores from the available precomputed scores of SpliceAI and use a copy of the +available [VEP plugin](https://github.com/Ensembl/VEP_plugins/blob/release/109/SpliceAI.pm) to annotate the VEP output +with the classifications from this file. #### AnnotSV -[AnnotSV](https://lbgi.fr/AnnotSV/) is a program for annotating and ranking structural variations from genomes of several organisms. -We run the AnnotSV application in the VIP pipeline and use a VEP plugin to annotate the VEP output with the scores from the AnnotSV output file. +[AnnotSV](https://lbgi.fr/AnnotSV/) is a program for annotating and ranking structural variations from genomes of +several organisms. + +We run the AnnotSV application in the VIP pipeline and use a VEP plugin to annotate the VEP output with the scores from +the AnnotSV output file. #### HPO -A file based on the HPO [phenotype_to_genes.txt](http://purl.obolibrary.org/obo/hp/hpoa/phenotype_to_genes.txt) is used to annotate VEP consequences with the inheritance modes associated with the gene of this consequence. + +A file based on the HPO [phenotype_to_genes.txt](http://purl.obolibrary.org/obo/hp/hpoa/phenotype_to_genes.txt) is used +to annotate VEP consequences with the inheritance modes associated with the gene of this consequence. #### Inheritance -A file based on the [CGD database](https://research.nhgri.nih.gov/CGD/) is used to annotate VEP consequences with the inheritance modes associated with the gene of this consequence.
+ +A file based on the [CGD database](https://research.nhgri.nih.gov/CGD/) is used to annotate VEP consequences with the +inheritance modes associated with the gene of this consequence. #### Grantham -The [Grantham score](https://www.science.org/doi/10.1126/science.185.4154.862) attempts to predict the distance between two amino acids, in an evolutionary sense. A lower Grantham score reflects less evolutionary distance. A higher Grantham score reflects a greater evolutionary distance. -We use a copy of the VEP plugin by Duarte Molha to annotate the VEP output with Grantham scores. +The [Grantham score](https://www.science.org/doi/10.1126/science.185.4154.862) attempts to predict the distance between +two amino acids, in an evolutionary sense. A lower Grantham score reflects less evolutionary distance. A higher Grantham +score reflects a greater evolutionary distance. + +We use a copy of the VEP plugin by Duarte Molha to annotate the VEP output with Grantham scores. #### GADO -[GADO](https://www.nature.com/articles/s41467-019-10649-4/) can be used to prioritize genes based on the HPO terms of a patient.. -We run the GADO commandline application in the VIP pipeline and use a VEP plugin to annotate the VEP output with the scores from the GADO output file. +[GADO](https://www.nature.com/articles/s41467-019-10649-4/) can be used to prioritize genes based on the HPO terms of a +patient. + +We run the GADO command-line application in the VIP pipeline and use a VEP plugin to annotate the VEP output with the +scores from the GADO output file. #### AlphScore -[AlphScore](https://doi.org/10.5281/zenodo.6288138) is a method to predict the pathogenicity of missense variants using features derived from AlphaFold2. + +[AlphScore](https://doi.org/10.5281/zenodo.6288138) is a method to predict the pathogenicity of missense variants using +features derived from AlphaFold2. We add the available precomputed scores of AlphScore using a custom VEP plugin.
#### ncER -[The non-coding essential regulation (ncER)](https://www.nature.com/articles/s41467-019-13212-3) score indicates if a region is likely to be essential in terms of regulation. -The ncER file VIP uses is the version provided by GREEN-VARAN (https://github.com/edg1983/GREEN-VARAN) on Zenodo: https://zenodo.org/records/5636163 + +[The non-coding essential regulation (ncER)](https://www.nature.com/articles/s41467-019-13212-3) score indicates if a +region is likely to be essential in terms of regulation. +The ncER file VIP uses is the version provided by GREEN-VARAN (https://github.com/edg1983/GREEN-VARAN) on +Zenodo: https://zenodo.org/records/5636163 #### ReMM -[The Regulatory Mendelian Mutation (ReMM)](https://charite.github.io/software-remm-score.html) score was created for relevance prediction of non-coding variations (SNVs and small InDels) in the human genome (hg19) in terms of Mendelian diseases. The VEP plugin is build on top of the GREEN-DB dataset (GRCh38) for ReMM scores: https://zenodo.org/records/3955933 + +[The Regulatory Mendelian Mutation (ReMM)](https://charite.github.io/software-remm-score.html) score was created for +relevance prediction of non-coding variations (SNVs and small InDels) in the human genome (hg19) in terms of Mendelian +diseases. 
The VEP plugin is built on top of the GREEN-DB dataset (GRCh38) for ReMM +scores: https://zenodo.org/records/3955933 #### FATHMM-MKL -[FATHMM-MKL](https://fathmm.biocompute.org.uk/fathmmMKL.htm) predicts the Functional Consequences of Coding and Non-Coding Single Nucleotide Variants (SNVs) - This plugin annotates non-coding scores only, and is build on top of the GREEN-DB dataset (GRCh38) for FATHMM-MKL non coding scores: https://zenodo.org/records/3981121 - #### GREEN-DB constraint scores -[GREEN-DB](https://doi.org/10.1101/2020.09.17.301960) GREEN-DB is a comprehensive collection of 2.4 million regulatory elements in the human genome collected from previously published databases, high-throughput screenings and functional studies. -This plugin annotates the constrain scores only, and is build on top of the GREEN-DB bed files (GRCh38): https://zenodo.org/records/5636209 -GREEN-DB constrains scores are annotated per region type: enhancers, promotors, bivalent, insulators, silencers. If multiple regions of the same type overlap, VIP annotates the highest constraint score. \ No newline at end of file +[FATHMM-MKL](https://fathmm.biocompute.org.uk/fathmmMKL.htm) predicts the Functional Consequences of Coding and +Non-Coding Single Nucleotide Variants (SNVs). +This plugin annotates non-coding scores only, and is built on top of the GREEN-DB dataset (GRCh38) for FATHMM-MKL +non-coding scores: https://zenodo.org/records/3981121 + +#### GREEN-DB constraint scores + +[GREEN-DB](https://doi.org/10.1101/2020.09.17.301960) is a comprehensive collection of 2.4 million regulatory +elements in the human genome collected from previously published databases, high-throughput screenings and functional +studies. +This plugin annotates the constraint scores only, and is built on top of the GREEN-DB bed files +(GRCh38): https://zenodo.org/records/5636209 +GREEN-DB constraint scores are annotated per region type: enhancers, promoters, bivalent, insulators, silencers.
If +multiple regions of the same type overlap, VIP annotates the highest constraint score. \ No newline at end of file diff --git a/docs/advanced/report_templates.md b/docs/advanced/report_templates.md index 08b0b253..811c53bb 100644 --- a/docs/advanced/report_templates.md +++ b/docs/advanced/report_templates.md @@ -1,8 +1,10 @@ # Report templates + VIP outputs a standalone HTML report that can be viewed in any modern browser. The report is based on the input sample sheet information and the output variant `vcf` data. ## Default + As a default VIP uses a report template that is suitable for most analysis: ![Example report](../img/report_example.png) @@ -10,7 +12,9 @@ As a default VIP uses a report template that is suitable for most analysis: *Above: default report template* ## Customization -Using the `vcf.report.template` parameter (see [here](../usage/config.md#parameters)) it is possible to specify a different report template to create reports tailered to your needs. + +Using the `vcf.report.template` parameter (see [here](../usage/config.md#parameters)) it is possible to specify a +different report template to create reports tailored to your needs. The following repositories might be of interest when creating a new report template: @@ -22,4 +26,11 @@ The following repositories might be of interest when creating a new report templ The `vip-report` tool creates reports based on a report template as described in the following repositories: - [vip-report](https://github.com/molgenis/vip-report) -- [vip-utils](https://github.com/molgenis/vip-utils) \ No newline at end of file +- [vip-utils](https://github.com/molgenis/vip-utils) + +## Configuration + +A configuration .json file (parameter `vcf.report.config`) can be used in combination with a `vcf.report.template` to create reports that e.g. show +specific variant content or variant filters. The allowed contents of a configuration .json file depend on the +`vcf.report.template` used.
For the default `vcf.report.template` the configuration options are +described [here](https://github.com/molgenis/vip-report-template). \ No newline at end of file diff --git a/docs/usage/config.md b/docs/usage/config.md index 95940dd1..979a2fbb 100644 --- a/docs/usage/config.md +++ b/docs/usage/config.md @@ -74,7 +74,7 @@ Both the zipped and unzipped fasta should have an index. | sv.cutesv.min_read_len | 500 | Ignores reads that only report alignments with not longer than bp | | sv.cutesv.min_siglength | 10 | Minimum length of SV signal to be extracted | | sv.cutesv.min_size | 30 | Minimum size of SV to be reported | -| sv.cutesv.min_support | 2 | Minimum number of reads that support a SV to be reported. Please note that the default is lower than the default of cuteSV itself to prevent missed SV calls. | +| sv.cutesv.min_support | 2 | Minimum number of reads that support a SV to be reported. Please note that the default is lower than the default of cuteSV itself to prevent missed SV calls. | | sv.cutesv.read_range | 1000 | The interval range for counting reads distribution | | sv.cutesv.report_readid | false | Enable to report supporting read ids for each SV | | sv.cutesv.retain_work_dir | false | Enable to retain temporary folder and files | @@ -142,6 +142,7 @@ Both the zipped and unzipped fasta should have an index. | vcf.report.include_crams | true | allowed values: [true, false]. true: include cram files in the report for showing alignments in the genome browser, false: do not include the crams in the report, no aligments are shown in the genome browser. This will result in a smaller report size. 
| | vcf.report.max_records | | | | vcf.report.max_samples | | | +| vcf.report.config | | `vcf.report.template` configuration file | | vcf.report.template | | for details, see [here](../advanced/report_templates.md) | | vcf.report.GRCh38.genes | *installed* | | diff --git a/install.sh b/install.sh index 50989e72..dd1261e0 100755 --- a/install.sh +++ b/install.sh @@ -87,9 +87,9 @@ download_files() { urls+=("4d58cc7a4e3e497a245095a62562e27e" "images/spectre-0.2.1-patched_v2.sif") urls+=("8f6e06847776448e004df8b863571109" "images/straglr-1.4.4_vip_v3.sif") urls+=("9c69ac645e04b91c8f480289c536429c" "images/stranger-0.8.1_v2.sif") - urls+=("599813c5c0d547c955bd071ccdf220f8" "images/vcf-decision-tree-5.0.0.sif") - urls+=("f238b75e85e8a097447bad471369d0b2" "images/vcf-inheritance-matcher-3.2.1.sif") - urls+=("87b2d9031b1b8351d2da14dd0095fbea" "images/vcf-report-7.0.3.sif") + urls+=("8575e48d7da8a7f0728d08dc204e42a5" "images/vcf-decision-tree-5.1.1.sif") + urls+=("57a7329781d3cb0e5491c5f84fd49dcd" "images/vcf-inheritance-matcher-3.3.2.sif") + urls+=("9357590531fd4f1af1ab610ddafbdd3b" "images/vcf-report-7.2.0.sif") urls+=("7bffc236a7c65b2b2e2e5f7d64beaa87" "images/vep-111.0.sif") urls+=("82be3c18406e7c027ee4cec83a723d71" "nextflow-24.04.2-all") if [ "${assembly}" == "ALL" ] || [ "${assembly}" == "GRCh37" ]; then @@ -110,8 +110,8 @@ download_files() { urls+=("498c22d840476a757be5f5b0e382f8d6" "resources/GRCh38/GRCh38_ncER_perc.bed.gz.tbi") urls+=("7cc9cafbdde0b00f8007bc2911310b50" "resources/GRCh38/capice_model_v5.1.2-v3.ubj") urls+=("03d4fb2f5fe500daa77c54455626f8f5" "resources/GRCh38/clinical_repeats.bed") - urls+=("52a5ef527d2509d2196329a67b447975" "resources/GRCh38/clinvar_20241001-stripped.tsv.gz") - urls+=("1231b9c88fb19034d313c4e003667238" "resources/GRCh38/clinvar_20241001-stripped.tsv.gz.tbi") + urls+=("8da13608d59915f031beade9c3981c2e" "resources/GRCh38/clinvar_20241126-stripped.tsv.gz") + urls+=("6b1ed6d55e870e37d2bed360abc26fe2" 
"resources/GRCh38/clinvar_20241126-stripped.tsv.gz.tbi") urls+=("72f12f9ee918878030022c46ec850038" "resources/GRCh38/expansionhunter_variant_catalog.json") urls+=("e4c68d0e98a9b5401542b2e8d5b05e82" "resources/GRCh38/gnomad.total.v4.1.sites.stripped.tsv.gz") urls+=("eebfca693425c159d87479fef26d3774" "resources/GRCh38/gnomad.total.v4.1.sites.stripped.tsv.gz.tbi") @@ -147,7 +147,7 @@ download_files() { # update utils/install.sh when updating inheritance.tsv urls+=("df31eb0fe9ebd9ae26c8d6f5f7ba6e57" "resources/inheritance_20240115.tsv") urls+=("7138e76a38d6f67935699d06082ecacf" "resources/vep/cache/homo_sapiens_refseq_vep_111_GRCh38.tar.gz") - urls+=("78962f0c7c6fe5c63ef7c66b627c95a0" "resources/vip-report-template-v6.2.0.html") + urls+=("e5d17440fc84b49b2fba7a30b500ca93" "resources/vip-report-template-v7.0.2.html") # when modifying urls array, please keep list in 'ls -l' order for ((i = 0; i < ${#urls[@]}; i += 2)); do download_file "${base_url}" "${urls[i+1]}" "${urls[i+0]}" "${output_dir}" "${validate}" diff --git a/modules/vcf/inheritance.nf b/modules/vcf/inheritance.nf index 0ec6a9e7..cdb271c1 100644 --- a/modules/vcf/inheritance.nf +++ b/modules/vcf/inheritance.nf @@ -16,6 +16,7 @@ process inheritance { vcfOutIndex = "${vcfOut}.csi" vcfOutStats = "${vcfOut}.stats" + metadata = params.vcf.classify.metadata probands = meta.probands.collect{ proband -> proband.individual_id}.join(",") pedigree = "${meta.project.id}.ped" pedigreeContent = createPedigree(meta.project.samples) diff --git a/modules/vcf/report.nf b/modules/vcf/report.nf index 5fe006d8..1a78cc96 100644 --- a/modules/vcf/report.nf +++ b/modules/vcf/report.nf @@ -1,5 +1,6 @@ include { basename } from './utils' include { createPedigree } from '../utils' +import groovy.json.JsonOutput process report { label 'vcf_report' @@ -31,11 +32,14 @@ process report { crams = meta.crams ? 
meta.crams.collect { "${it.individual_id}=${it.cram}" }.join(",") : "" includeCrams = params.vcf.report.include_crams + configJsonStr = new File(params.vcf.report.config).getText('UTF-8').replaceFirst("\\{", "{\"vip\": {\"filter_field\": {\"type\": \"genotype\",\"name\": \"VIPC_S\"},\"params\":" + JsonOutput.toJson(params) + "},") + probands = meta.probands.collect{ proband -> proband.individual_id }.join(",") hpoIds = meta.project.samples.findAll{ sample -> !sample.hpo_ids.isEmpty() }.collect{ sample -> [sample.individual_id, sample.hpo_ids.join(";")].join("/") }.join(",") pedigree = "${meta.project.id}.ped" pedigreeContent = createPedigree(meta.project.samples) + template 'report.sh' stub: diff --git a/modules/vcf/templates/inheritance.sh b/modules/vcf/templates/inheritance.sh index caedfffc..2ed261c4 100644 --- a/modules/vcf/templates/inheritance.sh +++ b/modules/vcf/templates/inheritance.sh @@ -13,6 +13,7 @@ inheritance () { args+=("-jar" "/opt/vcf-inheritance-matcher/lib/vcf-inheritance-matcher.jar") args+=("--input" "!{vcf}_replaced.vcf.gz") args+=("--output" "!{vcfOut}_replaced.vcf.gz") + args+=("--metadata" "!{metadata}") if [ -n "!{pedigree}" ]; then args+=("--pedigree" "!{pedigree}") fi diff --git a/modules/vcf/templates/report.sh b/modules/vcf/templates/report.sh index 891748ad..255c9e06 100644 --- a/modules/vcf/templates/report.sh +++ b/modules/vcf/templates/report.sh @@ -60,6 +60,10 @@ report() { if [ -n "!{template}" ]; then args+=("--template" "!{template}") fi + cat << EOF > "vip_report_config.json" +!{configJsonStr} +EOF + args+=("--template_config" "vip_report_config.json") if [ -n "!{crams}" ] && [[ "!{includeCrams}" == "true" ]]; then args+=("--cram" "!{crams}") fi diff --git a/resources/decision_tree_GRCh38.json b/resources/decision_tree_GRCh38.json index 1b6ea552..507efaf9 100644 --- a/resources/decision_tree_GRCh38.json +++ b/resources/decision_tree_GRCh38.json @@ -65,7 +65,9 @@ { "field": "INFO/CSQ/clinVar_CLNSIG", "operator": 
"contains_any", - "value": [ "Conflicting_interpretations_of_pathogenicity" ] + "value": [ + "Conflicting_classifications_of_pathogenicity" + ] } ], "outcomeTrue": { @@ -78,7 +80,10 @@ { "field": "INFO/CSQ/clinVar_CLNSIG", "operator": "contains_any", - "value": [ "Likely_pathogenic", "Pathogenic" ] + "value": [ + "Likely_pathogenic", + "Pathogenic" + ] } ], "outcomeTrue": { @@ -91,7 +96,9 @@ { "field": "INFO/CSQ/clinVar_CLNSIG", "operator": "contains_any", - "value": [ "Uncertain_significance" ] + "value": [ + "Uncertain_significance" + ] } ], "outcomeTrue": { @@ -104,7 +111,10 @@ { "field": "INFO/CSQ/clinVar_CLNSIG", "operator": "contains_any", - "value": [ "Likely_benign", "Benign" ] + "value": [ + "Likely_benign", + "Benign" + ] } ], "outcomeTrue": { diff --git a/resources/field_metadata.json b/resources/field_metadata.json index 345ba648..d9da4b0a 100644 --- a/resources/field_metadata.json +++ b/resources/field_metadata.json @@ -1,21 +1,93 @@ { "format": { + "ADFL": { + "label": "Flanking reads", + "description": "Number of flanking reads consistent with the allele", + "numberType": "NUMBER", + "numberCount": 1, + "type": "STRING" + }, + "ADIR": { + "label": "In-repeat reads", + "description": "Number of in-repeat reads consistent with the allele", + "numberType": "NUMBER", + "numberCount": 1, + "type": "STRING" + }, + "ADSP": { + "label": "Spanning reads", + "description": "Number of spanning reads consistent with the allele", + "numberType": "NUMBER", + "numberCount": 1, + "type": "STRING" + }, + "LC": { + "label": "Coverage", + "description": "Locus coverage", + "numberType": "NUMBER", + "numberCount": 1, + "type": "FLOAT" + }, + "REPCI": { + "label": "Repeat CI", + "description": "Confidence interval for the number of repeat units spanned by the allele", + "numberType": "NUMBER", + "numberCount": 1, + "type": "STRING" + }, + "REPCN": { + "label": "Repeats", + "description": "Number of repeat units spanned by the allele", + "numberType": "NUMBER", + 
"numberCount": 1, + "type": "STRING" + }, "VI": { "label": "Inheritance", "description": "An enumeration of possible inheritance modes based on the pedigree of the sample. Potential values: AD, AD_IP, AR, AR_C, XLR, XLD, YL, MT", "numberType": "OTHER", "separator": ",", "type": "CATEGORICAL", - "categories": [ - "AD", - "AD_IP", - "AR", - "AR_C", - "XLD", - "XLR", - "YL", - "MT" - ] + "categories": { + "AD": { + "label": "AD", + "description": "Autosomal dominant" + }, + "AD_IP": { + "label": "AD_IP", + "description": "Autosomal dominant incomplete penetrance" + }, + "AR": { + "label": "AR", + "description": "Autosomal recessive" + }, + "AR_C": { + "label": "AR_C", + "description": "Autosomal recessive compound hetrozygote" + }, + "XLD": { + "label": "XLD", + "description": "X-linked dominant" + }, + "XLR": { + "label": "XLR", + "description": "X-linked recessive" + }, + "YL": { + "label": "YL", + "description": "Y-linked" + }, + "MT": { + "label": "MT", + "description": "Mitochondrial" + } + } + }, + "VIPC_S": { + "label": "VIP sample classification", + "numberType": "OTHER", + "type": "STRING", + "separator": "," } }, "info": { @@ -41,13 +113,28 @@ "numberType": "NUMBER", "numberCount": 1, "type": "CATEGORICAL", - "categories": [ - "B", - "LB", - "VUS", - "LP", - "P" - ] + "categories": { + "B": { + "label": "B", + "description": "Benign" + }, + "LB": { + "label": "LB", + "description": "Likely benign" + }, + "VUS": { + "label": "VUS", + "description": "Variant of uncertain significance" + }, + "LP": { + "label": "LP", + "description": "Likely pathogenic" + }, + "P": { + "label": "P", + "description": "Pathogenic" + } + } }, "CAPICE_SC": { "label": "CAPICE", @@ -90,34 +177,48 @@ "type": "INTEGER" }, "clinVar_CLNSIG": { - "label": "ClinVar variant", + "label": "ClinVar", "description": "Clinical significance for this single variant", "numberType": "OTHER", - "separator": "/", + "separator": "&", "type": "CATEGORICAL", - "categories": [ - "Benign", - 
"Likely_benign", - "Uncertain_significance", - "Likely_pathogenic", - "Pathogenic", - "Conflicting_interpretations_of_pathogenicity" - ] + "categories": { + "Benign": { + "label": "B", + "description": "Benign" + }, + "Likely_benign": { + "label": "LB", + "description": "Likely benign" + }, + "Uncertain_significance": { + "label": "VUS", + "description": "Variant of uncertain significance" + }, + "Likely_pathogenic": { + "label": "LP", + "description": "Likely pathogenic" + }, + "Pathogenic": { + "label": "P", + "description": "Pathogenic" + }, + "Conflicting_classifications_of_pathogenicity": { + "label": "Conflict", + "description": "Conflicting classifications of pathogenicity" + }, + "Other": { + "label": "Other", + "description": "Classifications that do not match the common categorical values." + } + } }, "clinVar_CLNSIGINCL": { "label": "ClinVar variant combination", "description": "Clinical significance for a haplotype or genotype that includes this variant", "numberType": "OTHER", "separator": "&", - "type": "CATEGORICAL", - "categories": [ - "Benign", - "Likely_benign", - "Uncertain_significance", - "Likely_pathogenic", - "Pathogenic", - "Conflicting_interpretations_of_pathogenicity" - ] + "type": "STRING" }, "clinVar_CLNREVSTAT": { "label": "ClinVar status", @@ -125,17 +226,52 @@ "numberType": "OTHER", "separator": "&", "type": "CATEGORICAL", - "categories": [ - "practice_guideline", - "reviewed_by_expert_panel", - "criteria_provided", - "_multiple_submitters", - "_no_conflicts", - "_single_submitter", - "_conflicting_interpretations", - "no_assertion_criteria_provided", - "no_assertion_provided" - ] + "categories": { + "practice_guideline": { + "label": "Practice guideline", + "description": "There is a submitted record with a classification from a practice guideline" + }, + "reviewed_by_expert_panel": { + "label": "Reviewed by expert panel", + "description": "There is a submitted record with a classification from an expert panel" + }, + 
"criteria_provided": { + "label": "Criteria provided", + "description": "Assertion criteria and evidence for the classification (or a public contact) were provided" + }, + "_multiple_submitters": { + "label": "Multiple submitters", + "description": "There are multiple submitted records with a classification" + }, + "_no_conflicts": { + "label": "No conflicts", + "description": "The classifications agree" + }, + "_single_submitter": { + "label": "Single submitter", + "description": "There is a single submitted record with a classification" + }, + "_conflicting_classifications": { + "label": "Conflicting classifications", + "description": "There are conflicting classifications" + }, + "no_assertion_criteria_provided": { + "label": "No assertion criteria provided", + "description": "There are one or more submitted records with a classification but without assertion criteria and evidence for the classification (or a public contact)" + }, + "no_assertion_provided": { + "label": "No assertion provided" + }, + "no_classification_for_the_single_variant": { + "label": "No classification for the single variant" + }, + "no_classification_provided": { + "label": "No classification provided" + }, + "no_classifications_from_unflagged_records": { + "label": "No classifications from unflagged records" + } + } }, "Consequence": { "label": "Effect", @@ -199,11 +335,20 @@ "numberType": "NUMBER", "numberCount": 1, "type": "CATEGORICAL", - "categories": [ - "Transcript", - "RegulatoryFeature", - "MotifFeature" - ], + "categories": { + "Transcript": { + "label": "Transcript", + "description": "Transcript" + }, + "RegulatoryFeature": { + "label": "Regulatory", + "description": "Regulatory feature" + }, + "MotifFeature": { + "label": "Motif", + "description": "Motif feature" + } + }, "required": true }, "FLAGS": { @@ -215,14 +360,20 @@ }, "GADO_PD": { "label": "GADO_PD", - "description": "The GADO prediction for the relation between the HPO terms of the proband(s) and the gene, HC: high
confidence, LC: low confidence.", + "description": "Gene-phenotypes association based on the GeneNetwork Assisted Diagnostic Optimization (GADO) gene prioritization Z-scores, see https://www.genenetwork.nl/gado and https://doi.org/10.1038/s41467-019-10649-4", "numberType": "NUMBER", "numberCount": 1, "type": "CATEGORICAL", - "categories": [ - "LC", - "HC" - ] + "categories": { + "HC": { + "label": "High confidence", + "description": "Gene phenotype relation predicted by GADO with high confidence; Z-Score greater than 5" + }, + "LC": { + "label": "Low confidence", + "description": "Gene phenotype relation predicted by GADO with low confidence; Z-Score greater than 3 but below 5" + } + } }, "GADO_SC": { "label": "GADO_SC", @@ -268,15 +419,22 @@ }, "gnomAD_SRC": { "label": "gnomAD SRC", - "description": "gnomAD source (E=exomes, G=genomes, T=total)", + "description": "gnomAD source", "numberType": "NUMBER", "numberCount": 1, "type": "CATEGORICAL", - "categories": [ - "E", - "G", - "T" - ] + "categories": { + "E": { + "label": "Exomes" + }, + "G": { + "label": "Genomes" + }, + "T": { + "label": "Total", + "description": "Total: exomes & genomes" + } + } }, "gnomAD_QC": { "label": "gnomAD QC", @@ -308,7 +466,7 @@ }, "HPO": { "label": "HPO", - "description": "Human phenotype ontology matches.", + "description": "Human Phenotype Ontology (HPO) terms describing phenotypic abnormalities, see https://hpo.jax.org/ and https://doi.org/10.1093/nar/gkad1005", "numberType": "OTHER", "separator": "&", "type": "STRING" @@ -319,27 +477,82 @@ "numberType": "NUMBER", "numberCount": 1, "type": "CATEGORICAL", - "categories": [ - "LOW", - "MODERATE", - "HIGH", - "MODIFIER" - ], + "categories": { + "LOW": { + "label": "Low" + }, + "MODERATE": { + "label": "Moderate" + }, + "HIGH": { + "label": "High" + }, + "MODIFIER": { + "label": "Modifier" + } + }, "required": true }, "IncompletePenetrance": { "label": "Gene: Inc.Pen.", - "description": "The gene is associated with Incomplete 
Penetrance (1:true)", + "description": "Is gene associated with incomplete penetrance?", "numberType": "NUMBER", "numberCount": 1, - "type": "STRING" + "type": "CATEGORICAL", + "categories": { + "1": { + "label": "True", + "description": "Gene is associated with incomplete penetrance" + } + }, + "nullValue": { + "label": "False" + } }, "InheritanceModesGene": { "label": "Inh.Pat.", "description": "Inheritance pattern", "numberType": "OTHER", "separator": "&", - "type": "STRING" + "type": "CATEGORICAL", + "categories": { + "AD": { + "label": "AD", + "description": "Autosomal dominant" + }, + "AD_IP": { + "label": "AD_IP", + "description": "Autosomal dominant incomplete penetrance" + }, + "AR": { + "label": "AR", + "description": "Autosomal recessive" + }, + "AR_C": { + "label": "AR_C", + "description": "Autosomal recessive compound heterozygote" + }, + "XL": { + "label": "XL", + "description": "X-linked" + }, + "XLD": { + "label": "XLD", + "description": "X-linked dominant" + }, + "XLR": { + "label": "XLR", + "description": "X-linked recessive" + }, + "YL": { + "label": "YL", + "description": "Y-linked" + }, + "MT": { + "label": "MT", + "description": "Mitochondrial" + } + } }, "ncER": { "label": "ncER", @@ -447,8 +660,8 @@ "type": "STRING" }, "VIPC": { - "label": "VIP", - "description": "VIP classification", + "label": "VIP classification", + "description": "Variant consequence classification predicted by the Variant Interpretation Pipeline (VIP), see https://vip.molgeniscloud.org/ and https://doi.org/10.1101/2024.04.11.24305656", "numberType": "NUMBER", "numberCount": 1, "type": "STRING", @@ -464,17 +677,32 @@ }, "VKGL_CL": { "label": "VKGL", - "description": "VKGL consensus classification", + "description": "Variant consensus classification from the Vereniging Klinisch Genetische Laboratoriumdiagnostiek (VKGL) datashare database, see https://vkgl.nl/nl/diagnostiek/vkgl-datashare-database", "numberType": "NUMBER", "numberCount": 1, "type": "CATEGORICAL", -
"categories": [ - "B", - "LB", - "VUS", - "LP", - "P" - ] + "categories": { + "B": { + "label": "B", + "description": "Benign" + }, + "LB": { + "label": "LB", + "description": "Likely benign" + }, + "VUS": { + "label": "VUS", + "description": "Variant of uncertain significance" + }, + "LP": { + "label": "LP", + "description": "Likely pathogenic" + }, + "P": { + "label": "P", + "description": "Pathogenic" + } + } }, "VKGL_UMCG": { "label": "MVL", @@ -482,15 +710,72 @@ "numberType": "NUMBER", "numberCount": 1, "type": "CATEGORICAL", - "categories": [ - "B", - "LB", - "VUS", - "LP", - "P" - ] + "categories": { + "B": { + "label": "B", + "description": "Benign" + }, + "LB": { + "label": "LB", + "description": "Likely benign" + }, + "VUS": { + "label": "VUS", + "description": "Variant of uncertain significance" + }, + "LP": { + "label": "LP", + "description": "Likely pathogenic" + }, + "P": { + "label": "P", + "description": "Pathogenic" + } + } + } + } + }, + "STR_NORMAL_MAX": { + "label": "STR normal max", + "description": "Maximum number of repeats allowed to call as normal as defined in the Stranger catalogue", + "numberType": "NUMBER", + "numberCount": 1, + "type": "INTEGER" + }, + "STR_PATHOLOGIC_MIN": { + "label": "STR pathologic min", + "description": "Minimum number of repeats required to call as pathologic as defined in the Stranger catalogue", + "numberType": "NUMBER", + "numberCount": 1, + "type": "INTEGER" + }, + "STR_STATUS": { + "label": "STR status", + "description": "Repeat expansion status as decided by Stranger", + "numberType": "NUMBER", + "numberCount": 1, + "type": "CATEGORICAL", + "categories": { + "normal": { + "label": "normal", + "description": "Repeat count is smaller than or equal to the maximum number of repeats allowed to call as normal" + }, + "pre_mutation": { + "label": "pre-mutation", + "description": "Repeat count is greater than the maximum number of repeats allowed to call as normal and smaller than the minimum number of repeats
required to call as pathologic" + }, + "full_mutation": { + "label": "full-mutation", + "description": "Repeat count is greater than or equal to the minimum number of repeats required to call as pathologic" } } + }, + "SVTYPE": { + "label": "SV type", + "description": "Type of structural variant", + "numberType": "NUMBER", + "numberCount": 1, + "type": "STRING" } } } diff --git a/resources/vcf_report_config.json b/resources/vcf_report_config.json new file mode 100644 index 00000000..b1955b2f --- /dev/null +++ b/resources/vcf_report_config.json @@ -0,0 +1,595 @@ +{ + "sample_variants": { + "sorts": { + "all": [ + { + "selected": true, + "orders": [ + { + "direction": "desc", + "field": { + "type": "info", + "name": "CSQ/CAPICE_SC" + } + } + ] + }, + { + "selected": false, + "orders": [ + { + "direction": "asc", + "field": { + "type": "info", + "name": "CSQ/CAPICE_SC" + } + } + ] + }, + { + "selected": false, + "orders": [ + { + "direction": "desc", + "field": { + "type": "info", + "name": "CSQ/gnomAD_HN" + } + } + ] + }, + { + "selected": false, + "orders": [ + { + "direction": "asc", + "field": { + "type": "info", + "name": "CSQ/gnomAD_HN" + } + } + ] + } + ], + "str": [ + { + "selected": false, + "orders": [ + { + "direction": "desc", + "field": { + "type": "info", + "name": "STR_NORMAL_MAX" + } + } + ] + }, + { + "selected": false, + "orders": [ + { + "direction": "asc", + "field": { + "type": "info", + "name": "STR_NORMAL_MAX" + } + } + ] + }, + { + "selected": false, + "orders": [ + { + "direction": "desc", + "field": { + "type": "info", + "name": "STR_PATHOLOGIC_MIN" + } + } + ] + }, + { + "selected": false, + "orders": [ + { + "direction": "asc", + "field": { + "type": "info", + "name": "STR_PATHOLOGIC_MIN" + } + } + ] + } + ] + }, + "cells": { + "all": [ + { + "type": "composed", + "name": "locus" + }, + { + "type": "fixed", + "name": "ref" + }, + { + "type": "info", + "name": "SVTYPE", + "label": "Type" + }, + { + "type": "composed", + "name": "genotype", + 
"label": "Proband" + }, + { + "type": "composed", + "name": "genotype_maternal", + "label": "Mother" + }, + { + "type": "composed", + "name": "genotype_paternal", + "label": "Father" + }, + { + "type": "group", + "fields": [ + { + "type": "info", + "name": "CSQ/Consequence" + }, + { + "type": "composed", + "name": "gene" + }, + { + "type": "composed", + "name": "inheritancePattern" + }, + { + "type": "composed", + "name": "hpo" + }, + { + "type": "info", + "name": "CSQ/HGVSc" + }, + { + "type": "info", + "name": "CSQ/HGVSp" + }, + { + "type": "info", + "name": "CSQ/CAPICE_SC" + }, + { + "type": "composed", + "name": "vipC" + }, + { + "type": "composed", + "name": "vipCS" + }, + { + "type": "composed", + "name": "vkgl" + }, + { + "type": "composed", + "name": "clinVar" + }, + { + "type": "composed", + "name": "gnomAdAf" + }, + { + "type": "info", + "name": "CSQ/gnomAD_HN" + }, + { + "type": "info", + "name": "CSQ/PUBMED" + } + ] + } + ], + "snv": [ + { + "type": "composed", + "name": "locus" + }, + { + "type": "fixed", + "name": "ref" + }, + { + "type": "info", + "name": "SVTYPE" + }, + { + "type": "composed", + "name": "genotype", + "label": "Proband" + }, + { + "type": "composed", + "name": "genotype_maternal", + "label": "Mother" + }, + { + "type": "composed", + "name": "genotype_paternal", + "label": "Father" + }, + { + "type": "group", + "fields": [ + { + "type": "info", + "name": "CSQ/Consequence" + }, + { + "type": "composed", + "name": "gene" + }, + { + "type": "info", + "name": "CSQ/IncompletePenetrance" + }, + { + "type": "composed", + "name": "inheritancePattern" + }, + { + "type": "info", + "name": "CSQ/HGVSc" + }, + { + "type": "info", + "name": "CSQ/HGVSp" + }, + { + "type": "info", + "name": "CSQ/CAPICE_SC" + }, + { + "type": "composed", + "name": "vipC" + }, + { + "type": "composed", + "name": "vipCS" + }, + { + "type": "composed", + "name": "vipCS" + }, + { + "type": "composed", + "name": "vkgl" + }, + { + "type": "info", + "name": 
"CSQ/clinVar_CLNSIG" + }, + { + "type": "info", + "name": "CSQ/gnomAD_AF" + }, + { + "type": "info", + "name": "CSQ/gnomAD_HN" + }, + { + "type": "info", + "name": "CSQ/PUBMED" + } + ] + } + ], + "str": [ + { + "type": "composed", + "name": "locus" + }, + { + "type": "composed", + "name": "genotype", + "label": "Proband" + }, + { + "type": "composed", + "name": "genotype_maternal", + "label": "Mother" + }, + { + "type": "composed", + "name": "genotype_paternal", + "label": "Father" + }, + { + "type": "genotype", + "name": "REPCI" + }, + { + "type": "genotype", + "name": "LC" + }, + { + "type": "genotype", + "name": "ADSP" + }, + { + "type": "info", + "name": "STR_NORMAL_MAX" + }, + { + "type": "info", + "name": "STR_PATHOLOGIC_MIN" + }, + { + "type": "group", + "fields": [ + { + "type": "composed", + "name": "gene" + }, + { + "type": "info", + "name": "CSQ/Feature" + }, + { + "type": "composed", + "name": "inheritancePattern" + }, + { + "type": "composed", + "name": "vipC" + }, + { + "type": "composed", + "name": "vipCS" + } + ] + } + ] + }, + "filters": { + "all": [ + { + "type": "composed", + "name": "locus" + }, + { + "type": "info", + "name": "CSQ/SYMBOL" + }, + { + "type": "composed", + "name": "hpo", + "label": "Gene-phenotype association (HPO)" + }, + { + "type": "info", + "name": "CSQ/GADO_PD", + "label": "Gene-phenotype association (GADO)" + }, + { + "type": "info", + "name": "CSQ/IncompletePenetrance" + }, + { + "type": "composed", + "name": "vipC" + }, + { + "type": "composed", + "name": "vipCS" + }, + { + "type": "info", + "name": "CSQ/VKGL_CL" + }, + { + "type": "info", + "name": "CSQ/clinVar_CLNSIG" + }, + { + "type": "genotype", + "name": "VI" + }, + { + "type": "composed", + "name": "inheritanceMatch", + "label": "Inheritance match", + "description": "Indication if the inheritance pattern of any gene associated with the variant matches the inheritance pattern suitable for the samples in the family of the sample. 
For more information see https://github.com/molgenis/vip-inheritance-matcher ." + }, + { + "type": "composed", + "name": "deNovo", + "label": "De Novo", + "description": "Indication if the variant is De Novo or inherited from the parents of the sample." + }, + { + "type": "composed", + "name": "allelicImbalance", + "label": "Allelic Imbalance", + "description": "Sample genotype shows allelic imbalance, calculated by: the allelic depth (AD) value of the first allele divided by the sum of the AD values for the genotype. For heterozygotic genotypes values below 0.02 or above 0.8 are considered imbalanced. For homozygotic genotypes values between 0.02 and 0.98 are considered imbalanced." + } + ], + "str": [ + { + "type": "composed", + "name": "locus" + }, + { + "type": "info", + "name": "CSQ/HPO", + "label": "Gene-phenotype association (HPO)" + }, + { + "type": "info", + "name": "CSQ/GADO_PD", + "label": "Gene-phenotype association (GADO)" + }, + { + "type": "info", + "name": "CSQ/SYMBOL" + }, + { + "type": "composed", + "name": "vipC" + }, + { + "type": "composed", + "name": "vipCS" + } + ] + }, + "recordsPerPage": { + "str": [ + { + "number": 10 + }, + { + "number": 20, + "selected": true + }, + { + "number": 50 + }, + { + "number": 100 + } + ] + } + }, + "variants": { + "cells": { + "all": [ + { + "type": "composed", + "name": "locus" + }, + { + "type": "fixed", + "name": "id" + }, + { + "type": "fixed", + "name": "ref" + }, + { + "type": "fixed", + "name": "alt" + }, + { + "type": "fixed", + "name": "qual" + }, + { + "type": "fixed", + "name": "filter" + }, + { + "type": "info", + "name": ".*" + } + ] + }, + "filters": { + "all": [ + { + "type": "composed", + "name": "locus" + }, + { + "type": "fixed", + "name": "id" + }, + { + "type": "fixed", + "name": "ref" + }, + { + "type": "fixed", + "name": "alt" + }, + { + "type": "fixed", + "name": "qual" + }, + { + "type": "fixed", + "name": "filter" + }, + { + "type": "info", + "name": ".*" + } + ] + } + }, +
"sample_variant": { + "cells": { + "all": [ + { + "type": "composed", + "name": "vipC" + }, + { + "type": "composed", + "name": "vipCS" + }, + { + "type": "info", + "name": "CSQ/((?!VIPC|VIPP).)*" + } + ] + }, + "sample_cells": { + "all": [ + { + "type": "composed", + "name": "genotype", + "label": "Genotype" + }, + { + "type": "genotype", + "name": "((?!GT|VIPC_S|VIPP_S).)*" + } + ] + } + }, + "variant": { + "cells": { + "all": [ + { + "type": "composed", + "name": "vipC" + }, + { + "type": "info", + "name": "CSQ/((?!VIPC|VIPP).)*" + } + ] + } + }, + "sample_variant_consequence": { + "sample_cells": { + "all": [ + { + "type": "composed", + "name": "genotype", + "label": "Genotype" + }, + { + "type": "genotype", + "name": "((?!GT|VIPC_S|VIPP_S).)*" + } + ] + } + }, + "variant_consequence": {} +} diff --git a/utils/apptainer/build.sh b/utils/apptainer/build.sh index 91f22458..857e852f 100644 --- a/utils/apptainer/build.sh +++ b/utils/apptainer/build.sh @@ -96,9 +96,9 @@ main() { images+=("spectre-0.2.1-patched_v2") images+=("stranger-0.8.1_v2") images+=("straglr-1.4.4_vip_v3") - images+=("vcf-decision-tree-5.0.0") - images+=("vcf-inheritance-matcher-3.2.1") - images+=("vcf-report-7.0.3") + images+=("vcf-decision-tree-5.1.1") + images+=("vcf-inheritance-matcher-3.3.2") + images+=("vcf-report-7.2.0") for i in "${!images[@]}"; do echo "---Building ${images[$i]}---" diff --git a/utils/apptainer/def/vcf-decision-tree-5.0.0.def b/utils/apptainer/def/vcf-decision-tree-5.1.1.def similarity index 80% rename from utils/apptainer/def/vcf-decision-tree-5.0.0.def rename to utils/apptainer/def/vcf-decision-tree-5.1.1.def index 4073f10e..de7fee6a 100644 --- a/utils/apptainer/def/vcf-decision-tree-5.0.0.def +++ b/utils/apptainer/def/vcf-decision-tree-5.1.1.def @@ -7,8 +7,8 @@ From: sif/build/openjdk-21.sif %post version_major=5 - version_minor=0 - version_patch=0 + version_minor=1 + version_patch=1 # install apk update @@ -16,7 +16,7 @@ From: sif/build/openjdk-21.sif mkdir -p 
/opt/vcf-decision-tree/lib curl -Ls -o /opt/vcf-decision-tree/lib/vcf-decision-tree.jar "https://github.com/molgenis/vip-decision-tree/releases/download/v${version_major}.${version_minor}.${version_patch}/vcf-decision-tree.jar" - echo "5c40990aa4f300627e3bed261b2173361db165aea4c5b4aa84012df6277f95be /opt/vcf-decision-tree/lib/vcf-decision-tree.jar" | sha256sum -c + echo "c0687c5a05b0789ed516a0228c072ef8ba7ff0695c4ccd6cc4d8629f40cbe552 /opt/vcf-decision-tree/lib/vcf-decision-tree.jar" | sha256sum -c # cleanup apk del .build-dependencies diff --git a/utils/apptainer/def/vcf-inheritance-matcher-3.2.1.def b/utils/apptainer/def/vcf-inheritance-matcher-3.3.2.def similarity index 81% rename from utils/apptainer/def/vcf-inheritance-matcher-3.2.1.def rename to utils/apptainer/def/vcf-inheritance-matcher-3.3.2.def index 9cefcb39..480ac9f8 100644 --- a/utils/apptainer/def/vcf-inheritance-matcher-3.2.1.def +++ b/utils/apptainer/def/vcf-inheritance-matcher-3.3.2.def @@ -7,8 +7,8 @@ From: sif/build/openjdk-21.sif %post version_major=3 - version_minor=2 - version_patch=1 + version_minor=3 + version_patch=2 # install apk update @@ -16,7 +16,7 @@ From: sif/build/openjdk-21.sif mkdir -p /opt/vcf-inheritance-matcher/lib curl -Ls -o /opt/vcf-inheritance-matcher/lib/vcf-inheritance-matcher.jar "https://github.com/molgenis/vip-inheritance-matcher/releases/download/v${version_major}.${version_minor}.${version_patch}/vcf-inheritance-matcher.jar" - echo "9d40033e69188277ac342844b2e50d4044b824ae92638c9ffe1cf1219e1df699 /opt/vcf-inheritance-matcher/lib/vcf-inheritance-matcher.jar" | sha256sum -c + echo "92331d16c06c00c00b84c3c28aab910ee50b439add0c23e7219515514a2875ae /opt/vcf-inheritance-matcher/lib/vcf-inheritance-matcher.jar" | sha256sum -c # cleanup apk del .build-dependencies diff --git a/utils/apptainer/def/vcf-report-7.0.3.def b/utils/apptainer/def/vcf-report-7.2.0.def similarity index 78% rename from utils/apptainer/def/vcf-report-7.0.3.def rename to 
utils/apptainer/def/vcf-report-7.2.0.def
index a9f2c968..502d1e2e 100644
--- a/utils/apptainer/def/vcf-report-7.0.3.def
+++ b/utils/apptainer/def/vcf-report-7.2.0.def
@@ -7,8 +7,8 @@ From: sif/build/openjdk-21.sif
 
 %post
   version_major=7
-  version_minor=0
-  version_patch=3
+  version_minor=2
+  version_patch=0
 
   # install
   apk update
@@ -16,7 +16,7 @@ From: sif/build/openjdk-21.sif
 
   mkdir -p /opt/vcf-report/lib
   curl -Ls -o /opt/vcf-report/lib/vcf-report.jar "https://github.com/molgenis/vip-report/releases/download/v${version_major}.${version_minor}.${version_patch}/vcf-report.jar"
-  echo "c081d478d14378136c1fd0691068d7760911602bf308dd48386016a2284e0458 /opt/vcf-report/lib/vcf-report.jar" | sha256sum -c
+  echo "06d49d172d01c102f483fe5a00b9f7b6e5186b84f7d173597c34f1a6b7034443 /opt/vcf-report/lib/vcf-report.jar" | sha256sum -c
 
   # cleanup
   apk del .build-dependencies
diff --git a/utils/create_clinvar.sh b/utils/create_clinvar.sh
index b945975a..bb379369 100644
--- a/utils/create_clinvar.sh
+++ b/utils/create_clinvar.sh
@@ -19,11 +19,76 @@ strip() {
   echo -e "1 chr1\n2 chr2\n3 chr3\n4 chr4\n5 chr5\n6 chr6\n7 chr7\n8 chr8\n9 chr9\n10 chr10\n11 chr11\n12 chr12\n13 chr13\n14 chr14\n15 chr15\n16 chr16\n17 chr17\n18 chr18\n19 chr19\n20 chr20\n21 chr21\n22 chr22\nX chrX\nY chrY\nMT chrM\n" > "chr_mapping.tmp"
   bcftools annotate --rename-chrs chr_mapping.tmp --no-version --threads 8 "${input}" |\
   bcftools query --print-header --format '%CHROM\t%POS\t%ID\t%REF\t%ALT\t%INFO/CLNSIG\t%INFO/CLNSIGINCL\t%INFO/CLNREVSTAT\n' |\
-  bgzip --stdout --compress-level 9 --threads 8 > "${output}"
-  tabix "${output}" --begin 2 --end 2 --sequence 1 --skip-lines 1
+  bgzip --stdout --compress-level 9 --threads 8 > "${output}"
+
   rm "chr_mapping.tmp"
 }
 
+# Map the ClinVar CLNSIG column (column 6) of a gzip-compressed TSV onto a fixed
+# set of categories and write the result as a bgzip-compressed TSV.
+#
+# Arguments:
+#   $1  compressed input TSV; deleted after the pipeline has run
+#   $2  bgzip-compressed output TSV
+map(){
+  local -r input="${1}"
+  local -r output="${2}"
+
+  zcat "${input}" | awk 'BEGIN {
+    FS=OFS="\t";
+    mapping["Benign"]="Benign";
+    mapping["Likely_benign"]="Likely_benign";
+    mapping["Uncertain_significance"]="Uncertain_significance";
+    mapping["Likely_pathogenic"]="Likely_pathogenic";
+    mapping["Pathogenic"]="Pathogenic";
+    mapping["Conflicting_classifications_of_pathogenicity"]="Conflicting_classifications_of_pathogenicity";
+    # combined classifications are reduced to their "likely" category
+    mapping["Benign/Likely_benign"]="Likely_benign";
+    mapping["Pathogenic/Likely_pathogenic"]="Likely_pathogenic";
+  }
+  NR==1 {
+    # the first line (header) is passed through unchanged
+    print
+  }
+  NR>1 {
+    # split() returns the element count; iterating 1..n keeps the input order,
+    # which "for (i in values)" does not guarantee
+    n = split($6, values, "|");
+    new_values = "";
+    delete seen;
+
+    for (i = 1; i <= n; i++) {
+      val = values[i];
+      if (val in mapping) {
+        val = mapping[val];
+      }
+      else{
+        # anything without a mapping entry is reported as "Other"
+        val = "Other";
+      }
+      # emit every mapped value only once per record
+      if (!seen[val]++) {
+        new_values = (new_values == "" ? val : new_values "|" val);
+      }
+    }
+    $6 = new_values;
+    print
+  }' |
+  bgzip --stdout --compress-level 9 --threads 8 > "${output}"
+
+  rm "${input}"
+}
+
+# Index a bgzip-compressed TSV with tabix: sequence name in column 1, position in
+# column 2 (used as both begin and end), first line skipped.
+#
+# Arguments:
+#   $1  bgzip-compressed TSV to index
+index(){
+  local -r output="${1}"
+  tabix "${output}" --begin 2 --end 2 --sequence 1 --skip-lines 1
+}
+
 validate() {
   local -r input="${1}"
   local -r output="${2}"
@@ -126,7 +191,9 @@ main() {
   fi
 
   validate "${input}" "${output}" "${assembly}"
-  strip "${input}" "${output}" "${assembly}"
+  strip "${input}" "${input}_stripped.tsv" "${assembly}"
+  map "${input}_stripped.tsv" "${output}"
+  index "${output}"
 }
 
 main "${@}"