Skip to content

Commit

Permalink
Merge pull request #18 from Leon-Bichmann/feature/vcf_to_fasts
Browse files Browse the repository at this point in the history
dockerImage added
  • Loading branch information
b-schubert authored Jul 20, 2018
2 parents d1c5bcf + 7a4235d commit 15f43d2
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 8 deletions.
4 changes: 2 additions & 2 deletions knime/descriptors/variants2proteins.ctd
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
<?xml version="1.0" ?>
<tool name="Variants2Proteins" version="1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="https://github.com/genericworkflownodes/CTDopts/raw/master/schemas/CTD_0_3.xsd">
<manual>Variants2Proteins consumes a VCF File and generates all possible neoantigen proteins based on the annotated variants contained in the VCF file by extracting the annotated transcript sequences from Ensemble and integrating the variants. Optionally, it consumes a text file, containing gene IDs of the reference system used for annotation, which are used as filter during the neoantigen generation. Additionally the user can specify whether frameshift mutations, or deletions and insertions, besides single point mutations, should be considered. NeoEpitopePrediction currently supports annotations for GRCh37 and GRCh38 only.
<manual>Variants2Proteins consumes a VCF file and generates all possible neoantigen proteins based on the annotated variants contained in the VCF file by extracting the annotated transcript sequences from Ensembl and integrating the variants. Optionally, it consumes a text file containing gene IDs of the reference system used for annotation, which are used as a filter during neoantigen generation. Additionally, the user can specify whether frameshift mutations, or deletions and insertions, should be considered besides single point mutations. Variants2Proteins currently supports annotations for GRCh37 and GRCh38 only.

Please cite the original publication of the used prediction method alongside ImmunoNodes.
</manual>
<description>Variants2Proteins consumes a VCF File and generates all possible neoantigen proteins based on the annotated variants contained in the VCF file by extracting the annotated transcript sequences from Ensemble and integrating the variants. Optionally, it consumes a text file, containing gene IDs of the reference system used for annotation, which are used as filter during the neoantigen generation. Additionally the user can specify whether frameshift mutations, or deletions and insertions, besides single point mutations, should be considered. NeoEpitopePrediction currently supports annotations for GRCh37 and GRCh38 only.
<description>Variants2Proteins consumes a VCF file and generates all possible neoantigen proteins based on the annotated variants contained in the VCF file by extracting the annotated transcript sequences from Ensembl and integrating the variants. Optionally, it consumes a text file containing gene IDs of the reference system used for annotation, which are used as a filter during neoantigen generation. Additionally, the user can specify whether frameshift mutations, or deletions and insertions, should be considered besides single point mutations. Variants2Proteins currently supports annotations for GRCh37 and GRCh38 only.

Please cite the original publication of the used prediction method alongside ImmunoNodes.
</description>
Expand Down
1 change: 1 addition & 0 deletions knime/plugin.properties
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,4 @@ tool.CleavagePrediction.dockerImage=aperim/immunonodes
tool.EpitopeConservation.dockerImage=aperim/immunonodes
tool.Distance2SelfCalculation.dockerImage=aperim/immunonodes
tool.Distance2SelfGeneration.dockerImage=aperim/immunonodes
tool.Variants2Proteins.dockerImage=aperim/immunonodes
17 changes: 11 additions & 6 deletions src/variants2proteins.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import time
import sys
import argparse
import logging

from Fred2.Core import Protein, Peptide, Allele, MutationSyntax, Variant
from Fred2.Core.Variant import VariationType
Expand Down Expand Up @@ -88,9 +89,14 @@ def get_type(ref, alt):
isSynonymous = False

for co in info.split(","):
#Allele|Consequence|IMPACT|SYMBOL|Gene|Feature_type|Feature|BIOTYPE|EXON|INTRON|HGVSc|HGVSp|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|DISTANCE|STRAND|FLAGS|SYMBOL_SOURCE|HGNC_ID|TSL|APPRIS|SIFT|PolyPhen|AF|AFR_AF|AMR_AF|EAS_AF|EUR_AF|SAS_AF|AA_AF|EA_AF|gnomAD_AF|gnomAD_AFR_AF|gnomAD_AMR_AF|gnomAD_ASJ_AF|gnomAD_EAS_AF|gnomAD_FIN_AF|gnomAD_NFE_AF|gnomAD_OTH_AF|gnomAD_SAS_AF|CLIN_SIG|SOMATIC|PHENO|PUBMED|MOTIF_NAME|MOTIF_POS|HIGH_INF_POS|MOTIF_SCORE_CHANGE">
_,var_type,_,gene,_,transcript_type,transcript_id,_,_,_,_,_,_,transcript_pos,prot_pos,aa_mutation = co.strip().split("|")[:16]
HGNC_ID=co.strip().split("|")[22]
#skip additional info fields without annotation
try:
#Allele|Consequence|IMPACT|SYMBOL|Gene|Feature_type|Feature|BIOTYPE|EXON|INTRON|HGVSc|HGVSp|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|DISTANCE|STRAND|FLAGS|SYMBOL_SOURCE|HGNC_ID|TSL|APPRIS|SIFT|PolyPhen|AF|AFR_AF|AMR_AF|EAS_AF|EUR_AF|SAS_AF|AA_AF|EA_AF|gnomAD_AF|gnomAD_AFR_AF|gnomAD_AMR_AF|gnomAD_ASJ_AF|gnomAD_EAS_AF|gnomAD_FIN_AF|gnomAD_NFE_AF|gnomAD_OTH_AF|gnomAD_SAS_AF|CLIN_SIG|SOMATIC|PHENO|PUBMED|MOTIF_NAME|MOTIF_POS|HIGH_INF_POS|MOTIF_SCORE_CHANGE">
_,var_type,_,gene,_,transcript_type,transcript_id,_,_,_,_,_,_,transcript_pos,prot_pos,aa_mutation = co.strip().split("|")[:16]
HGNC_ID=co.strip().split("|")[22]
except ValueError:
logging.warning("INFO field in different format in line: {}, skipping...".format(str(i)))
continue

#pass every other feature type except Transcript (RegulatoryFeature, MotifFeature, ...)
#pass genes that are uninteresting for us
Expand All @@ -102,9 +108,8 @@ def get_type(ref, alt):
#generate mutation syntax

#positioning in Fred2 is 0-based!!!
if transcript_pos != "":
coding[transcript_id] = MutationSyntax(transcript_id, int(transcript_pos)-1,
-1 if prot_pos == "" else int(prot_pos)-1, co, "", geneID=HGNC_ID)
if transcript_pos != "" and '?' not in transcript_pos:
coding[transcript_id] = MutationSyntax(transcript_id, int(transcript_pos.split("-")[0])-1, -1 if prot_pos == "" else int(prot_pos.split("-")[0])-1, co, "", geneID=HGNC_ID)

#is variant synonymous?
isSynonymous = any(t == "synonymous_variant" for t in var_type.split("&"))
Expand Down

0 comments on commit 15f43d2

Please sign in to comment.