-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathreadVCF.py
98 lines (85 loc) · 2.8 KB
/
readVCF.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import sys,re
def readVCFLine(line):
if line[0] == "#":
return(None)
variation = line.rstrip().split("\t")
event_type=""
chrA=variation[0]
posA=int(variation[1]);
posB=0;
description ={}
INFO=variation[7].split(";");
for tag in INFO:
tag=tag.split("=")
if(len(tag) > 1):
description[tag[0]]=tag[1];
format={}
format_keys={}
if len(variation) > 8:
format_string=variation[8].split(":")
i = 0;
for key in format_string:
format_keys[i]=key
format[key]=[]
i += 1
format_fields=variation[9:]
for sample in format_fields:
i=0
format_string= sample.split(":")
for i in range(0,len(format_keys)):
format[format_keys[i]].append(format_string[i])
i += 1
#Delly translocations
if("TRA" in variation[4]):
event_type="BND"
chrB=description["CHR2"]
posB=int(description["END"]);
if chrA > chrB:
chrT= chrA
chrA = chrB
chrB= chrT
tmpPos=posB
posB=posA
PosA=tmpPos
#intrachromosomal variant
elif(not "]" in variation[4] and not "[" in variation[4]):
chrB=chrA;
posB=int(description["END"]);
#sometimes the fermikit intra chromosomal events are inverted i.e the end pos is a lower position than the start pos
if(posB < posA):
tmp=posB
posB=posA
posA=tmp
event_type=variation[4].strip("<").rstrip(">");
if "<" in variation[4] and ">" in variation[4]:
if "DUP" in event_type:
event_type="DUP"
if "INS" in event_type:
event_type="BND"
else:
if "SVTYPE" in description:
event_type=description["SVTYPE"];
if "INS" in event_type:
event_type = "BND"
#if the variant is given as a breakpoint, it is stored as a precise variant in the db
else:
B=variation[4];
B=re.split("[],[]",B);
for string in B:
if string.count(":"):
lst=string.split(":");
chrB=lst[0]
posB=int(lst[1]);
if chrA > chrB:
chrT = chrA
chrA = chrB
chrB = chrT
tmpPos=posB
posB=posA
posA=tmpPos
elif chrA == chrB and posA > posB:
tmpPos=posB
posB=posA
posA=tmpPos
event_type="BND"
return( chrA, posA, chrB, posB,event_type,description,format);