-
Notifications
You must be signed in to change notification settings - Fork 0
/
read.py
155 lines (104 loc) · 5.32 KB
/
read.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import sys
import os
def extractPDBInfo(input1,input2):
""" read the PDB file and chainId to map the residue serial number with residue number & chain info """
keyList = []
valueList = []
pdbTups = ()
pdbInfoMap = {}
index = 0
fileName = open(input1,'r')
if fileName:
for line in fileName:
line = line.strip()
if line.split()[0][0:6] == 'HETATM' and len(line.split()[0]) > 8:
if line.split()[1] == 'CA' and line.split()[3] == input2:
index = index + 1
keyList.append(index)
valueList.append(line.split()[4])
elif line.split()[1] == 'CA' and len(line.split()[3]) > 1 and line.split()[3][0:1] == input2:
index = index + 1
keyList.append(index)
valueList.append(line.split()[3][1:])
if line.split()[0] == 'ATOM' or line.split()[0] == 'HETATM':
if line.split()[2] == 'CA' and line.split()[4] == input2:
index = index + 1
keyList.append(index)
valueList.append(line.split()[5])
elif line.split()[2] == 'CA' and len(line.split()[4]) > 1 and line.split()[4][0:1] == input2:
index = index + 1
keyList.append(index)
valueList.append(line.split()[4][1:])
pdbInfoMap = dict(zip(keyList,valueList))
return pdbInfoMap
def extractBestAlign(input):
""" For the aligned sequence extract the alignment score and chainId of each pdb chain sequence. Alignment score generated by running blastP. """
lineNum = 0
alignIndex = []
identity = []
alignIdentityMap = {}
chainId = ''
fileName = open(input,"r")
if fileName:
for line in fileName:
line = line.strip()
lineNum = lineNum + 1
### PdbId along with chainInfo present in the alignment file on linenum 4 ###
if lineNum == 4:
chainId = line.split("=")[1][1:6]
### lineNumber of the line where score is metnioned in the alignment file
if line[0:5] == 'Score':
alignIndex.append(int(lineNum))
### Store the identitity score for each alignments in the alignment file ###
if line[0:10] == 'Identities':
identity.append(float(line.split("=")[1].split(",")[0].split("(")[1][:-2]))
### For the last line of the alignment file assign dummy alignment identity score of 0.0
alignIndex.append(int(lineNum))
identity.append(0.0)
### Build the alignment identity dictionary by mapping each alignment with the identity score ###
alignIdentityMap = dict(zip(alignIndex,identity))
return alignIndex,alignIdentityMap,chainId
def extractAlignInfo(input1,input2,input3):
lineNum = 0
qIndex = []
qRes = []
qKeyList = []
sIndex = []
sRes = []
sKeyList = []
newList = []
qResMap = {}
sResMap = {}
pdbInfoMap = {}
transcriptPdbMap = {}
qSequence = ''
Sequence = ''
pdbInfoMap = extractPDBInfo(input3[0:4]+'.pdb',input3[4:5])
### read the blast output file and map the aligned query & subject sequences with corresponding alignment position
fileName = open(input1,"r")
if fileName:
for line in fileName:
line = line.strip()
lineNum = lineNum + 1
#### extract aligned sequence along with its start and end position #####
if float(input2[1]) >= 96.0 and lineNum > int(input2[0]) and lineNum < int(input2[2]) and (line[0:5] == 'Query') and (line[0:6] != 'Query='):
qIndex.append(int(line.split()[1]))
qIndex.append(int(line.split()[3]))
qSequence = qSequence + ''.join(line.split()[2].split())
#### extract subject sequence along with its start and end position #####
#if float(input2[1]) >= 96.0 and lineNum > int(input2[0]) and lineNum < int(input2[2]) and (line[0:5] == 'Query') and (line[0:6] != 'Query='):
if float(input2[1]) >= 96.0 and lineNum > int(input2[0]) and lineNum < int(input2[2]) and (line[0:5] == 'Sbjct') and (line[0:6] != 'Sbjct='):
sIndex.append(int(line.split()[1]))
sIndex.append(int(line.split()[3]))
Sequence = Sequence + ''.join(line.split()[2].split())
if qIndex:
qKeyList = range(qIndex[0],qIndex[len(qIndex)-1]+1) # list of residue index for the aligned query sequence
qRes = list(qSequence.strip('[]')) # aligned query sequence residue element
sKeyList = range(sIndex[0],sIndex[0]+len(Sequence)) # lust of residue index for the aligned subject sequence
sRes = list(Sequence.strip('[]')) #aligned subject sequence residue element
for item in qKeyList:
newList.append(pdbInfoMap[item]) #This stores the pdb residue number for each residue index of the aligned query sequence
qResMap = dict(zip(newList,qRes)) #Map aligned query residues with the pdb residue number
#sResMap = dict(zip(sKeyList,sRes))
transcriptPdbMap = dict(zip(sKeyList,newList)) #Map subject residue number to the query residue number
return qResMap,transcriptPdbMap #return query residue map and trancrit to pdb mapping