-
Notifications
You must be signed in to change notification settings - Fork 0
/
extractFrustrationInfo.py
416 lines (258 loc) · 13 KB
/
extractFrustrationInfo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
"""extractFrustrationInfo
Usage:
extractFrustrationInfo.py -I <mappedSNPInfo> -nd <nativePDBDir> -md <mutPDBDir> -F <frstnExecDir> -P <pbdSeqDir> -O <frustrationOutFile>
extractFrustrationInfo.py (-h | --help)
Options:
-h --help Show this screen.
"""
from docopt import docopt
import sys
import os
import subprocess
import glob
from collections import OrderedDict
def extractChainInfo(input):
""" Read the pdb file to extract the chainId info and map it to each residue belonging to that particular chain"""
chainId = []
chainIdnr = []
keyList = []
valueList = []
chainInfoMap = {}
pdbFile = input
try:
fileName = open(pdbFile,"r")
if fileName:
for line in fileName:
if line.split()[0] == 'ATOM':
chainId.append(line.split()[4][0])
chainIdnr = list(OrderedDict.fromkeys(chainId))
for index in range(0,len(chainIdnr)):
keyList.append(index+1)
valueList.append(chainIdnr[index])
chainInfoMap = dict(zip(keyList,valueList))
except IOError:
print 'There is no file named', pdbFile
print chainInfoMap
return chainInfoMap
def extractChainLength(inputSeq):
""" extract the sequence of the PDB file and store it in a FASTA format file. """
sequence = ''
pdbSeq = arguments["<pbdSeqDir>"]+'pdb_seq.py'
pdbSeq = pdbSeq + ' '+inputSeq[:-4]+ ' > ' + inputSeq[:-4]+'.fasta'
p1 = subprocess.Popen(pdbSeq,shell=True)
p1.communicate()
fileInp = open(inputSeq[:-4]+'.fasta','r')
if fileInp:
for line in fileInp:
line = line.strip()
if line[0:1] != '>' and line[0:1] !=' ':
sequence = sequence + line.strip()
removeFASTA = 'rm '+inputSeq[:-4]+'.fasta'
p2 = subprocess.Popen(removeFASTA,shell=True)
p2.communicate()
return len(sequence)
def extractResNumFrustration(inputPDB,resIndex,flag):
""" extract the residule level frustration index for each mutated residue position """
RMFI = "NA"
BMFI = "NA"
AA = "NA"
### frustration output file name ###
furstrationFileName = './'+flag+'/'+inputPDB+'.pdb_res_renum'
try:
fileName = open(furstrationFileName,"r")
if fileName:
for index in range(1,2):
fileName.next()
for line in fileName:
line = line.strip()
if not line:
RMFI = "NA"
BMFI = "NA"
#''' extract relevant info if the residue number matches'''
elif int(''.join(filter(lambda x: x.isdigit(), line.split()[0]))) == int(resIndex):
RMFI = line.split()[2] #residue level rustration index
BMFI = line.split()[1] #burial frustration index
AA = line.split()[3] # Amino acid identity
except IOError:
print 'There is no file named', furstrationFileName
return RMFI,BMFI,AA
def calcFrustration(inputPDB,chainId,inputFlag,arguments):
""" run residue level frustration calculation for the given PDB and store the output in relevant directory """
chainLength = 0
residueFrustration = 0.0
burialFrustration = 0.0
configFrustration = 0.0
AccessSurfaceArea = ()
pdbSubset = arguments["<pbdSeqDir>"]+'pdb_subset.py'
FrustMeter = arguments["<frstnExecDir>"]
nativePDBDir = arguments["<nativePDBDir>"]
mutatedPDBDir = arguments["<mutPDBDir>"]
''' for the native PDB frustration output gets stored in the native subdirectory'''
if inputFlag == 'native':
nativeInput = inputPDB.split('/')[-1:][0][:-4]+'.pdb_'+chainId+'.pdb' #nativePdb file
FNULL = open(os.devnull, 'w')
''' run pdbTool to extract coOrdinates for the given pdbchain and store in pdb format '''
pdbFile = inputPDB[0:8]
mkNative = 'mkdir ./native'
print mkNative
p00 = subprocess.Popen(mkNative,stdout=FNULL,stderr=subprocess.STDOUT,shell=True)
p00.communicate()
if pdbFile :
pdbNativeSubunit = 'python '+ pdbSubset + ' '+inputPDB+ ' -c '+ chainId
print pdbNativeSubunit
p0 = subprocess.Popen(pdbNativeSubunit,shell=True)
p0.communicate()
''' calculate chain length '''
chainLength = extractChainLength(nativePDBDir+nativeInput)
if chainLength <= 10000:
#''' Move the native PDB file from the original native directory '''
moveNative = 'cp '+nativePDBDir+nativeInput+' .'
print moveNative
p0 = subprocess.Popen(moveNative,shell=True)
p0.communicate()
''' Run frustration code on the native PDB'''
nativeFrustratometer = 'sh '+FrustMeter + ' ' +nativeInput
print nativeFrustratometer
p1 = subprocess.Popen(nativeFrustratometer,stdout=FNULL,stderr=subprocess.STDOUT,shell=True)
p1.communicate()
''' Move the residue level frutsration output for the given PDB to the native directory'''
nativeFindexOut = 'mv ./'+nativeInput+'.done/'+nativeInput+'_res_renum ./native/'
print nativeFindexOut
p2 = subprocess.Popen(nativeFindexOut,shell=True)
p2.communicate()
''' Move the PDB file to the native subdirectory directory'''
nativePdbOut = 'mv '+nativeInput+' ./native/'
print nativePdbOut
p4 = subprocess.Popen(nativePdbOut,shell=True)
p4.communicate()
''' Delete the frustration code generated ouput directory to save space and get rid of other files'''
removeNatFrustration = 'rm -r ' +nativeInput+'*'
print removeNatFrustration
p5 = subprocess.Popen(removeNatFrustration,shell=True)
p5.communicate()
#''' for the mutated PDB, frustration output gets stored in the mutated subdirectory'''
elif inputFlag == 'mutated':
mutPDB = inputPDB+'_'+chainId+'.pdb'
mutInput = inputPDB.split('/')[-1:][0]+'_'+chainId+'.pdb'
FNULL = open(os.devnull, 'w')
mkMutated = 'mkdir ./mutated'
print mkMutated
p00 = subprocess.Popen(mkMutated,stdout=FNULL,stderr=subprocess.STDOUT,shell=True)
p00.communicate()
if mutPDB:
#''' calculate chain length for mutated PDB '''
chainLength = extractChainLength(mutatedPDBDir+mutInput)
if chainLength <= 10000:
pdbSubunit = 'python '+ pdbSubset + ' '+inputPDB+ ' -c '+ chainId
print pdbSubunit
p01 = subprocess.Popen(pdbSubunit,shell=True)
p01.communicate()
moveMut = 'cp '+mutPDB+' .'
print moveMut
p0 = subprocess.Popen(moveMut,shell=True)
p0.communicate()
''' Run frustration code on the mutated PDB'''
runFrustratometer = FrustMeter + ' ' +mutInput
print runFrustratometer
p02 = subprocess.Popen(runFrustratometer,stdout=FNULL,stderr=subprocess.STDOUT,shell=True)
p02.communicate()
''' Move the residue level frutsration output for the mutated PDB to the mutated directory'''
mutFindexOut = 'mv ./'+mutInput+'.done/'+mutInput+'_res_renum ./mutated/'
print mutFindexOut
p03 = subprocess.Popen(mutFindexOut,shell=True)
p03.communicate()
''' Move the mutated PDB file to the mutated subdirectory directory'''
mutPdbOut = 'cp '+mutInput+' ./mutated/'
print mutPdbOut
p05 = subprocess.Popen(mutPdbOut,shell=True)
p05.communicate()
''' Delete the frustration code generated ouput directory to save space and get rid of other files'''
removeMutFrustration = 'rm -r ' +mutInput+'*'
print removeMutFrustration
p06 = subprocess.Popen(removeMutFrustration,shell=True)
p06.communicate()
def extractFIndexVal(arguments):
''' extract out the residue level frustration index along with other info '''
residueMAP = {'NA':'UNW1','X':'UNW','A':'ALA','C':'CYS','D':'ASP','R':'ARG','N':'ASN','E':'GLU','Q':'GLN','G':'GLY','H':'HIS','I':'ILE','L':'LEU','K':'LYS','M':'MET','F':'PHE','P':'PRO','S':'SER','T':'THR','W':'TRP','Y':'TYR','V':'VAL'}
nativeAA = 'NA'
mutAA = 'NA'
MAF = 0.0
inputInfoFile = arguments["<mappedSNPInfo>"]
frstnOutFile = arguments["<frustrationOutFile>"]
nativePDBDir = arguments["<nativePDBDir>"]
mutatedPDBDir = arguments["<mutPDBDir>"]
### open output file in the append mode ###
outputFile = open(frstnOutFile,'a')
### open the ENST file to read mutated and Native PDB along with snpInfo ###
fileName = open(inputInfoFile,"r")
if fileName:
for line in fileName:
line = line.strip()
inputFileMutated = mutatedPDBDir+'.'.join(line.split()[2].split('.')[0:2])+line.split()[0][4:]+'.pdb' #mutated pdb file name
inputFileNative = nativePDBDir+line.split()[2][0:4]+'.pdb' #native pdb file name
mutResIndex = line.split()[2].split('.')[1][6:] #residue number of the mutated residue
print inputFileMutated
chainIdMap = extractChainInfo(inputFileMutated)
if chainIdMap:
print inputFileNative
nativeInput = inputFileNative.split('/')[-1:][0]+'_'+chainIdMap[int(inputFileMutated.split('/')[-1:][0].split('.')[2])] #frustation output filename for native
mutInput = inputFileMutated.split('/')[-1:][0]+'_'+chainIdMap[int(inputFileMutated.split('/')[-1:][0].split('.')[2])] # frustration output file name after mutation
print nativeInput
### Extract the residue level frustration index, burial frustration and native amoni acid identity###
nativeResFrustration,nativeBuriedFrustration,nativeAA = extractResNumFrustration(nativeInput,mutResIndex,'native')
### Extract the residue level frustration index, burial frustration and mutated amino acid identity###
mutResFrustration,mutBuriedFrustration,mutAA = extractResNumFrustration(mutInput,mutResIndex,'mutated')
outputFile.write(line.split()[0]+'\t'+line.split()[2]+'\t'+line.split()[1]+'\t'+residueMAP[nativeAA]+'\t'+residueMAP[mutAA]+'\t'+str(nativeResFrustration)+'\t'+str(mutResFrustration)+'\t'+str(nativeBuriedFrustration)+'\t'+str(mutBuriedFrustration))
outputFile.write('\n')
def returnPDBMap(mappedSNVInp,arguments):
''' return pdb map with nativePdbId,mutatedPdbId along with the chainInfo'''
chainIdMap = {}
nativePdbMap = {}
mutatedPdbMap = {}
nativePDB = []
mutatedPDB = []
chainIdList = []
nativePDBDir = arguments["<nativePDBDir>"]
mutatedPDBDir = arguments["<mutPDBDir>"]
fileName = open(mappedSNVInp,"r")
if fileName:
for line in fileName:
line = line.strip()
''' list of mutated pdbIds'''
inputFileMutated = mutatedPDBDir+'.'.join(line.split()[2].split('.')[0:2])+line.split()[0][4:]+'.pdb'
mutatedPDB.append(inputFileMutated)
''' list of native pdbIds'''
inputFileNative = nativePDBDir+line.split()[2][0:4]+'.pdb'
nativePDB.append(inputFileNative)
chainIdMap = extractChainInfo(inputFileMutated)
if chainIdMap:
''' list of chainIds mapping to different pdbs'''
print "====="+inputFileMutated.split('/')[-1:][0]
chainInfo = chainIdMap[int(inputFileMutated.split('/')[-1:][0].split('.')[2])]
chainIdList.append(chainInfo)
''' map native PdbIds with corresponding chain'''
nativePdbMap = dict(zip(nativePDB,chainIdList))
''' map mutated PdbIds with corresponding chain'''
mutatedPdbMap = dict(zip(mutatedPDB,chainIdList))
return nativePdbMap,mutatedPdbMap
def main(arguments):
nativePdb = {}
mutatedPdb = {}
mappedSNPInfo = arguments["<mappedSNPInfo>"]
nativePDBDir = arguments["<nativePDBDir>"]
mutatedPDBDir = arguments["<mutPDBDir>"]
print mappedSNPInfo+'\t'+nativePDBDir+'\t'+mutatedPDBDir
### obtain native & mutated pdbId along with chain Info ####
nativePdb,mutatedPdb = returnPDBMap(mappedSNPInfo,arguments)
#### For each native Pdb run frustratometer and store the frustation files in native sub directory####
for key in nativePdb.keys():
print nativePdb[key]
calcFrustration(key,nativePdb[key],'native',arguments)
#### For each mutated Pdb run frustratometer and store the frustation files in mutated sub directory####
for key in mutatedPdb.keys():
calcFrustration(key,mutatedPdb[key],'mutated',arguments)
#### Extract out frustartion value along with other info ####
extractFIndexVal(arguments)
if __name__ == '__main__':
arguments = docopt(__doc__, version='extractFrustrationInfo 2.0')
main(arguments)