-
Notifications
You must be signed in to change notification settings - Fork 1
/
pathologic_generation.py
68 lines (65 loc) · 2.76 KB
/
pathologic_generation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import pandas as pd
import sys
import os
from EC_Validation import *
class pathologic_generation:
    """Slice annotation tables and write tab-separated gene records.

    ``separator`` cuts a column range out of a raw table and promotes its
    first row to column labels.  ``Tie2_filler`` writes one gene record
    (attribute lines followed by ``//``) to an already opened file.
    """

    def separator(self, data, start, stop):
        """Return columns ``start:stop`` of *data* with row 0 used as header.

        Args:
            data: pandas DataFrame whose first row holds the column names.
            start: first column position (inclusive) of the slice.
            stop: last column position (exclusive) of the slice.

        Returns:
            A new DataFrame holding every row after the first, restricted to
            the selected columns and labelled with the values of the first
            row.  *data* itself is left untouched.

        Raises:
            IndexError: if *data* has no rows at all.
        """
        header = data.iloc[0, start:stop]
        # Copy the body so relabelling works on an independent frame instead
        # of a slice of ``data`` (avoids SettingWithCopyWarning downstream).
        table = data.iloc[1:, start:stop].copy()
        table.columns = header
        return table

    @staticmethod
    def _write_pair(nf, key, value):
        """Write a single ``key<TAB>value`` line to *nf*."""
        nf.write(f'{key}\t{value}\n')

    def Tie2_filler(self, MJ21_row, MJ5_row, nf, pathologic_headers, row_index):
        """Write one gene record to *nf*.

        Nothing is written when ``MJ21_row['ID']`` is ``'-'`` or ``' '``.

        Args:
            MJ21_row: mapping/Series read for the keys ``ID``, ``NAME``,
                ``start``, ``end``, ``Product Type``, ``description`` and
                ``PMID``.
            MJ5_row: mapping/Series read for the keys ``ID`` and ``EC``.
                Several EC numbers may be given separated by ``/``.  The row
                is not modified.
            nf: writable text file object receiving the record.
            pathologic_headers: sequence of attribute names.  Positions 0-9
                are used for ID, ACCESSION-2, NAME, SYNONYM, REPLICON,
                STARTBASE, ENDBASE, product type, function and EC; the last
                element is used for DBLINK.
            row_index: index of the current row; only used (as
                ``row_index + 3``) in the error message.

        Raises:
            SystemExit: if an EC number is rejected by
                ``EC_Validation(...).EC_check()``.  Before exiting, *nf* is
                closed and ``./Tier2.pf`` is deleted if it exists.
        """
        gene_id = MJ21_row['ID']
        if gene_id == '-' or gene_id == ' ':
            return

        write = self._write_pair

        # ID and ACCESSION-2 both carry the gene identifier.
        write(nf, pathologic_headers[0], gene_id)
        write(nf, pathologic_headers[1], gene_id)

        # NAME is optional.
        if MJ21_row['NAME'] != ' ':
            write(nf, pathologic_headers[2], MJ21_row['NAME'])

        # SYNONYM: the identifier without its first three characters ...
        write(nf, pathologic_headers[3], gene_id[3:])
        # ... plus the identifier from the second table when it has one.
        if MJ5_row['ID'] != '-':
            write(nf, pathologic_headers[3], MJ5_row['ID'])

        # REPLICON is written as a bare header line without a value.
        nf.write(f'{pathologic_headers[4]}\n')

        # STARTBASE, ENDBASE and product type.
        write(nf, pathologic_headers[5], MJ21_row['start'])
        write(nf, pathologic_headers[6], MJ21_row['end'])
        write(nf, pathologic_headers[7], MJ21_row['Product Type'])

        # Function description.
        write(nf, pathologic_headers[8], MJ21_row['description'])

        # DBLINK is optional and always uses the last header.
        if MJ21_row['PMID'] != ' ':
            nf.write(f"{pathologic_headers[-1]}\tPMID:{MJ21_row['PMID']}\n")

        # EC numbers are optional; several may be separated by '/'.
        ec_field = MJ5_row['EC']
        if ec_field != ' ':
            # Split into a local list so the caller's row is not overwritten.
            for ec_number in ec_field.split('/'):
                checker = EC_Validation(ec_number)
                if not checker.EC_check():
                    # Invalid EC syntax: discard the temporary Tier2 file
                    # and abort with an error message.
                    nf.close()
                    try:
                        os.remove("./Tier2.pf")
                    except FileNotFoundError:
                        # Nothing to clean up; do not let a missing file
                        # hide the real error reported below.
                        pass
                    sys.exit(f"Wrong syntax of EC number in row {row_index+3}")
                # Round-trip through the bytes repr so that any non-ASCII
                # character is written as an escape sequence.
                ec_text = str(ec_number.encode("utf-8"))[2:-1]
                write(nf, pathologic_headers[9], ec_text)

        # Record terminator.
        nf.write('//\n')