-
Notifications
You must be signed in to change notification settings - Fork 0
/
PPM.py
233 lines (201 loc) · 13.6 KB
/
PPM.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
import copy
def PPM(input_code,branch_direction=True,input_name='ppm_outputfile'):
# Define amino acid atom information using the residue letters as the key and the atoms names and coordinates as values
# Input code is a generic FASTA sequence consisting of single letter amino acid codes, end the sequence with a * signifying the C-terminus
# the function takes the input and determines the atomic coordinates and atom connections via separate dictionaries
# ^ specifies where to start a separate branched sequence * specifies where to end the branch
# So, K^AAG*KK* sequence consists of three lysienes, with the first lysine having the sequence GAA attached to its sidechain via a isopeptide bond.
amino_acid_data = {
'A':[('N', (0.000,0.000,0.000)),
('CA', (1.432, -0.444, 0.000)),
('CB', (1.432, -1.872, 0.000)),
('O', (2.377, 1.27, 0.000)),
('C', (2.377, 0.000, 0.000))],
'K':[('N',(0.000,0.000,0.000)),
('CA',(1.432, -0.444, 0.000)),
('CB',(1.432, -1.872, 0.000)), # side chain coords. = 1.54 C-C bond length and 109.5 angle
('CG',(2.321, -3.129, 0.000)),
('CD',(1.432, -4.387, 0.000)),
('CE',(2.321, -5.645, 0.000)),
('NZ',(1.432, -6.903, 0.000)),
('O', (2.377, 1.27, 0.000)),
('C', (2.377, 0.000, 0.000))],
'G':[('N', (0.000,0.000,0.000)),
('CA', (1.432, -0.444, 0.000)),
('O', (2.377, 1.27, 0.000)),
('C', (2.377, 0.000, 0.000))],
'*': [('OXT',(1.165,-0.665,0.000)),] # this is the C-terminus OH group
# Need to add more amino acids as needed, also need to specify coordinates i.e. start at zero,
# bond length data fro https://arxiv.org/pdf/0804.2488.pdf
}
# need to add C and N termini
# labeled starting at zero, so the atom index+ conect # will equal the correct
conect_record = {'A': [ # atoms indecies that the current atom index is connected to these data include replicates i.e. #1 conect 2,3 and #2 conect 1,4
('N', (1,2)),
('CA', (2,1,5,3)),
('C', (5,2,4,6)), # hmm need to specify double bond, is also connected to previous residue 'N'
('O', (4,5)),
('CB', (3,2)),],
'K':[('N',(1,2)),
('CA',(2,1,5,3)),
('CB',(3,2,4)),
('CG',(4,3,5)),
('CD',(5,4,6)),
('CE',(6,5,7)),
('NZ',(7,6)),
('O',(8,9)),
('C',(9,8,10))],
'*':[('OXT',(1,0))],
'G':[('N', (1,2)),
('CA', (2,1,4)),
('O', (3,4)),
('C', (4,2,3))],
# Need to add more amino acids as needed, also need to specify coordinates i.e. start at zero,
}
residue_name_record = {'A':'ALA A','K':'LYS K','G':'GLY G'} # this dictionary has the 3-letter code for each single letter amino acid
################################################################################################################################################
# Create a PDB file
output_filename = str(input_name+'.pdb')#'linear_amino_acids.pdb'
with open(output_filename, 'w') as pdb_file:
horizontal_shift = 0.0 # Initialize horizontal shift
vertical_shift = 0.0
index_value = int() # initialize individual atom index value (1,2,3,4,5...ect.)
residue_index = 1 # index value for each whole amino acid: e.g. atoms 1,2,3,4 belongs to aminoacid #1 etc.
conect_output = [] # this will store all of the conect records t be printed
conect_count = int() # this will be added to the next residue so the conect_record atoms are numbered properly
atom_count = int() # this counts the atoms to add to the conect_count
#total_residue_angle_list = [] # this will store all of the angle data from each residue
skip_index = -1
skip_flag = False
chain_repeat = 0 # this keeps track of how many branches we have
for amino_acid in input_code:
skip_index += 1
if amino_acid == '^':
skip_flag = True
#need to skip the residues that are added in the branched chain
#chain_repeat += 1
side_chain_start_index = input_code.index(amino_acid,skip_index) # gets the position of the ^, starting at the current position
side_chain_shift = int() # incriment for moving the branched side chain
side_chain_skip_start = side_chain_start_index # skip all of the atoms between ^ and * for the master loop
side_chain_skip_end = input_code.index('*',side_chain_start_index) # ends at the next * this way multiple branches can be made
side_chain = input_code[side_chain_start_index+1:input_code.find('*',side_chain_start_index)+1] # create new str of just the sidechain
#print('side chain:',side_chain)
# here I refer to the branched amino acid sequence as a side chain. I am sorry for doing this
previous_residue_coord = input_code[side_chain_start_index-1] # find the previous residue to locate its sidechain
previous_residue_coord_data = amino_acid_data.get(previous_residue_coord) # obtain previous residue coord data
previous_residue_last_side_chain = previous_residue_coord_data[-3][1] # this is the final sidechain atom
#sc = side_chain = branched sequence
sc_starting_x = previous_residue_last_side_chain[0]#-3.747 # starting x
#print(sc_starting_x)
sc_starting_y = previous_residue_last_side_chain[1]-1.37 # starting y - amide bond length
#print(sc_starting_y)
#################### reverse direction success ########################################
if branch_direction:
side_chain = side_chain[::-1] # reverse the sequence order, with C-term starting first
#print('Branched side chain:',side_chain)
for residue in side_chain:
amino_acid_data_entry = amino_acid_data.get(residue)
side_chain_entry = conect_record.get(residue)
side_chain_name = residue_name_record.get(residue)
#amino_data_copy = cpy
if amino_acid_data_entry[0][0] != 'OXT':
cpy = copy.copy(amino_acid_data_entry) # copy the coord data
cpy[0],cpy[-1],cpy[-2] = ('N', (cpy[-1][1][0],cpy[0][1][2],cpy[0][1][2])),('C', (cpy[0][1][0],cpy[-1][1][1],cpy[-1][1][2])),('O',(cpy[0][1][0],cpy[-2][1][1],cpy[-2][1][2]))
# we flipped the x-coords. now we start with the C-term first. fyi: there mightve been a better way to do this.
for atom_index, (atom_name, atom_coods) in enumerate(cpy, start=1):
#print('Wrote branched',atom_name,'to file')
pdb_file.write(f'ATOM {atom_index+index_value:5d} {atom_name:<3} {side_chain_name}{residue_index:>4d} '
f'{atom_coods[1]+horizontal_shift-sc_starting_x:8.3f}{-1*atom_coods[0]-side_chain_shift+sc_starting_y:8.3f}{atom_coods[2]:8.3f}'
f' 1.00 62.66 {atom_name[0]}\n')
else:
for atom_index, (atom_name, atom_coods) in enumerate(amino_acid_data_entry, start=1):
#print('Wrote branched',atom_name,'to file')
pdb_file.write(f'ATOM {atom_index+index_value:5d} {atom_name:<3} {previous_residue_name}{residue_index:>4d} '
f'{atom_coods[1]+horizontal_shift-sc_starting_x:8.3f}{-1*atom_coods[0]-side_chain_shift+sc_starting_y+3.747-2*1.165:8.3f}{atom_coods[2]:8.3f}'
f' 1.00 62.66 {atom_name[0]}\n')
side_chain_shift -= 2.587 # residue length - amide bond
atom_count += 1
for atom_index_conect, (atom_name_conect, atom_conect) in enumerate(conect_data, start=1): #iterate through the dictionary, create pairs
# append conect records to a master list, it does not matter the order, as long as they are correct
conect_output.append([i + conect_count for i in atom_conect])
side_chain_shift += 3.747
index_value += atom_index
residue_index += 1
conect_count = atom_count
previous_residue_name = side_chain_name
####################################################################################################
##################################END REVERSE SIDE CHAIN############################################
####################################################################################################
else:
for residue in side_chain:
amino_acid_data_entry = amino_acid_data.get(residue)
side_chain_entry = conect_record.get(residue)
side_chain_name = residue_name_record.get(residue)
for atom_index, (atom_name, atom_coods) in enumerate(amino_acid_data_entry, start=1): # hahah atom coods
#new_coords = atom_coods
if atom_name != 'OXT':
pdb_file.write(f'ATOM {atom_index+index_value:5d} {atom_name:<3} {side_chain_name}{residue_index:>4d} '
f'{atom_coods[1]+horizontal_shift-sc_starting_x:8.3f}{-1*atom_coods[0]-side_chain_shift+sc_starting_y:8.3f}{atom_coods[2]:8.3f}'
f' 1.00 62.66 {atom_name[0]}\n')
else:
pdb_file.write(f'ATOM {atom_index+index_value:5d} {atom_name:<3} {previous_residue_name}{residue_index:>4d} '
f'{atom_coods[1]+horizontal_shift-sc_starting_x:8.3f}{-1*atom_coods[0]-side_chain_shift+sc_starting_y+3.747-2*1.165:8.3f}{atom_coods[2]:8.3f}'
f' 1.00 62.66 {atom_name[0]}\n')
atom_count += 1
for atom_index_conect, (atom_name_conect, atom_conect) in enumerate(conect_data, start=1): #iterate through the dictionary, create pairs
# append conect records to a master list, it does not matter the order, as long as they are correct
conect_output.append([i + conect_count for i in atom_conect])
side_chain_shift += 3.747
index_value += atom_index
residue_index += 1
conect_count = atom_count
previous_residue_name = side_chain_name
############################################################
############################################################
#continue on to the rest of the input main chain############
############################################################
############################################################
if skip_flag is True:
if side_chain_skip_start <= skip_index <= side_chain_skip_end:
#print('Skip loop:',skip_index)
continue
skip_flag = False # reset the skip flag so we can redo another branch
amino_acid_data_entry = amino_acid_data.get(amino_acid) # .get each dictionary data from letter code input
conect_data = conect_record.get(amino_acid)
residue_name = residue_name_record.get(amino_acid)
if amino_acid_data_entry is None:
print(f"Amino acid code '{amino_acid}' not found. Skipped.")
continue
#print('for_check1')
# Write atom information to the PDB file with a horizontal shift
for atom_index, (atom_name, atom_coords) in enumerate(amino_acid_data_entry, start=1): #iterate through the dictionary, create pairs
if atom_name != 'OXT':
pdb_file.write(f'ATOM {atom_index+index_value:5d} {atom_name:<3} {residue_name}{residue_index:>4d} '
f'{atom_coords[0]+ horizontal_shift:8.3f}{atom_coords[1]:8.3f}{atom_coords[2]:8.3f}'
f' 1.00 62.66 {atom_name[0]}\n')
else:
pdb_file.write(f'ATOM {atom_index+index_value:5d} {atom_name:<3} {previous_residue_name}{residue_index-1:>4d} '
f'{atom_coords[0]+ (horizontal_shift-3.747+2*1.165):8.3f}{atom_coords[1]:8.3f}{atom_coords[2]:8.3f}'
f' 1.00 62.66 {atom_name[0]}\n')
# conect time
atom_count += 1
for atom_index_conect, (atom_name_conect, atom_conect) in enumerate(conect_data, start=1): #iterate through the dictionary, create pairs
# append conect records to a master list, it does not matter the order, as long as they are correct
conect_output.append([i + conect_count for i in atom_conect])
horizontal_shift += 3.747 # Move by given Angstrom 'horizontally' one residue length+amide bond distance
#vertical_shift += 0.5
index_value += atom_index
residue_index += 1
conect_count = atom_count
previous_residue_name = residue_name
#print('for_check2')
# write all of the conect records to the remainder of the output file
for record in conect_output:
pdb_file.write('CONECT '+' '.join(map(str,record))+'\n') # need to print each value in the subset list
############################################################
#end the master loop
############################################################
print(f'PDB file generated: {output_filename}')
print(input_code)
#print(atom_coords)
#print(atom_name)