-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathobjective_function.py
139 lines (109 loc) · 4.73 KB
/
objective_function.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import numpy as np
import fibonacci_sphere as fibo
def hydrophobic_factor(df):
'''
Calculate the hydrophobic factor of a slice of 1A.
The hydrophobic factor of the objective function is
the relative hydrophobic membrane-exposedsurface area
(hydrophobic area divided by all surface area).
'''
hydrophobic = ['PHE', 'GLY', 'ILE', 'LEU', 'MET', 'TRP', 'VAL', 'TYR']
hydrophilic = ['ALA', 'CYS', 'ASP', 'GLU', 'HIS', 'LYS', 'ASN', 'PRO', 'GLN', 'ARG', 'SER', 'THR']
# create separate df for hydrophobic and hydrophilic residues
df_hydrophobic = df[df['residue_name'].isin(hydrophobic)]
df_hydrophilic = df[df['residue_name'].isin(hydrophilic)]
sum_acc_hydrophobic = sum(df_hydrophobic['acc'])
sum_acc_hydrophilic = sum(df_hydrophilic['acc'])
factor = sum_acc_hydrophobic / (sum_acc_hydrophilic + sum_acc_hydrophobic)
return factor
def monotonic(x):
"""
check list monotonicity and return a boolean value
"""
dx = np.diff(x)
return np.all(dx <= 0) or np.all(dx >= 0)
def add_is_straight(df):
"""
Check if the residues are straight and add this information in dataframe
"The i-th residue in a protein chain is part of a straight triplet if the projection of
the Cα atoms of the previous third (i − 3) residue, itself (i)
and the next third (i +3) residue onto a predefined vector (the
normal vector of membrane planes, see below) are in a monotone decreasing
or increasing order."
input:
- df: pdb dataframe of Calpha
"""
df['straight'] = False
for i, row in df.iterrows():
if_straight = False
if i > 3 and i < (len(df) - 4):
# the projection of triplets around i in form of list
projection_list = list(df.projection)[(i - 3):(i + 3)]
if_straight = monotonic(projection_list)
df.at[i, 'straight'] = if_straight
return (df)
def straightness_factor(df_slice):
'''
calculate the straightness factor and return
"The straightness factor is defined as
the relative frequency of ‘straight’ residues in a given protein slice."
input:
- df_slice: pdb dataframe of Calpha with slice column (result returned by add_is_straight)
'''
# number of straight residues (column 'straight' is True)
number_straight = df_slice['straight'].values.sum()
return number_straight / len(df_slice)
def turn_factor(df_slice):
"""
The turn factor is defined as one minus the relative frequency of ‘turn’ residues in a given slice.
Turn triplets have a similar definition as the straight triplets: the i-th residue in a protein
chain is the center of a turn if the projection of the Cα atoms of the previous third (i − 3) residue,
itself (i) and the next third (i + 3) residue onto the predefined vector
are not in a monotone decreasing or increasing order.
- df_slice: pdb dataframe of Calpha with slice column (result returned by add_is_straight)
"""
number_turn = (~df_slice['straight'].values.sum())
return 1 - (number_turn / len(df_slice))
'''
def end_chain(df_slice):
"""
The end-chain factor is one minus the relative frequency of
chain end residues in a given slice
"""
# 1 - freq(ter res in slice )
'''
def calculate_Qvalue(df_slice):
"""
calculate the Q-value of a PDB dataframe
- df_slice: pdb dataframe of Calpha with slice column (result returned by add_is_straight)
"""
qvalue = hydrophobic_factor(df_slice) * straightness_factor(df_slice) * turn_factor(df_slice)
return qvalue
def get_best_vector(df, com, fibo_sphere):
"""
return the best Q value and the best normal vector of a protein
input:
- df: pdb dataframe
- com: center of mass
- fibo_sphere: a list of 3D vectors returned by fibo.fibonacci_sphere function
"""
best_vector = fibo_sphere[0]
best_Qvalue = 0
for vector in fibo_sphere:
# add the slice number of each residue
df_current_vector = fibo.slicing(df, vector, com)
# check if the residues are straight
add_is_straight(df_current_vector)
# calculate the Q-value of each slice of a vector
df_grouped = df_current_vector.groupby(['slice'])
list_qvalue_1a = list() # list of Q-value of each slice
for name, group in df_grouped:
qvalue_1a = calculate_Qvalue(group)
list_qvalue_1a.append(qvalue_1a)
# calculate the Q-value of a 22A window of a vector
for i in range(len(list_qvalue_1a)-22):
qvalue_22a = sum(list_qvalue_1a[i:(i+22)])
if qvalue_22a > best_Qvalue: # if find a better Qvalue
best_Qvalue = qvalue_22a
best_vector = vector
return [best_Qvalue, best_vector]