-
Notifications
You must be signed in to change notification settings - Fork 0
/
chespa.py
108 lines (100 loc) · 3.86 KB
/
chespa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import numpy as np
import pandas as pd
'''
equations are from this paper and supplementary:
Boulton, S., Akimoto, M., Selvaratnam, R., Bashiri, A., & Melacini, G. (2014).
A tool set to map allosteric networks through the NMR chemical shift covariance analysis.
Scientific Reports, 4, 7306.
'''
class Chespa:
'''
Chemical shift projection analysis
Useful in revealing false positive clustering
Takes the HSQC 2D chemical shift values from a reference state (ref),
agonist/activated state (B), and
an antagonist/reverse agonist state (A)
'''
def __init__(self,
file_path,
het_nuc='N'):
'''
The input csv file must have columns labeled {state}w1, {state}w2
for states ref, A, and B
file : str
path to csv file with columns refw1,refw2,Aw1,Aw2,.....
het_nuc : str
Heteronucleus. Either 'N' (nitrogen) or 'C' (carbon)
'''
# index col needs to be the same as the ccs data and labeled 'RESI'
try:
df = pd.read_csv(file_path)
if df.columns[0].upper() != 'RESI':
print('The index column should be named "RESI".')
else:
index_col = df.columns[0]
df = pd.read_csv(file_path, index_col=index_col)
# TODO - this isn't updating the column name
df.rename(columns={index_col:'RESI'},inplace=True)
except ValueError:
print('Make sure that your index column is named "RESI" in all caps.')
except FileNotFoundError:
print("Can't find that file.")
#print(df.head())
self.df = df
self.resis = df['RESI'].to_list()
self.het_nuc = het_nuc
if het_nuc == 'N':
self.het_coef = 0.2
if het_nuc == 'C':
self.het_coef = 0.25
# hetero nucleus shifts in column 0, H shifts in column 1
# TODO : add check to confirm hetero shifts are greater than H shifts
self.states = {
'ref': df[['refw1','refw2']].values,
'A': df[['Aw1','Aw2']].values,
'B': df[['Bw1','Bw2']].values
}
self.ref_to_B = self.get_state_distance('B')
self.ref_to_A = self.get_state_distance('A')
self.cos_theta = self.get_state_angle()
self.X = self.get_fractional_activation()
def get_state_distance(self, state):
'''
SI eq 5
magnitude of vector between ref and state
'''
# ref to proton
rtoH= (self.states['ref'][:,1]-self.states[state][:,1])**2
rtohet = (self.het_coef*(self.states['ref'][:,0]-self.states[state][:,0]))**2
return np.sqrt(rtoH+rtohet)
def get_state_angle(self):
'''
SI eq 6
'''
ref, A, B = list(self.states.values())
# normalized vectors from ref
vb = (B-ref)/np.linalg.norm((B-ref))
va = (A-ref)/np.linalg.norm((A-ref))
# row wise dot product of 2 n_residues X 2 arrays
dot = (va*vb).sum(axis=1)
# multiply magnitudes
denominator = (self.ref_to_A*self.ref_to_B)
cos_theta = dot/denominator
# fix divide by 0's
cos_theta = np.nan_to_num(cos_theta, nan=0)
return cos_theta
def get_fractional_activation(self):
'''
SI eq 7
Normalized projection of antagonist A onto agonist B
from text:
The fractional activation reveals whether a given perturbation
shifts the protein towards activation (X > 0) or inactivation (X < 0).
'''
ref, A, B = list(self.states.values())
# normalized vectors from ref
vb = (B-ref)/np.linalg.norm((B-ref))
va = (A-ref)/np.linalg.norm((A-ref))
# row wise dot product of 2 (n_residues X 2) arrays
dot = (va*vb).sum(axis=1)
return dot/((vb*vb).sum(axis=1))