Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: covalent bonds cannot link nodes on separate branches #408

Merged
merged 10 commits into from
Mar 28, 2023
4 changes: 2 additions & 2 deletions deeprankcore/domain/edgestorage.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@

## interactions
COVALENT = "covalent" # bool; former FEATURENAME_COVALENT
ELECTROSTATIC = "electrostatic" # float; former FEATURENAME_EDGECOULOMB
VANDERWAALS = "vanderwaals" # float; former FEATURENAME_EDGEVANDERWAALS
ELEC = "electrostatic" # float; former FEATURENAME_EDGECOULOMB
VDW = "vanderwaals" # float; former FEATURENAME_EDGEVANDERWAALS
67 changes: 36 additions & 31 deletions deeprankcore/features/contact.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,15 @@

_log = logging.getLogger(__name__)

# cutoff distances for 1-3 and 1-4 pairing. See issue: https://github.com/DeepRank/deeprank-core/issues/357#issuecomment-1461813723
# for cutoff distances, see: https://github.com/DeepRank/deeprank-core/issues/357#issuecomment-1461813723
covalent_cutoff = 2.1
cutoff_13 = 3.6
cutoff_14 = 4.2


def _get_nonbonded_energy(atoms: List[Atom], distances: npt.NDArray[np.float64]) -> Tuple [npt.NDArray[np.float64], npt.NDArray[np.float64]]:
def _get_nonbonded_energy( #pylint: disable=too-many-locals
atoms: List[Atom],
distances: npt.NDArray[np.float64],
) -> Tuple [npt.NDArray[np.float64], npt.NDArray[np.float64]]:
"""Calculates all pairwise electrostatic (Coulomb) and Van der Waals (Lennard Jones) potential energies between all atoms in the structure.

Warning: there's no distance cutoff here. The radius of influence is assumed to infinite.
Expand All @@ -41,32 +43,36 @@ def _get_nonbonded_energy(atoms: List[Atom], distances: npt.NDArray[np.float64])
EPSILON0 = 1.0
COULOMB_CONSTANT = 332.0636
charges = [atomic_forcefield.get_charge(atom) for atom in atoms]
electrostatic_energy = np.expand_dims(charges, axis=1) * np.expand_dims(charges, axis=0) * COULOMB_CONSTANT / (EPSILON0 * distances)
# remove for close contacts
electrostatic_energy[distances < cutoff_13] = 0

E_elec = np.expand_dims(charges, axis=1) * np.expand_dims(charges, axis=0) * COULOMB_CONSTANT / (EPSILON0 * distances)

# VAN DER WAALS POTENTIAL
# calculate main vdw energies
sigmas = [atomic_forcefield.get_vanderwaals_parameters(atom).sigma_main for atom in atoms]
epsilons = [atomic_forcefield.get_vanderwaals_parameters(atom).epsilon_main for atom in atoms]
mean_sigmas = 0.5 * np.add.outer(sigmas,sigmas)
geomean_eps = np.sqrt(np.multiply.outer(epsilons,epsilons)) # sqrt(eps1*eps2)
vdw_energy = 4.0 * geomean_eps * ((mean_sigmas / distances) ** 12 - (mean_sigmas / distances) ** 6)
E_vdw = 4.0 * geomean_eps * ((mean_sigmas / distances) ** 12 - (mean_sigmas / distances) ** 6)

# calculate energies for 1-4 pairs
# calculate vdw energies for 1-4 pairs
sigmas = [atomic_forcefield.get_vanderwaals_parameters(atom).sigma_14 for atom in atoms]
epsilons = [atomic_forcefield.get_vanderwaals_parameters(atom).epsilon_14 for atom in atoms]
mean_sigmas = 0.5 * np.add.outer(sigmas,sigmas)
geomean_eps = np.sqrt(np.multiply.outer(epsilons,epsilons)) # sqrt(eps1*eps2)
energy_14pairs = 4.0 * geomean_eps * ((mean_sigmas / distances) ** 12 - (mean_sigmas / distances) ** 6)
E_vdw_14pairs = 4.0 * geomean_eps * ((mean_sigmas / distances) ** 12 - (mean_sigmas / distances) ** 6)


# adjust vdw energy for close contacts
vdw_energy[distances < cutoff_14] = energy_14pairs[distances < cutoff_14]
vdw_energy[distances < cutoff_13] = 0
# Fix energies for close contacts on same chain
chains = [atom.residue.chain.id for atom in atoms]
chain_matrix = [[chain_1==chain_2 for chain_2 in chains] for chain_1 in chains]
pair_14 = np.logical_and(distances < cutoff_14, chain_matrix)
pair_13 = np.logical_and(distances < cutoff_13, chain_matrix)

E_vdw[pair_14] = E_vdw_14pairs[pair_14]
E_vdw[pair_13] = 0
E_elec[pair_13] = 0


return electrostatic_energy, vdw_energy
return E_elec, E_vdw


def add_features( # pylint: disable=unused-argument, too-many-locals
Expand Down Expand Up @@ -100,31 +106,30 @@ def add_features( # pylint: disable=unused-argument, too-many-locals
interatomic_distances = distance_matrix(positions, positions)
interatomic_electrostatic_energy, interatomic_vanderwaals_energy = _get_nonbonded_energy(all_atoms, interatomic_distances)


# assign features
if isinstance(graph.edges[0].id, AtomicContact):
for edge in graph.edges:
for edge in graph.edges:
contact = edge.id

if isinstance(contact, AtomicContact):
## find the indices
contact = edge.id
atom1_index = atom_dict[contact.atom1]
atom2_index = atom_dict[contact.atom2]
## set features
edge.features[Efeat.SAMERES] = float(contact.atom1.residue == contact.atom2.residue) # 1.0 for True; 0.0 for False
edge.features[Efeat.SAMECHAIN] = float(contact.atom1.residue.chain == contact.atom1.residue.chain) # 1.0 for True; 0.0 for False
edge.features[Efeat.SAMERES] = float(contact.atom1.residue == contact.atom2.residue)
edge.features[Efeat.SAMECHAIN] = float(contact.atom1.residue.chain == contact.atom1.residue.chain)
edge.features[Efeat.DISTANCE] = interatomic_distances[atom1_index, atom2_index]
edge.features[Efeat.COVALENT] = float(edge.features[Efeat.DISTANCE] < covalent_cutoff) # 1.0 for True; 0.0 for False
edge.features[Efeat.ELECTROSTATIC] = interatomic_electrostatic_energy[atom1_index, atom2_index]
edge.features[Efeat.VANDERWAALS] = interatomic_vanderwaals_energy[atom1_index, atom2_index]

elif isinstance(contact, ResidueContact):
for edge in graph.edges:
edge.features[Efeat.ELEC] = interatomic_electrostatic_energy[atom1_index, atom2_index]
edge.features[Efeat.VDW] = interatomic_vanderwaals_energy[atom1_index, atom2_index]

elif isinstance(contact, ResidueContact):
## find the indices
contact = edge.id
atom1_indices = [atom_dict[atom] for atom in contact.residue1.atoms]
atom2_indices = [atom_dict[atom] for atom in contact.residue2.atoms]
## set features
edge.features[Efeat.SAMECHAIN] = float(contact.residue1.chain == contact.residue2.chain) # 1.0 for True; 0.0 for False
edge.features[Efeat.SAMECHAIN] = float(contact.residue1.chain == contact.residue2.chain)
edge.features[Efeat.DISTANCE] = np.min([[interatomic_distances[a1, a2] for a1 in atom1_indices] for a2 in atom2_indices])
edge.features[Efeat.COVALENT] = float(edge.features[Efeat.DISTANCE] < covalent_cutoff) # 1.0 for True; 0.0 for False
edge.features[Efeat.ELECTROSTATIC] = np.sum([[interatomic_electrostatic_energy[a1, a2] for a1 in atom1_indices] for a2 in atom2_indices])
edge.features[Efeat.VANDERWAALS] = np.sum([[interatomic_vanderwaals_energy[a1, a2] for a1 in atom1_indices] for a2 in atom2_indices])
edge.features[Efeat.ELEC] = np.sum([[interatomic_electrostatic_energy[a1, a2] for a1 in atom1_indices] for a2 in atom2_indices])
edge.features[Efeat.VDW] = np.sum([[interatomic_vanderwaals_energy[a1, a2] for a1 in atom1_indices] for a2 in atom2_indices])

# Calculate irrespective of node type
edge.features[Efeat.COVALENT] = float(edge.features[Efeat.DISTANCE] < covalent_cutoff and edge.features[Efeat.SAMECHAIN])
8 changes: 7 additions & 1 deletion tests/data/hdf5/_generate_testdata.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
"source": [
"import warnings\n",
"from Bio import BiopythonWarning\n",
"from deeprankcore.utils.grid import GridSettings, MapMethod\n",
"\n",
"with warnings.catch_warnings():\n",
" warnings.simplefilter(\"ignore\", BiopythonWarning)\n",
" warnings.simplefilter(\"ignore\", RuntimeWarning)\n",
Expand Down Expand Up @@ -75,7 +77,11 @@
" ))\n",
"\n",
" # Generate graphs and save them in hdf5 files\n",
" output_paths = queries.process(cpu_count=1, prefix='1ATN_ppi')"
" output_paths = queries.process(cpu_count=1,\n",
" prefix='1ATN_ppi',\n",
" grid_settings=GridSettings([20, 20, 20], [20.0, 20.0, 20.0]),\n",
" grid_map_method=MapMethod.GAUSSIAN,\n",
" )"
]
},
{
Expand Down
89 changes: 62 additions & 27 deletions tests/features/test_contact.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from typing import Tuple
from uuid import uuid4

import numpy as np
from pdb2sql import pdb2sql

from deeprankcore.domain import edgestorage as Efeat
from deeprankcore.features.contact import add_features
from deeprankcore.features.contact import (add_features, covalent_cutoff,
cutoff_13, cutoff_14)
from deeprankcore.molstruct.atom import Atom
from deeprankcore.molstruct.pair import AtomicContact, ResidueContact
from deeprankcore.molstruct.structure import Chain
Expand All @@ -31,11 +33,12 @@ def _wrap_in_graph(edge: Edge):

def _get_contact( # pylint: disable=too-many-arguments
pdb_id: str,
residue_num1: int,
atom_name1: str,
residue_num2: int,
atom_name2: str,
residue_num1: int,
atom_name1: str,
residue_num2: int,
atom_name2: str,
residue_level: bool = False,
chains: Tuple[str,str] = None,
) -> Edge:

pdb_path = f"tests/data/pdb/{pdb_id}/{pdb_id}.pdb"
Expand All @@ -46,22 +49,27 @@ def _get_contact( # pylint: disable=too-many-arguments
finally:
pdb._close() # pylint: disable=protected-access

if not chains:
chains = [structure.chains[0], structure.chains[0]]
else:
chains = [structure.get_chain(chain) for chain in chains]

if not residue_level:
contact = AtomicContact(
_get_atom(structure.chains[0], residue_num1, atom_name1),
_get_atom(structure.chains[0], residue_num2, atom_name2)
_get_atom(chains[0], residue_num1, atom_name1),
_get_atom(chains[1], residue_num2, atom_name2)
)
else:
contact = ResidueContact(
structure.chains[0].residues[residue_num1],
structure.chains[0].residues[residue_num2]
chains[0].residues[residue_num1],
chains[1].residues[residue_num2]
)

edge_obj = Edge(contact)
add_features(pdb_path, _wrap_in_graph(edge_obj))

assert not np.isnan(edge_obj.features[Efeat.VANDERWAALS]), 'isnan vdw'
assert not np.isnan(edge_obj.features[Efeat.ELECTROSTATIC]), 'isnan electrostatic'
assert not np.isnan(edge_obj.features[Efeat.VDW]), 'isnan vdw'
assert not np.isnan(edge_obj.features[Efeat.ELEC]), 'isnan electrostatic'
assert not np.isnan(edge_obj.features[Efeat.DISTANCE]), 'isnan distance'
assert not np.isnan(edge_obj.features[Efeat.SAMECHAIN]), 'isnan samechain'
assert not np.isnan(edge_obj.features[Efeat.COVALENT]), 'isnan covalent'
Expand All @@ -76,8 +84,9 @@ def test_covalent_pair():
"""

edge_covalent = _get_contact('101M', 0, "N", 0, "CA")
assert edge_covalent.features[Efeat.VANDERWAALS] == 0.0, 'nonzero vdw energy for covalent pair'
assert edge_covalent.features[Efeat.ELECTROSTATIC] == 0.0, 'nonzero electrostatic energy for covalent pair'
assert edge_covalent.features[Efeat.DISTANCE] < covalent_cutoff
assert edge_covalent.features[Efeat.VDW] == 0.0, 'nonzero vdw energy for covalent pair'
assert edge_covalent.features[Efeat.ELEC] == 0.0, 'nonzero electrostatic energy for covalent pair'
assert edge_covalent.features[Efeat.COVALENT] == 1.0, 'covalent pair not recognized as covalent'


Expand All @@ -86,28 +95,54 @@ def test_13_pair():
"""

edge_13 = _get_contact('101M', 0, "N", 0, "CB")
assert edge_13.features[Efeat.VANDERWAALS] == 0.0, 'nonzero vdw energy for 1-3 pair'
assert edge_13.features[Efeat.ELECTROSTATIC] == 0.0, 'nonzero electrostatic energy for 1-3 pair'
assert edge_13.features[Efeat.DISTANCE] < cutoff_13
assert edge_13.features[Efeat.VDW] == 0.0, 'nonzero vdw energy for 1-3 pair'
assert edge_13.features[Efeat.ELEC] == 0.0, 'nonzero electrostatic energy for 1-3 pair'
assert edge_13.features[Efeat.COVALENT] == 0.0, '1-3 pair recognized as covalent'


def test_very_close_opposing_chains():
"""ChainA THR 118 O - ChainB ARG 30 NH1 (3.55 A). Should have non-zero energy despite close contact, because opposing chains.
"""

opposing_edge = _get_contact('1A0Z', 118, "O", 30, "NH1", chains=('A', 'B'))
assert opposing_edge.features[Efeat.DISTANCE] < cutoff_13
assert opposing_edge.features[Efeat.ELEC] != 0.0
assert opposing_edge.features[Efeat.VDW] != 0.0


def test_14_pair():
"""MET 0: N - CG, 1-4 pair (at 4.12 A distance). Should have non-zero electrostatic energy and small non-zero vdw energy.
"""

edge_14 = _get_contact('101M', 0, "CA", 0, "SD")
assert edge_14.features[Efeat.VANDERWAALS] != 0.0, '1-4 pair with 0 vdw energy'
assert abs(edge_14.features[Efeat.VANDERWAALS]) < 0.1, '1-4 pair with large vdw energy'
assert edge_14.features[Efeat.ELECTROSTATIC] != 0.0, '1-4 pair with 0 electrostatic'
assert edge_14.features[Efeat.DISTANCE] > cutoff_13
assert edge_14.features[Efeat.DISTANCE] < cutoff_14
assert edge_14.features[Efeat.VDW] != 0.0, '1-4 pair with 0 vdw energy'
assert abs(edge_14.features[Efeat.VDW]) < 0.1, '1-4 pair with large vdw energy'
assert edge_14.features[Efeat.ELEC] != 0.0, '1-4 pair with 0 electrostatic'
assert edge_14.features[Efeat.COVALENT] == 0.0, '1-4 pair recognized as covalent'


def test_14dist_opposing_chains():
"""ChainA PRO 114 CA - ChainB HIS 116 CD2 (3.62 A). Should have non-zero energy despite close contact, because opposing chains.
E_vdw for this pair if they were on the same chain: 0.018
E_vdw for this pair on opposing chains: 0.146
"""

opposing_edge = _get_contact('1A0Z', 114, "CA", 116, "CD2", chains=('A', 'B'))
assert opposing_edge.features[Efeat.DISTANCE] > cutoff_13
assert opposing_edge.features[Efeat.DISTANCE] < cutoff_14
assert opposing_edge.features[Efeat.ELEC] > 1.0, f'electrostatic: {opposing_edge.features[Efeat.ELEC]}'
assert opposing_edge.features[Efeat.VDW] > 0.1, f'vdw: {opposing_edge.features[Efeat.VDW]}'


def test_vanderwaals_negative():
"""MET 0 N - ASP 27 CB, very far (29.54 A). Should have negative vanderwaals energy.
"""

edge_far = _get_contact('101M', 0, "N", 27, "CB")
assert edge_far.features[Efeat.VANDERWAALS] < 0.0
assert edge_far.features[Efeat.VDW] < 0.0


def test_vanderwaals_morenegative():
Expand All @@ -116,7 +151,7 @@ def test_vanderwaals_morenegative():

edge_intermediate = _get_contact('101M', 0, "N", 138, "CG")
edge_far = _get_contact('101M', 0, "N", 27, "CB")
assert edge_intermediate.features[Efeat.VANDERWAALS] < edge_far.features[Efeat.VANDERWAALS]
assert edge_intermediate.features[Efeat.VDW] < edge_far.features[Efeat.VDW]


def test_edge_distance():
Expand All @@ -142,7 +177,7 @@ def test_attractive_electrostatic_close():
"""

close_attracting_edge = _get_contact('101M', 139, "CZ", 136, "OE2")
assert close_attracting_edge.features[Efeat.ELECTROSTATIC] < 0.0
assert close_attracting_edge.features[Efeat.ELEC] < 0.0


def test_attractive_electrostatic_far():
Expand All @@ -152,11 +187,11 @@ def test_attractive_electrostatic_far():
far_attracting_edge = _get_contact('101M', 139, "CZ", 20, "OD2")
close_attracting_edge = _get_contact('101M', 139, "CZ", 136, "OE2")
assert (
far_attracting_edge.features[Efeat.ELECTROSTATIC] < 0.0
far_attracting_edge.features[Efeat.ELEC] < 0.0
), 'far electrostatic > 0'
assert (
far_attracting_edge.features[Efeat.ELECTROSTATIC]
> close_attracting_edge.features[Efeat.ELECTROSTATIC]
far_attracting_edge.features[Efeat.ELEC]
> close_attracting_edge.features[Efeat.ELEC]
), 'far electrostatic <= close electrostatic'


Expand All @@ -165,7 +200,7 @@ def test_repulsive_electrostatic():
"""

opposing_edge = _get_contact('101M', 109, "OE2", 105, "OE1")
assert opposing_edge.features[Efeat.ELECTROSTATIC] > 0.0
assert opposing_edge.features[Efeat.ELEC] > 0.0


def test_residue_contact():
Expand All @@ -175,6 +210,6 @@ def test_residue_contact():
res_edge = _get_contact('101M', 0, '', 1, '', residue_level = True)
assert res_edge.features[Efeat.DISTANCE] > 0.0, 'distance <= 0'
assert res_edge.features[Efeat.DISTANCE] < 1e5, 'distance > 1e5'
assert res_edge.features[Efeat.ELECTROSTATIC] != 0.0, 'electrostatic == 0'
assert res_edge.features[Efeat.VANDERWAALS] != 0.0, 'vanderwaals == 0'
assert res_edge.features[Efeat.ELEC] != 0.0, 'electrostatic == 0'
assert res_edge.features[Efeat.VDW] != 0.0, 'vanderwaals == 0'
assert res_edge.features[Efeat.COVALENT] == 1.0, 'neighboring residues not seen as covalent'
6 changes: 3 additions & 3 deletions tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def test_dataset_collates_entry_names(self):
edge_features=[Efeat.DISTANCE],
target=targets.IRMSD)),
("GridDataset", GridDataset(self.hdf5_path,
features=[Efeat.VANDERWAALS],
features=[Efeat.VDW],
target=targets.IRMSD))]:

entry_names = []
Expand Down Expand Up @@ -66,7 +66,7 @@ def test_dataset_dataframe_size(self):
def test_grid_dataset_regression(self):
dataset = GridDataset(
hdf5_path=self.hdf5_path,
features=[Efeat.VANDERWAALS, Efeat.ELECTROSTATIC],
features=[Efeat.VDW, Efeat.ELEC],
target=targets.IRMSD
)

Expand All @@ -81,7 +81,7 @@ def test_grid_dataset_regression(self):
def test_grid_dataset_classification(self):
dataset = GridDataset(
hdf5_path=self.hdf5_path,
features=[Efeat.VANDERWAALS, Efeat.ELECTROSTATIC],
features=[Efeat.VDW, Efeat.ELEC],
target=targets.BINARY
)

Expand Down
Loading