Skip to content

Commit

Permalink
Merge pull request #2 from RadostW/pdb-loader
Browse files Browse the repository at this point in the history
Loading pdb files from online resources
  • Loading branch information
RadostW authored Jan 14, 2024
2 parents 31bacba + 0c529bf commit 908a4e9
Show file tree
Hide file tree
Showing 4 changed files with 170 additions and 2 deletions.
3 changes: 2 additions & 1 deletion docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@ sphinxcontrib-serializinghtml==1.1.5

numpy>=1.20.3
scipy>=1.5.4
MDAnalysis>=2.7.0

jax>=0.2.13
#jaxlib
jaxlib

docutils>=0.16

Expand Down
4 changes: 3 additions & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,16 @@
sys.path.insert(0, _pysrc)
# Now we can import local modules.
import pygrpy
import pygrpy.pdb_loader
import pygrpy.jax_grpy_tensors


# -- Project dependencies import ---------------------------------------------

# Import what you need for the documented package to work
import numpy
import scipy

import MDAnalysis

# -- Project information -----------------------------------------------------

Expand Down
25 changes: 25 additions & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ Package contents
.. automodule:: pygrpy.grpy_tensors
:members:

.. automodule:: pygrpy.pdb_loader
:members:

Experimental features -- jax support
''''''''''''''''''''''''''''''''''''
.. automodule:: pygrpy.jax_grpy_tensors
Expand Down Expand Up @@ -70,6 +73,28 @@ Example use
if __name__ == "__main__":
test_hydrosize()

.. prompt:: python >>> auto

# Copyright (C) Radost Waszkiewicz 2024
# This software is distributed under MIT license
# Load shape of Lysozyme-C from different databases. Compare hydrodynamic size

import pygrpy.pdb_loader
import pygrpy.grpy

pdb_content = pygrpy.pdb_loader.get_pdb_from_alphafold("P61626")
coordinates, radii = pygrpy.pdb_loader.centres_and_radii(pdb_content)
alphafold_size = pygrpy.grpy.stokesRadius(coordinates, radii)

pdb_content = pygrpy.pdb_loader.get_pdb_from_pdb("253L")
coordinates, radii = pygrpy.pdb_loader.centres_and_radii(pdb_content)
pdb_size = pygrpy.grpy.stokesRadius(coordinates, radii)

print("Alphafold size [Ang]:")
print(alphafold_size)
print("Protein Data Bank size [Ang]:")
print(pdb_size)


.. prompt:: python >>> auto

Expand Down
140 changes: 140 additions & 0 deletions pygrpy/pdb_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
# Copyright Tomasz Skóra 2024
# Copyright Radost Waszkiewicz 2024

import numpy as np
import MDAnalysis as mda
import requests
import json
from io import StringIO


def get_pdb_from_pdb(
    pdb_id, pdb_download_server="https://files.rcsb.org/download/", timeout=30.0
):
    """
    Fetches the PDB file of a given PDB ID.

    Parameters
    ----------
    pdb_id: str
        The PDB ID of the protein.
    pdb_download_server: str [optional]
        The address of the pdb server. ``pdb_id`` and the ``.pdb`` suffix are
        appended to it verbatim, so it should end with a slash.
    timeout: float or tuple or None [optional]
        Timeout in seconds handed to ``requests.get``. Without one a stalled
        server would block the caller indefinitely. Pass ``None`` to wait
        forever (the previous behaviour).

    Returns
    -------
    str or None
        The PDB file content as a string if successful, None if the server
        answered with an error status.

    Raises
    ------
    requests.exceptions.RequestException
        Propagated unchanged from ``requests.get`` on connection failures
        and timeouts.
    """

    pdb_url = pdb_download_server + pdb_id + ".pdb"
    print(
        "Sending GET request to {} to fetch {}'s PDB file as a string.".format(
            pdb_url, pdb_id
        )
    )
    response = requests.get(pdb_url, timeout=timeout)
    # `response.ok` is False for any 4xx/5xx status code.
    if response is None or not response.ok:
        print("Something went wrong.")
        return None
    return response.text


def get_pdb_from_alphafold(
    uniprot,
    alphafold_download_server="https://alphafold.ebi.ac.uk/api/prediction/",
    timeout=30.0,
):
    """
    Fetches the PDB file from AlphaFold for a given UniProt ID.

    Two requests are made: the first fetches the prediction metadata (JSON),
    the second downloads the PDB file from the ``pdbUrl`` found in it.

    Parameters
    ----------
    uniprot: str
        The UniProt ID of the protein.
    alphafold_download_server: str [optional]
        The address of the alphafold server. ``uniprot`` is appended to it
        verbatim, so it should end with a slash.
    timeout: float or tuple or None [optional]
        Timeout in seconds handed to each ``requests.get`` call. Pass
        ``None`` to wait forever (the previous behaviour).

    Returns
    -------
    str or None
        The PDB file content as a string if successful, None if either
        request answered with an error status or no prediction was returned.

    Raises
    ------
    requests.exceptions.RequestException
        Propagated unchanged from ``requests.get`` on connection failures
        and timeouts.
    KeyError
        If the prediction metadata has no ``"pdbUrl"`` entry.
    """

    alphafold_url = alphafold_download_server + uniprot
    print(
        "Sending GET request to {} to fetch {}'s JSON file as a string.".format(
            alphafold_url, uniprot
        )
    )
    first_response = requests.get(alphafold_url, timeout=timeout)
    if first_response is None or not first_response.ok:
        print("Something went wrong.")
        return None

    # Parse the whole document instead of slicing off the enclosing brackets:
    # slicing breaks on trailing whitespace and on multi-entry responses.
    predictions = json.loads(first_response.text)
    if isinstance(predictions, dict):
        # Tolerate a server that returns a bare object rather than a list.
        predictions = [predictions]
    if not predictions:
        print("Something went wrong.")
        return None
    pdb_url = predictions[0]["pdbUrl"]

    print(
        "Sending GET request to {} to fetch {}'s PDB file as a string.".format(
            pdb_url, uniprot
        )
    )
    response = requests.get(pdb_url, timeout=timeout)
    if response is None or not response.ok:
        print("Something went wrong.")
        return None
    return response.text


def _get_calphas_radii(calphas_resnames, res_to_radii=None):
if res_to_radii is None:
res_to_radii = {
"ALA": 2.28,
"GLY": 2.56,
"MET": 4.65,
"PHE": 4.62,
"SER": 3.12,
"ARG": 5.34,
"ASN": 3.83,
"ASP": 3.72,
"CYS": 3.35,
"GLU": 3.99,
"GLN": 4.44,
"HIS": 4.45,
"ILE": 3.50,
"LEU": 3.49,
"LYS": 4.01,
"PRO": 3.50,
"THR": 3.24,
"TYR": 4.97,
"TRP": 5.04,
"VAL": 3.25,
}
return [res_to_radii[res] for res in calphas_resnames]


def centres_and_radii(pdb_string):
    """
    Extracts the coordinates and radii of C-alpha atoms from a PDB file string.

    Parameters
    ----------
    pdb_string: str
        The PDB file content as a string.

    Returns
    -------
    tuple
        A tuple containing the C-alpha atom coordinates (np.array) and their
        corresponding radii (np.array).

    Raises
    ------
    KeyError
        If a selected residue has a name that is missing from the radius
        table used by ``_get_calphas_radii``.

    Examples
    --------
    >>> # Lysozyme C structure
    >>> pdb_content = get_pdb_from_alphafold("P61626")
    >>> coordinates, radii = centres_and_radii(pdb_content)
    >>> print(stokesRadius(coordinates, radii))
    """
    universe = mda.Universe(StringIO(pdb_string), format="pdb")
    # Restrict to protein residues: a bare "name CA" also matches calcium
    # ions (atom name "CA"), which have no entry in the radius table.
    calphas = universe.select_atoms("protein and name CA")
    calphas_radii = _get_calphas_radii(calphas.resnames)
    return np.array(calphas.positions), np.array(calphas_radii)

0 comments on commit 908a4e9

Please sign in to comment.