Skip to content

Commit

Permalink
Merge pull request #107 from volkamerlab/klifs-drugs
Browse files Browse the repository at this point in the history
Add KLIFS drugs data
  • Loading branch information
dominiquesydow authored Sep 23, 2021
2 parents 1f454e8 + 177273f commit 9f752aa
Show file tree
Hide file tree
Showing 8 changed files with 605 additions and 310 deletions.
770 changes: 467 additions & 303 deletions docs/tutorials/databases_klifs.ipynb

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions opencadd/data/klifs_fields.csv
Original file line number Diff line number Diff line change
Expand Up @@ -99,3 +99,11 @@ coordinates,residue.klifs_id,Int32,,,
coordinates,residue.klifs_region_id,string,,,
coordinates,residue.klifs_region,string,,,
coordinates,residue.klifs_color,string,,,
drugs,drug.inn,string,INN,,
drugs,drug.brand_name,string,Brand name,,
drugs,drug.synonym,string,Synonyms,,
drugs,drug.phase,string,Phase,,
drugs,drug.approval_year,string,Approval,,
drugs,drug.smiles,string,SMILES,,
drugs,ligand.chembl_id,string,ChEMBL,,
drugs,ligand.expo_id,string,PDB,,
80 changes: 74 additions & 6 deletions opencadd/databases/klifs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,19 +138,25 @@ def _standardize_column_values(dataframe):
local data is already performed upon session initialization.
"""

# TODO Use None instead of "-"; but may affect downstream pipelines that use "-" already
# NOTE: Each replaced column is assigned back to the DataFrame. Calling
# `dataframe[column].replace(..., inplace=True)` is chained assignment, which does not update
# `dataframe` once pandas Copy-on-Write is active.
if "structure.alternate_model" in dataframe.columns:
    # Remote
    dataframe["structure.alternate_model"] = dataframe["structure.alternate_model"].replace(
        "", "-"
    )
if "ligand.expo_id" in dataframe.columns:
    # Remote
    dataframe["ligand.expo_id"] = dataframe["ligand.expo_id"].replace(0, "-")
if "ligand_allosteric.expo_id" in dataframe.columns:
    # Remote
    dataframe["ligand_allosteric.expo_id"] = dataframe["ligand_allosteric.expo_id"].replace(
        0, "-"
    )
if "structure.resolution" in dataframe.columns:
    # Remote
    dataframe["structure.resolution"] = dataframe["structure.resolution"].replace(0, np.nan)

# Multi-valued drug fields arrive as one delimited string; split them into lists
# (an empty string becomes an empty list).
if "drug.brand_name" in dataframe.columns:
    dataframe["drug.brand_name"] = dataframe["drug.brand_name"].apply(
        lambda x: x.split(";") if x != "" else []
    )
# The column is named "drug.synonym" (singular) in klifs_fields.csv; checking for
# "drug.synonyms" never matched, so synonyms were left as one tab-separated string.
if "drug.synonym" in dataframe.columns:
    dataframe["drug.synonym"] = dataframe["drug.synonym"].apply(
        lambda x: x.split("\t") if x != "" else []
    )

return dataframe

def _standardize_dataframe(self, dataframe, columns, columns_mapping=None):
Expand Down Expand Up @@ -1046,10 +1052,13 @@ class InteractionsProvider(BaseProvider):
"""
Class for interactions requests.
Methods
-------
Properties
----------
interaction_types()
Get all available interaction types.
Methods
-------
all_interactions()
Get all available interaction fingerprints.
by_structure_klifs_id(structure_klifs_ids)
Expand Down Expand Up @@ -1428,3 +1437,62 @@ def _raise_invalid_extension(extension):
extensions = ["pdb", "mol2"]
if extension not in extensions:
raise ValueError(f"Invalid extension. Select from: {', '.join(extensions)}")


class DrugsProvider(BaseProvider):
    """
    Class for drugs requests.

    From the KLIFS Swagger API:
    https://dev.klifs.net/swagger_v2/#/Ligands/get_drug_list
    > The drug list endpoint returns a list of all annotated kinase ligands that are either
    > approved or are/have been in clinical trials.
    > This information is primarily powered by the PKIDB and complemented with KLIFS curation and
    > annotation + manually curated data from other sources (e.g. approved INNs).

    Methods
    -------
    all_drugs()
        Get all available drugs.

    Notes
    -----
    Class methods all return a pandas.DataFrame of drugs (rows) with the following attributes
    (columns):
    drug.inn : string
        International nonproprietary name.
    drug.brand_name : list of string
        Brand name(s).
    drug.synonym : list of string
        Synonym(s).
    drug.phase : string
        Current clinical phase of the drug.
    drug.approval_year : string
        Year of FDA-approval.
        If approval by another institution, syntax as follows, example: "2017 (EMA)".
    drug.smiles : string
        SMILES string of drug.
        TODO: "ligand.smiles" would be more consistent with Ligand class, however it is not
        guaranteed that SMILES will be the same for the same ligand, thus use "drug.smiles".
    ligand.chembl_id : string
        Ligand ChEMBL ID.
    ligand.expo_id : string
        Ligand expo ID.
    """

    def all_drugs(self):
        """
        Get all available drugs.

        Returns
        -------
        pandas.DataFrame
            Drugs (rows) with the columns as defined in the class docstring.

        Raises
        ------
        ValueError
            If DataFrame is empty.
        NotImplementedError
            Always in this base class; the request itself is implemented by subclasses.
        """
        raise NotImplementedError("Implement in your subclass!")
15 changes: 15 additions & 0 deletions opencadd/databases/klifs/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
InteractionsProvider,
PocketsProvider,
CoordinatesProvider,
DrugsProvider,
)
from .schema import (
FIELDS,
Expand Down Expand Up @@ -879,3 +880,17 @@ def _add_residue_klifs_ids(self, dataframe, filepath):
dataframe = dataframe.merge(pocket_dataframe, on="residue.id", how="left")

return dataframe


class Drugs(LocalInitializer, DrugsProvider):
    """
    Extends DrugsProvider for local sessions; drug requests are not supported locally.

    Refer to DrugsProvider documentation for more information:
    opencadd.databases.klifs.core.DrugsProvider
    """

    def all_drugs(self):
        """
        Placeholder for the drugs request in a local session.

        Raises
        ------
        NotImplementedError
            Always; information on drugs is not available locally, use a remote session instead.
        """

        raise NotImplementedError(
            "Information on drugs is not available locally! Please use a remote session."
        )
21 changes: 21 additions & 0 deletions opencadd/databases/klifs/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
InteractionsProvider,
PocketsProvider,
CoordinatesProvider,
DrugsProvider,
)
from .schema import FIELDS
from .utils import metadata_to_filepath, silence_logging
Expand Down Expand Up @@ -869,3 +870,23 @@ def _add_residue_klifs_ids(self, dataframe, structure_klifs_id):
dataframe = dataframe.astype({"residue.klifs_id": "Int64"})

return dataframe


class Drugs(RemoteInitializer, DrugsProvider):
    """
    Remote implementation of DrugsProvider: drug requests are answered via the KLIFS API client.

    The returned columns are described in the DrugsProvider documentation:
    opencadd.databases.klifs.core.DrugsProvider
    """

    def all_drugs(self):
        """
        Request the KLIFS drug list and return it as a standardized DataFrame.
        """

        # Query the drug list endpoint of the KLIFS API
        response = self._client.Ligands.get_drug_list().response()
        # The result is a list of ABC objects; turn it into a DataFrame
        raw_drugs = self._abc_to_dataframe(response.result)
        # Standardize using the "drugs" column types and remote-to-opencadd column names
        return self._standardize_dataframe(
            raw_drugs,
            FIELDS.oc_name_to_type("drugs"),
            FIELDS.remote_to_oc_names("drugs"),
        )
8 changes: 8 additions & 0 deletions opencadd/databases/klifs/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ class Session:
Interactions object for interaction requests.
coordinates : None or opencadd.databases.klifs.remote.Coordinates/local.Coordinates
Coordinates object for coordinates requests.
drugs : None or opencadd.databases.klifs.remote.Drugs
Drugs object for drugs requests.
"""

def __init__(self):
Expand All @@ -56,6 +58,7 @@ def __init__(self):
self.interactions = None
self.pockets = None
self.coordinates = None
self.drugs = None

@classmethod
def from_local(cls, path_to_klifs_download, path_to_klifs_metadata=None):
Expand Down Expand Up @@ -165,3 +168,8 @@ def _set_attributes(self, backend, path_to_klifs_download=None, database=None, c
database=database,
path_to_klifs_download=path_to_klifs_download,
)
self.drugs = backend.Drugs(
client=client,
database=database,
path_to_klifs_download=path_to_klifs_download,
)
2 changes: 1 addition & 1 deletion opencadd/databases/klifs/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def metadata_to_filepath(
structure = f"{structure_pdb}{f'_alt{structure_alternate_model}' if bool(structure_alternate_model) else ''}{f'_chain{structure_chain}' if bool(structure_chain) else ''}"

# FIXME: The PDB download for ligands in KLIFS is named "klifs_ligand.pdb"
# instead of "ligand.expo_id". For the time being (until KLIFS maybe streamlines the file name
# instead of "ligand.pdb". For the time being (until KLIFS maybe streamlines the file name
# with all the other file names), rename the file here.
if entity == "ligand" and extension == "pdb":
entity = "klifs_ligand"
Expand Down
11 changes: 11 additions & 0 deletions opencadd/tests/databases/test_klifs_local_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,17 @@ def test_all_bioactivities(self):

check_dataframe(result_remote, FIELDS.oc_name_to_type("bioactivities"))

def test_all_drugs(self):
    """
    Test the drugs request: the remote result must pass the DataFrame check for the "drugs"
    fields, while the local session must refuse the request.
    """

    drugs_remote = REMOTE.drugs.all_drugs()
    expected_columns = FIELDS.oc_name_to_type("drugs")
    check_dataframe(drugs_remote, expected_columns)

    # The local backend raises instead of returning drug data
    with pytest.raises(NotImplementedError):
        LOCAL.drugs.all_drugs()


class TestsFromKinaseIds:
"""
Expand Down

0 comments on commit 9f752aa

Please sign in to comment.