Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Broad api calls #20

Merged
merged 8 commits into from
Jul 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 37 additions & 14 deletions libs/Annotator.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from libs.utils.Errors import ConversionNotSupported, DataNotRetrieved
from libs.utils.Errors import ConversionNotSupported, DataNotRetrieved, DataNotAvailable


class Annotator:
Expand All @@ -19,33 +19,56 @@ async def annotate(self, spectra, jobs, repeat=False):
:return: annotated dictionary
"""
metadata = spectra.metadata
cache = dict()

added_metadata = True
while added_metadata:
added_metadata = False
for job in jobs:
service = self.services.get(job.service, None)
data = metadata.get(job.source, None)

if job.target in metadata:
pass # TODO: log - data already present
elif service is None:
pass # TODO: log - unknown service
elif data is None:
pass # TODO: log - source data not available for conversion
else:
if job.target not in metadata:
try:
result = await service.convert(job.source, job.target, data)
metadata[job.target] = result
metadata, cache = await self.execute_job_with_cache(job, metadata, cache)
if repeat:
added_metadata = True
except DataNotAvailable:
pass # TODO log data for conversion missing in given metadata
except ConversionNotSupported:
pass # TODO log this type of conversion is not supported by the service
pass # TODO log this type of conversion is not supported by the service or service unknown
except DataNotRetrieved:
pass # TODO log no data were retrieved
else:
pass # TODO: log - data already present

spectra.metadata = metadata
return spectra

async def execute_job_with_cache(self, job, metadata, cache):
    """
    Run a single job, consulting the per-service cache before calling the service.

    The cache is a dictionary keyed by service name; each entry maps
    attribute names to values already obtained from that service for
    the current spectra. A conversion result is merged into the entry
    of the job's service, so attributes returned alongside the
    requested target can be served to later jobs without a new call.

    Raises DataNotRetrieved when the target is still missing after the
    conversion. Errors raised by job.validate or service.convert are
    propagated unchanged.

    :param job: given job to be executed
    :param metadata: data to be annotated by the job
    :param cache: given cache for this spectra
    :return: updated metadata and cache
    """
    # make sure the job makes sense
    service, data = job.validate(self.services, metadata)

    # create the service specific cache on first use
    service_cache = cache.setdefault(job.service, {})

    if job.target not in service_cache:
        # cache miss - ask the service and remember everything it returned
        converted = await service.convert(job.source, job.target, data)
        service_cache.update(converted)
        if job.target not in service_cache:
            raise DataNotRetrieved('No data obtained from the specified job.')

    metadata[job.target] = service_cache[job.target]
    return metadata, cache

def get_all_conversions(self):
"""
Method to compute all available conversion functions of all available Services.
Expand Down
12 changes: 6 additions & 6 deletions libs/services/CIR.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ async def cas_to_smiles(self, cas_number):
args = f"{cas_number}/smiles?resolver=cas_number"
response = await self.query_the_service('CIR', args)
if response:
return response
return {'smiles': response}

async def inchikey_to_smiles(self, inchikey):
"""
Expand All @@ -31,7 +31,7 @@ async def inchikey_to_smiles(self, inchikey):
args = f'{inchikey}/smiles'
response = await self.query_the_service('CIR', args)
if response:
return response.split('\n')[0]
return {'smiles': response.split('\n')[0]}

async def inchikey_to_inchi(self, inchikey):
"""
Expand All @@ -44,7 +44,7 @@ async def inchikey_to_inchi(self, inchikey):
args = f'{inchikey}/stdinchi'
response = await self.query_the_service('CIR', args)
if response:
return response
return {'inchi': response}

async def inchikey_to_cas(self, inchikey):
"""
Expand All @@ -57,7 +57,7 @@ async def inchikey_to_cas(self, inchikey):
args = f'{inchikey}/cas'
response = await self.query_the_service('CIR', args)
if response:
return response
return {'casno': response}

async def inchikey_to_formula(self, inchikey):
"""
Expand All @@ -70,7 +70,7 @@ async def inchikey_to_formula(self, inchikey):
args = f'{inchikey}/formula'
response = await self.query_the_service('CIR', args)
if response:
return response
return {'formula': response}

async def smiles_to_inchikey(self, smiles):
"""
Expand All @@ -83,4 +83,4 @@ async def smiles_to_inchikey(self, smiles):
args = f'{smiles}/stdinchikey'
response = await self.query_the_service('CIR', args)
if response:
return response[9:]
return {'inchikey': response[9:]}
89 changes: 48 additions & 41 deletions libs/services/CTS.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,82 +11,89 @@ def __init__(self, session):
'CTS_compound': 'http://cts.fiehnlab.ucdavis.edu/service/compound/'
}

# generate top level methods defining allowed conversions
conversions = [('inchikey', 'inchi', 'from_inchikey'),
('inchikey', 'name', 'from_inchikey'),
('inchikey', 'iupac_name', 'from_inchikey')]
self.create_top_level_conversion_methods(conversions)

### top level methods defining allowed conversions

async def cas_to_inchikey(self, cas_number):
"""
Convert CAS number to InChiKey using CTS web service
More info: http://cts.fiehnlab.ucdavis.edu/services

The method returns first found hit.

:param cas_number: given CAS number
:return: obtained InChiKey
"""
args = f'CAS/InChIKey/{cas_number}'
response = await self.query_the_service('CTS', args)
if response:
response_json = json.loads(response)
if len(response_json[0]['results']) != 0:
return response_json[0]['results'][0]

async def inchikey_to_inchi(self, inchikey):
"""
Convert InChiKey to InChi using CTS compound service
More info: http://cts.fiehnlab.ucdavis.edu/services

:param inchikey: given InChiKey value
:return: obtained InChi
"""
args = inchikey
response = await self.query_the_service('CTS_compound', args)
if response:
response_json = json.loads(response)
return response_json["inchicode"]
return self.parse_inchikey(response)

async def name_to_inchikey(self, name):
"""
Convert Chemical name to InChiKey using CTS service
More info: http://cts.fiehnlab.ucdavis.edu/services

:param name: given Chemical name
:return: obtained InChiKey
"""
args = f'Chemical%20Name/InChIKey/{name}'
response = await self.query_the_service('CTS', args)
if response:
response_json = json.loads(response)
if len(response_json[0]['results']) != 0:
return response_json[0]['results'][0]
return self.parse_inchikey(response)

###

async def inchikey_to_name(self, inchikey):
async def from_inchikey(self, inchikey):
"""
Convert InChiKey to Chemical name using CTS compound service
Convert InChiKey to all possible attributes using CTS compound service
More info: http://cts.fiehnlab.ucdavis.edu/services

:param inchikey: given InChiKey value
:return: obtained Chemical name
:return: all found data
"""
args = inchikey
response = await self.query_the_service('CTS_compound', args)
if response:
response_json = json.loads(response)
synonyms = response_json['synonyms']
names = [item['name'] for item in synonyms if item['type'] == 'Synonym']
if names:
return names[0]
return self.parse_attributes(response)

async def inchikey_to_iupac_name(self, inchikey):
def parse_inchikey(self, response):
"""
Convert InChiKey to IUPAC name using CTS compound service
More info: http://cts.fiehnlab.ucdavis.edu/services
Parse InChiKey attribute obtained from given key.

:param inchikey: given InChiKey value
:return: obtained IUPAC name
:param response: CTS conversion response to given key
:return: parsed InChiKey
"""
args = inchikey
response = await self.query_the_service('CTS_compound', args)
if response:
response_json = json.loads(response)
response_json = json.loads(response)
if len(response_json[0]['results']) != 0:
return {'inchikey': response_json[0]['results'][0]}

def parse_attributes(self, response):
"""
Parse all available attributes obtained from InChiKey.

:param response: CTS compound response to given InChiKey
:return: all parsed data
"""
response_json = json.loads(response)
result = dict()

if 'inchicode' in response_json:
result['inchi'] = response_json['inchicode']

if 'formula' in response_json:
result['formula'] = response_json['formula']

if 'synonyms' in response_json:
synonyms = response_json['synonyms']

names = [item['name'] for item in synonyms if item['type'] == 'Synonym']
if names:
result['name'] = names[0]

names = [item['name'] for item in synonyms if item['type'] == 'IUPAC Name (Preferred)']
if names:
return names[0]
result['iupac_name'] = names[0]
return result
33 changes: 32 additions & 1 deletion libs/services/Converter.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from asyncstdlib import lru_cache


from aiohttp.client_exceptions import ServerDisconnectedError
from libs.utils.Errors import DataNotRetrieved, ConversionNotSupported

Expand All @@ -6,6 +9,7 @@ class Converter:
def __init__(self, session):
self.session = session

@lru_cache
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you sure self is hashable here? It contains the session.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right, we have to make sure default hash based on id is sufficient here. We will have a look into that in #1 (cache testing).

async def query_the_service(self, service, args, method='GET', data=None):
"""
Make get request to given service with arguments.
Expand Down Expand Up @@ -79,4 +83,31 @@ async def convert(self, source, target, data):
return result
raise DataNotRetrieved(f'Target attribute {target} not available.')
except AttributeError:
raise ConversionNotSupported(f'Target attribute {target} is not supported.')
raise ConversionNotSupported(f'Conversion from {source} to {target} is not supported.')

def create_top_level_conversion_methods(self, conversions):
    """
    Register one dynamic conversion method on this object per given triple.

    Each triple is forwarded to create_top_level_method together with
    this object.

    :param conversions: triples of form (from, to, method)
    """
    for triple in conversions:
        create_top_level_method(self, *triple)


def create_top_level_method(obj, source, target, method):
    """
    Attach a coroutine function named {source}_to_{target} to {obj}.

    The attached function takes a single argument and awaits the
    attribute of {obj} named by {method} with that argument. It is set
    as a plain attribute on the object, so it receives only the key.

    :param obj: given object (typically a Converter)
    :param source: attribute name used as source of data
    :param target: attribute name obtained using this dynamic method
    :param method: method which is called in the object with single argument
    """
    async def conversion(key):
        # resolved on every call, so the current attribute of obj is used
        delegate = getattr(obj, str(method))
        return await delegate(key)

    method_name = f'{source}_to_{target}'
    description = f'Convert {source} to {target} using {obj.__class__.__name__} service'

    conversion.__doc__ = description
    conversion.__name__ = method_name

    setattr(obj, conversion.__name__, conversion)
51 changes: 38 additions & 13 deletions libs/services/NLM.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,35 +11,60 @@ def __init__(self, session):
# service URLs
self.services = {'NLM': 'https://chem.nlm.nih.gov/api/data/'}

async def inchikey_to_name(self, inchikey):
self.attributes = [{'code': 'casno', 'label': 'RN / ID'},
{'code': 'inchikey', 'label': 'InChIKey'},
{'code': 'name', 'label': 'Name'},
{'code': 'formula', 'label': 'Formula'}]

# generate top level methods defining allowed conversions
conversions = [('inchikey', 'name', 'from_inchikey'),
('inchikey', 'formula', 'from_inchikey'),
('inchikey', 'casno', 'from_inchikey'),
('name', 'inchikey', 'from_name'),
('name', 'formula', 'from_name'),
('name', 'casno', 'from_name')]
self.create_top_level_conversion_methods(conversions)

async def from_inchikey(self, inchikey):
"""
Convert InChiKey to Chemical name using NLM service
Convert InChiKey to all possible attributes using NLM service
More info: https://chem.nlm.nih.gov/chemidplus/inchikey

:param inchikey: given InChiKey
:return: obtained Chemical name
:return: all found data
"""
args = f'inchikey/equals/{inchikey}?data=summary&format=tsv'
response = await self.query_the_service('NLM', args)
if response:
if response != 'EXPRESSION_INVALID':
table = pd.read_csv(StringIO(response), sep='\t')
if not table.empty:
return table['Name'][0]
return self.parse_attributes(response)

async def name_to_inchikey(self, name):
async def from_name(self, name):
"""
Convert Chemical name to InChiKey using NLM service
Convert Chemical name to all possible attributes using NLM service
More info: https://chem.nlm.nih.gov/chemidplus/inchikey

:param name: given Chemical name
:return: obtained InChiKey
:return: all found data
"""
args = f'name/equals/{name}?data=summary&format=tsv'
response = await self.query_the_service('NLM', args)
if response:
return self.parse_attributes(response)

def parse_attributes(self, response):
"""
Parse all available attributes obtained from given key.

:param response: NLM compound response to given key
:return: all parsed data
"""
result = dict()

if response != 'EXPRESSION_INVALID':
table = pd.read_csv(StringIO(response), sep='\t')
if not table.empty:
inchikey = table['InChIKey'][0]
if type(inchikey) == str:
return inchikey
for att in self.attributes:
value = table[att['label']][0]
if type(value) == str:
result[att['code']] = value
return result
Loading