Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

14 asynchronous requests #15

Merged
merged 9 commits into from
Jun 29, 2021
14 changes: 7 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,23 @@ Repository for tool that adds more annotations (e.g. SMILES, InChI, CAS number)

```python
# import MSP class
from libs.MSP import MSP
from libs.Spectra import Spectra
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

kudos for updating readme


# create MSP object and import your .msp file
msp = MSP()
msp.load_msp_file('path_to_my_file.msp')
spectra = Spectra()
spectra.load_msp_file('path_to_my_file.msp')

# main function to annotate the MSP file using all available approaches
msp.annotate_spectrums_all_attributes()
spectra.annotate_spectrums_all_attributes()

# alternatively, it is possible to specify just particular jobs to do
jobs = [('name', 'inchi', 'PubChem'),
('casno', 'inchikey', 'CTS')]
msp.annotate_spectrums(jobs)
spectra.annotate_spectrums(jobs)

# to get available jobs
available_jobs = msp.get_available_jobs()
available_jobs = spectra.get_available_jobs()

# export file
msp.save_msp_file('path_to_a_new_file.msp')
spectra.save_msp_file('path_to_a_new_file.msp')
```
15 changes: 11 additions & 4 deletions libs/Annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,27 @@
class Annotator:
def __init__(self):
self.services = {'CTS': CTS(), 'CIR': CIR(), 'NLM': NLM(), 'PubChem': PubChem()}
self.session = None

def annotate(self, metadata, jobs, repeat=False):
async def annotate(self, spectra, jobs, repeat=False):
"""
Runs all jobs to add annotations to given dictionary containing metadata

The method goes through specified jobs of form (Source, Target, Service)
and tries to obtain 'Target' attribute based on 'Source' attribute using
'Service' service.

:param metadata: given spectra metadata
:param spectra: given spectra metadata
:param jobs: specified list of jobs to be executed
:param repeat: if some metadata was added, all jobs are executed again
:return: annotated dictionary
"""
# set session to every service
for service in self.services.values():
service.session = self.session

jobs = convert_to_jobs(jobs)
metadata = spectra.metadata

added_metadata = True
while added_metadata:
Expand All @@ -40,15 +46,16 @@ def annotate(self, metadata, jobs, repeat=False):
pass # TODO: log - source data not available for conversion
else:
try:
result = service.convert(job.source, job.target, data)
result = await service.convert(job.source, job.target, data)
metadata[job.target] = result
if repeat:
added_metadata = True
except ConversionNotSupported:
pass # TODO log this type of conversion is not supported by the service
except DataNotRetrieved:
pass # TODO log no data were retrieved
return metadata
spectra.metadata = metadata
return spectra

def get_all_conversions(self):
"""
Expand Down
37 changes: 28 additions & 9 deletions libs/MSP.py → libs/Spectra.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import asyncio
import aiohttp
from matchms.importing import load_from_msp
from matchms.exporting import save_as_msp

from libs import curator
from libs.Annotator import Annotator


class MSP:
class Spectra:
def __init__(self):
self.annotator = Annotator()
self.spectrums = []
Expand Down Expand Up @@ -36,24 +38,41 @@ def get_available_jobs(self):
"""
return self.annotator.get_all_conversions()

def annotate_spectrums(self, jobs):
async def annotate(self, jobs, batch_size, repeat):
"""
Annotate data using specified jobs in asynchronous mode.
Spectrums are executed in batches to avoid flooding web services.

:param jobs: given list of jobs to do
:param batch_size: size of single batch
:param repeat: if some metadata was added, all jobs are executed again
"""
async with aiohttp.ClientSession() as session:
self.annotator.session = session
results = []
for size in range(len(self.spectrums) // batch_size + 1):
results += await asyncio.gather(*[self.annotator.annotate(spectra, jobs, repeat) for spectra in
self.spectrums[size * batch_size:(size + 1) * batch_size]])
Comment on lines +54 to +55
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This operation is quite complex and could either be split across multiple lines or moved into a small helper function with a descriptive name.

As discussed, this might then also move to a different class.

self.spectrums = results

def annotate_spectrums(self, jobs, batch_size=10, repeat=False):
"""
Adds additional metadata to all Spectra objects.

Required metadata are specified in required_annotations attribute and
have to be defined in an add_ method of Annotator class (otherwise ignored).

:param jobs: target annotation jobs
:param batch_size: number of spectrums annotated at once (to avoid flooding web services)
:param repeat: if some metadata was added, all jobs are executed again
"""
for i, spectrum in enumerate(self.spectrums):
metadata = curator.curate_metadata(spectrum.metadata)
spectrum.metadata = self.annotator.annotate(metadata, jobs)
for spectrum in self.spectrums:
spectrum.metadata = curator.curate_metadata(spectrum.metadata)
asyncio.run(self.annotate(jobs, batch_size, repeat))

def annotate_spectrums_all_attributes(self):
def annotate_spectrums_all_attributes(self, batch_size=10):
"""
Adds all implemented metadata to all Spectra objects.
"""
jobs = self.get_available_jobs()
for i, spectrum in enumerate(self.spectrums):
metadata = curator.curate_metadata(spectrum.metadata)
spectrum.metadata = self.annotator.annotate(metadata, jobs)
self.annotate_spectrums(jobs, batch_size, True)
48 changes: 24 additions & 24 deletions libs/services/CIR.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ def __init__(self):
# service URLs
self.services = {'CIR': 'https://cactus.nci.nih.gov/chemical/structure/'}

def cas_to_smiles(self, cas_number):
async def cas_to_smiles(self, cas_number):
"""
Convert CAS number to SMILES using CIR web service
More info: https://cactus.nci.nih.gov/chemical/structure_documentation
Expand All @@ -16,11 +16,11 @@ def cas_to_smiles(self, cas_number):
:return: obtained SMILES
"""
args = f"{cas_number}/smiles?resolver=cas_number"
response = self.query_the_service('CIR', args)
if response.status_code == 200:
return response.text
response = await self.query_the_service('CIR', args)
if response:
return response

def inchikey_to_smiles(self, inchikey):
async def inchikey_to_smiles(self, inchikey):
"""
Convert InChiKey to SMILES using CIR web service
More info: https://cactus.nci.nih.gov/chemical/structure_documentation
Expand All @@ -29,11 +29,11 @@ def inchikey_to_smiles(self, inchikey):
:return: obtained SMILES
"""
args = f'{inchikey}/smiles'
response = self.query_the_service('CIR', args)
if response.status_code == 200:
return response.text.split('\n')[0]
response = await self.query_the_service('CIR', args)
if response:
return response.split('\n')[0]

def inchikey_to_inchi(self, inchikey):
async def inchikey_to_inchi(self, inchikey):
"""
Convert InChiKey to InCHi using CIR web service
More info: https://cactus.nci.nih.gov/chemical/structure_documentation
Expand All @@ -42,11 +42,11 @@ def inchikey_to_inchi(self, inchikey):
:return: obtained InCHi
"""
args = f'{inchikey}/stdinchi'
response = self.query_the_service('CIR', args)
if response.status_code == 200:
return response.text
response = await self.query_the_service('CIR', args)
if response:
return response

def inchikey_to_cas(self, inchikey):
async def inchikey_to_cas(self, inchikey):
"""
Convert InChiKey to CAS number using CIR web service
More info: https://cactus.nci.nih.gov/chemical/structure_documentation
Expand All @@ -55,11 +55,11 @@ def inchikey_to_cas(self, inchikey):
:return: obtained CAS number
"""
args = f'{inchikey}/cas'
response = self.query_the_service('CIR', args)
if response.status_code == 200:
return response.text
response = await self.query_the_service('CIR', args)
if response:
return response

def inchikey_to_formula(self, inchikey):
async def inchikey_to_formula(self, inchikey):
"""
Convert InChiKey to chemical formula using CIR web service
More info: https://cactus.nci.nih.gov/chemical/structure_documentation
Expand All @@ -68,11 +68,11 @@ def inchikey_to_formula(self, inchikey):
:return: obtained chemical formula
"""
args = f'{inchikey}/formula'
response = self.query_the_service('CIR', args)
if response.status_code == 200:
return response.text
response = await self.query_the_service('CIR', args)
if response:
return response

def smiles_to_inchikey(self, smiles):
async def smiles_to_inchikey(self, smiles):
"""
Convert SMILES to InChiKey using CIR web service
More info: https://cactus.nci.nih.gov/chemical/structure_documentation
Expand All @@ -81,6 +81,6 @@ def smiles_to_inchikey(self, smiles):
:return: obtained InChiKey
"""
args = f'{smiles}/stdinchikey'
response = self.query_the_service('CIR', args)
if response.status_code == 200:
return response.text[9:]
response = await self.query_the_service('CIR', args)
if response:
return response[9:]
51 changes: 29 additions & 22 deletions libs/services/CTS.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import json

from libs.services.Converter import Converter


Expand All @@ -9,7 +11,7 @@ def __init__(self):
'CTS_compound': 'http://cts.fiehnlab.ucdavis.edu/service/compound/'
}

def cas_to_inchikey(self, cas_number):
async def cas_to_inchikey(self, cas_number):
"""
Convert CAS number to InChiKey using CTS web service
More info: http://cts.fiehnlab.ucdavis.edu/services
Expand All @@ -20,12 +22,13 @@ def cas_to_inchikey(self, cas_number):
:return: obtained InChiKey
"""
args = f'CAS/InChIKey/{cas_number}'
response = self.query_the_service('CTS', args)
if response.status_code == 200:
if len(response.json()[0]['results']) != 0:
return response.json()[0]['results'][0]
response = await self.query_the_service('CTS', args)
if response:
response_json = json.loads(response)
if len(response_json[0]['results']) != 0:
return response_json[0]['results'][0]

def inchikey_to_inchi(self, inchikey):
async def inchikey_to_inchi(self, inchikey):
"""
Convert InChiKey to InChi using CTS compound service
More info: http://cts.fiehnlab.ucdavis.edu/services
Expand All @@ -34,11 +37,12 @@ def inchikey_to_inchi(self, inchikey):
:return: obtained InChi
"""
args = inchikey
response = self.query_the_service('CTS_compound', args)
if response.status_code == 200:
return response.json()["inchicode"]
response = await self.query_the_service('CTS_compound', args)
if response:
response_json = json.loads(response)
return response_json["inchicode"]

def name_to_inchikey(self, name):
async def name_to_inchikey(self, name):
"""
Convert Chemical name to InChiKey using CTS service
More info: http://cts.fiehnlab.ucdavis.edu/services
Expand All @@ -47,12 +51,13 @@ def name_to_inchikey(self, name):
:return: obtained InChiKey
"""
args = f'Chemical%20Name/InChIKey/{name}'
response = self.query_the_service('CTS', args)
if response.status_code == 200:
if len(response.json()[0]['results']) != 0:
return response.json()[0]['results'][0]
response = await self.query_the_service('CTS', args)
if response:
response_json = json.loads(response)
if len(response_json[0]['results']) != 0:
return response_json[0]['results'][0]

def inchikey_to_name(self, inchikey):
async def inchikey_to_name(self, inchikey):
"""
Convert InChiKey to Chemical name using CTS compound service
More info: http://cts.fiehnlab.ucdavis.edu/services
Expand All @@ -61,14 +66,15 @@ def inchikey_to_name(self, inchikey):
:return: obtained Chemical name
"""
args = inchikey
response = self.query_the_service('CTS_compound', args)
if response.status_code == 200:
synonyms = response.json()['synonyms']
response = await self.query_the_service('CTS_compound', args)
if response:
response_json = json.loads(response)
synonyms = response_json['synonyms']
names = [item['name'] for item in synonyms if item['type'] == 'Synonym']
if names:
return names[0]

def inchikey_to_iupac_name(self, inchikey):
async def inchikey_to_iupac_name(self, inchikey):
"""
Convert InChiKey to IUPAC name using CTS compound service
More info: http://cts.fiehnlab.ucdavis.edu/services
Expand All @@ -77,9 +83,10 @@ def inchikey_to_iupac_name(self, inchikey):
:return: obtained IUPAC name
"""
args = inchikey
response = self.query_the_service('CTS_compound', args)
if response.status_code == 200:
synonyms = response.json()['synonyms']
response = await self.query_the_service('CTS_compound', args)
if response:
response_json = json.loads(response)
synonyms = response_json['synonyms']
names = [item['name'] for item in synonyms if item['type'] == 'IUPAC Name (Preferred)']
if names:
return names[0]
Loading