diff --git a/README.md b/README.md index 02d8929..2e5bdbc 100644 --- a/README.md +++ b/README.md @@ -7,23 +7,23 @@ Repository for tool that adds more annotations (e.g. SMILES, InChI, CAS number) ```python # import MSP class -from libs.MSP import MSP +from libs.Spectra import Spectra # create MSP object and import your .msp file -msp = MSP() -msp.load_msp_file('path_to_my_file.msp') +spectra = Spectra() +spectra.load_msp_file('path_to_my_file.msp') # main function to annotate the MSP file using all available approaches -msp.annotate_spectrums_all_attributes() +spectra.annotate_spectrums_all_attributes() # alternatively, it is possible to specify just particular jobs to do jobs = [('name', 'inchi', 'PubChem'), ('casno', 'inchikey', 'CTS')] -msp.annotate_spectrums(jobs) +spectra.annotate_spectrums(jobs) # to get available jobs -available_jobs = msp.get_available_jobs() +available_jobs = spectra.get_available_jobs() # export file -msp.save_msp_file('path_to_a_new_file.msp') +spectra.save_msp_file('path_to_a_new_file.msp') ``` diff --git a/libs/Annotator.py b/libs/Annotator.py index d493579..b093177 100644 --- a/libs/Annotator.py +++ b/libs/Annotator.py @@ -9,8 +9,9 @@ class Annotator: def __init__(self): self.services = {'CTS': CTS(), 'CIR': CIR(), 'NLM': NLM(), 'PubChem': PubChem()} + self.session = None - def annotate(self, metadata, jobs, repeat=False): + async def annotate(self, spectra, jobs, repeat=False): """ Runs all jobs to add annotations to given dictionary containing metadata @@ -18,12 +19,17 @@ def annotate(self, metadata, jobs, repeat=False): and tries to obtain 'Target' attribute based on 'Source' attribute using 'Service' service. - :param metadata: given spectra metadata + :param spectra: given spectra metadata :param jobs: specified list of jobs to be executed :param repeat: if some metadata was added, all jobs are executed again :return: annotated dictionary """ + # set session to every service + for service in self.services.values(): + service.session = self.session + jobs = convert_to_jobs(jobs) + metadata = spectra.metadata added_metadata = True while added_metadata: @@ -40,7 +46,7 @@ def annotate(self, metadata, jobs, repeat=False): pass # TODO: log - source data not available for conversion else: try: - result = service.convert(job.source, job.target, data) + result = await service.convert(job.source, job.target, data) metadata[job.target] = result if repeat: added_metadata = True @@ -48,7 +54,8 @@ def annotate(self, metadata, jobs, repeat=False): pass # TODO log this type of conversion is not supported by the service except DataNotRetrieved: pass # TODO log no data were retrieved - return metadata + spectra.metadata = metadata + return spectra def get_all_conversions(self): """ diff --git a/libs/MSP.py b/libs/Spectra.py similarity index 50% rename from libs/MSP.py rename to libs/Spectra.py index 496aea4..bae508e 100644 --- a/libs/MSP.py +++ b/libs/Spectra.py @@ -1,3 +1,5 @@ +import asyncio +import aiohttp from matchms.importing import load_from_msp from matchms.exporting import save_as_msp @@ -5,7 +7,7 @@ from libs.Annotator import Annotator -class MSP: +class Spectra: def __init__(self): self.annotator = Annotator() self.spectrums = [] @@ -36,7 +38,24 @@ def get_available_jobs(self): """ return self.annotator.get_all_conversions() - def annotate_spectrums(self, jobs): + async def annotate(self, jobs, batch_size, repeat): + """ + Annotate data using specified jobs in asynchronous mode. + Spectrums are executed in batches to avoid flooding web services. + + :param jobs: given list of jobs to do + :param batch_size: size of single batch + :param repeat: if some metadata was added, all jobs are executed again + """ + async with aiohttp.ClientSession() as session: + self.annotator.session = session + results = [] + for size in range(len(self.spectrums) // batch_size + 1): + results += await asyncio.gather(*[self.annotator.annotate(spectra, jobs, repeat) for spectra in + self.spectrums[size * batch_size:(size + 1) * batch_size]]) + self.spectrums = results + + def annotate_spectrums(self, jobs, batch_size=10, repeat=False): """ Adds additional metadata to all Spectra objects. @@ -44,16 +63,16 @@ def annotate_spectrums(self, jobs): have to be defined in and add_ method of Annotator class (otherwise ignored). :param jobs: target annotation jobs + :param batch_size: number of spectrums annotated at once (to avoid flooding web services) + :param repeat: if some metadata was added, all jobs are executed again """ - for i, spectrum in enumerate(self.spectrums): - metadata = curator.curate_metadata(spectrum.metadata) - spectrum.metadata = self.annotator.annotate(metadata, jobs) + for spectrum in self.spectrums: + spectrum.metadata = curator.curate_metadata(spectrum.metadata) + asyncio.run(self.annotate(jobs, batch_size, repeat)) - def annotate_spectrums_all_attributes(self): + def annotate_spectrums_all_attributes(self, batch_size=10): """ Adds all implemented metadata to all Spectra objects. """ jobs = self.get_available_jobs() - for i, spectrum in enumerate(self.spectrums): - metadata = curator.curate_metadata(spectrum.metadata) - spectrum.metadata = self.annotator.annotate(metadata, jobs) + self.annotate_spectrums(jobs, batch_size, True) diff --git a/libs/services/CIR.py b/libs/services/CIR.py index ffb4294..a82cead 100644 --- a/libs/services/CIR.py +++ b/libs/services/CIR.py @@ -7,7 +7,7 @@ def __init__(self): # service URLs self.services = {'CIR': 'https://cactus.nci.nih.gov/chemical/structure/'} - def cas_to_smiles(self, cas_number): + async def cas_to_smiles(self, cas_number): """ Convert CAS number to SMILES using CIR web service More info: https://cactus.nci.nih.gov/chemical/structure_documentation @@ -16,11 +16,11 @@ def cas_to_smiles(self, cas_number): :return: obtained SMILES """ args = f"{cas_number}/smiles?resolver=cas_number" - response = self.query_the_service('CIR', args) - if response.status_code == 200: - return response.text + response = await self.query_the_service('CIR', args) + if response: + return response - def inchikey_to_smiles(self, inchikey): + async def inchikey_to_smiles(self, inchikey): """ Convert InChiKey to SMILES using CIR web service More info: https://cactus.nci.nih.gov/chemical/structure_documentation @@ -29,11 +29,11 @@ def inchikey_to_smiles(self, inchikey): :return: obtained SMILES """ args = f'{inchikey}/smiles' - response = self.query_the_service('CIR', args) - if response.status_code == 200: - return response.text.split('\n')[0] + response = await self.query_the_service('CIR', args) + if response: + return response.split('\n')[0] - def inchikey_to_inchi(self, inchikey): + async def inchikey_to_inchi(self, inchikey): """ Convert InChiKey to InCHi using CIR web service More info: https://cactus.nci.nih.gov/chemical/structure_documentation @@ -42,11 +42,11 @@ def inchikey_to_inchi(self, inchikey): :return: obtained InCHi """ args = f'{inchikey}/stdinchi' - response = self.query_the_service('CIR', args) - if response.status_code == 200: - return response.text + response = await self.query_the_service('CIR', args) + if response: + return response - def inchikey_to_cas(self, inchikey): + async def inchikey_to_cas(self, inchikey): """ Convert InChiKey to CAS number using CIR web service More info: https://cactus.nci.nih.gov/chemical/structure_documentation @@ -55,11 +55,11 @@ def inchikey_to_cas(self, inchikey): :return: obtained CAS number """ args = f'{inchikey}/cas' - response = self.query_the_service('CIR', args) - if response.status_code == 200: - return response.text + response = await self.query_the_service('CIR', args) + if response: + return response - def inchikey_to_formula(self, inchikey): + async def inchikey_to_formula(self, inchikey): """ Convert InChiKey to chemical formula using CIR web service More info: https://cactus.nci.nih.gov/chemical/structure_documentation @@ -68,11 +68,11 @@ def inchikey_to_formula(self, inchikey): :return: obtained chemical formula """ args = f'{inchikey}/formula' - response = self.query_the_service('CIR', args) - if response.status_code == 200: - return response.text + response = await self.query_the_service('CIR', args) + if response: + return response - def smiles_to_inchikey(self, smiles): + async def smiles_to_inchikey(self, smiles): """ Convert SMILES to InChiKey using CIR web service More info: https://cactus.nci.nih.gov/chemical/structure_documentation @@ -81,6 +81,6 @@ def smiles_to_inchikey(self, smiles): :return: obtained InChiKey """ args = f'{smiles}/stdinchikey' - response = self.query_the_service('CIR', args) - if response.status_code == 200: - return response.text[9:] + response = await self.query_the_service('CIR', args) + if response: + return response[9:] diff --git a/libs/services/CTS.py b/libs/services/CTS.py index bd0dfc8..5f52da6 100644 --- a/libs/services/CTS.py +++ b/libs/services/CTS.py @@ -1,3 +1,5 @@ +import json + from libs.services.Converter import Converter @@ -9,7 +11,7 @@ def __init__(self): 'CTS_compound': 'http://cts.fiehnlab.ucdavis.edu/service/compound/' } - def cas_to_inchikey(self, cas_number): + async def cas_to_inchikey(self, cas_number): """ Convert CAS number to InChiKey using CTS web service More info: http://cts.fiehnlab.ucdavis.edu/services @@ -20,12 +22,13 @@ def cas_to_inchikey(self, cas_number): :return: obtained InChiKey """ args = f'CAS/InChIKey/{cas_number}' - response = self.query_the_service('CTS', args) - if response.status_code == 200: - if len(response.json()[0]['results']) != 0: - return response.json()[0]['results'][0] + response = await self.query_the_service('CTS', args) + if response: + response_json = json.loads(response) + if len(response_json[0]['results']) != 0: + return response_json[0]['results'][0] - def inchikey_to_inchi(self, inchikey): + async def inchikey_to_inchi(self, inchikey): """ Convert InChiKey to InChi using CTS compound service More info: http://cts.fiehnlab.ucdavis.edu/services @@ -34,11 +37,12 @@ def inchikey_to_inchi(self, inchikey): :return: obtained InChi """ args = inchikey - response = self.query_the_service('CTS_compound', args) - if response.status_code == 200: - return response.json()["inchicode"] + response = await self.query_the_service('CTS_compound', args) + if response: + response_json = json.loads(response) + return response_json["inchicode"] - def name_to_inchikey(self, name): + async def name_to_inchikey(self, name): """ Convert Chemical name to InChiKey using CTS service More info: http://cts.fiehnlab.ucdavis.edu/services @@ -47,12 +51,13 @@ def name_to_inchikey(self, name): :return: obtained InChiKey """ args = f'Chemical%20Name/InChIKey/{name}' - response = self.query_the_service('CTS', args) - if response.status_code == 200: - if len(response.json()[0]['results']) != 0: - return response.json()[0]['results'][0] + response = await self.query_the_service('CTS', args) + if response: + response_json = json.loads(response) + if len(response_json[0]['results']) != 0: + return response_json[0]['results'][0] - def inchikey_to_name(self, inchikey): + async def inchikey_to_name(self, inchikey): """ Convert InChiKey to Chemical name using CTS compound service More info: http://cts.fiehnlab.ucdavis.edu/services @@ -61,14 +66,15 @@ def inchikey_to_name(self, inchikey): :return: obtained Chemical name """ args = inchikey - response = self.query_the_service('CTS_compound', args) - if response.status_code == 200: - synonyms = response.json()['synonyms'] + response = await self.query_the_service('CTS_compound', args) + if response: + response_json = json.loads(response) + synonyms = response_json['synonyms'] names = [item['name'] for item in synonyms if item['type'] == 'Synonym'] if names: return names[0] - def inchikey_to_iupac_name(self, inchikey): + async def inchikey_to_iupac_name(self, inchikey): """ Convert InChiKey to IUPAC name using CTS compound service More info: http://cts.fiehnlab.ucdavis.edu/services @@ -77,9 +83,10 @@ def inchikey_to_iupac_name(self, inchikey): :return: obtained IUPAC name """ args = inchikey - response = self.query_the_service('CTS_compound', args) - if response.status_code == 200: - synonyms = response.json()['synonyms'] + response = await self.query_the_service('CTS_compound', args) + if response: + response_json = json.loads(response) + synonyms = response_json['synonyms'] names = [item['name'] for item in synonyms if item['type'] == 'IUPAC Name (Preferred)'] if names: return names[0] diff --git a/libs/services/Converter.py b/libs/services/Converter.py index c246068..28d8bae 100644 --- a/libs/services/Converter.py +++ b/libs/services/Converter.py @@ -1,14 +1,12 @@ -import requests +from aiohttp.client_exceptions import ServerDisconnectedError from libs.utils.Errors import DataNotRetrieved, ConversionNotSupported class Converter: def __init__(self): - # used to store individual API calls to avoid executing - # the same query multiple times in single session - self.cache = dict() + self.session = None - def query_the_service(self, service, args, method='GET', data=None): + async def query_the_service(self, service, args, method='GET', data=None): """ Make get request to given service with arguments. Raises ConnectionError if service is not available. @@ -20,31 +18,53 @@ def query_the_service(self, service, args, method='GET', data=None): :return: obtained response """ try: - identification = f'{service}:{args}' - cached_result = self.cache.get(identification, None) - if cached_result: - return cached_result - result = self.execute_request(self.services[service] + args, method, data) - self.cache[identification] = result + result = await self.loop_request(self.services[service] + args, method, data) return result - except requests.exceptions.ConnectionError: - raise ConnectionError(f'Service {service} is not available') + except TypeError: + pass # TODO: log - probably given argument is incorrect - def execute_request(self, url, method, data=None): + async def loop_request(self, url, method, data, depth=10): """ Execute request with type depending on specified method. :param url: service URL :param method: GET/POST :param data: given arguments for POST request + :param depth: allowed recursion depth for unsuccessful requests :return: obtained response """ - if method == 'GET': - return requests.get(url) + try: + if method == 'GET': + async with self.session.get(url=url) as response: + return await self.process_request(response, url, method, data, depth) + else: + async with self.session.post(url=url, data=data) as response: + return await self.process_request(response, url, method, data, depth) + except ServerDisconnectedError: + if depth > 0: + return await self.loop_request(url, method, data, depth - 1) + + async def process_request(self, response, url, method, data, depth): + """ + Method to wrap response handling (same for POST and GET requests). + + :param response: given async response + :param url: service URL + :param method: GET/POST + :param data: given arguments for POST request + :param depth: allowed recursion depth for unsuccessful requests + :return: processed response + """ + result = await response.text() + if response.ok: + return result + elif response.status == 503: + if depth > 0: + return await self.loop_request(url, method, data, depth - 1) else: - return requests.post(url, data=data) + pass # TODO: log - other error responses - def convert(self, source, target, data): + async def convert(self, source, target, data): """ Converts specified {source} attribute (provided in {data}) to {target} attribute. @@ -54,7 +74,7 @@ def convert(self, source, target, data): :return: obtained value of target attribute """ try: - result = getattr(self, f'{source}_to_{target}')(data) + result = await getattr(self, f'{source}_to_{target}')(data) if result: return result raise DataNotRetrieved(f'Target attribute {target} not available.') diff --git a/libs/services/NLM.py b/libs/services/NLM.py index 1bd33e1..ce2a813 100644 --- a/libs/services/NLM.py +++ b/libs/services/NLM.py @@ -1,4 +1,5 @@ from io import StringIO + import pandas as pd from libs.services.Converter import Converter @@ -10,7 +11,7 @@ def __init__(self): # service URLs self.services = {'NLM': 'https://chem.nlm.nih.gov/api/data/'} - def inchikey_to_name(self, inchikey): + async def inchikey_to_name(self, inchikey): """ Convert InChiKey to Chemical name using NLM service More info: https://chem.nlm.nih.gov/chemidplus/inchikey @@ -19,13 +20,14 @@ def inchikey_to_name(self, inchikey): :return: obtained Chemical name """ args = f'inchikey/equals/{inchikey}?data=summary&format=tsv' - response = self.query_the_service('NLM', args) - if response.status_code == 200 and response.text != 'EXPRESSION_INVALID': - table = pd.read_csv(StringIO(response.text), sep='\t') - if not table.empty: - return table['Name'][0] + response = await self.query_the_service('NLM', args) + if response: + if response != 'EXPRESSION_INVALID': + table = pd.read_csv(StringIO(response), sep='\t') + if not table.empty: + return table['Name'][0] - def name_to_inchikey(self, name): + async def name_to_inchikey(self, name): """ Convert Chemical name to InChiKey using NLM service More info: https://chem.nlm.nih.gov/chemidplus/inchikey @@ -34,8 +36,10 @@ def name_to_inchikey(self, name): :return: obtained InChiKey """ args = f'name/equals/{name}?data=summary&format=tsv' - response = self.query_the_service('NLM', args) - if response.status_code == 200: - table = pd.read_csv(StringIO(response.text), sep='\t') + response = await self.query_the_service('NLM', args) + if response: + table = pd.read_csv(StringIO(response), sep='\t') if not table.empty: - return table['InChIKey'][0] + inchikey = table['InChIKey'][0] + if type(inchikey) == str: + return inchikey diff --git a/libs/services/PubChem.py b/libs/services/PubChem.py index e5b3613..0308f18 100644 --- a/libs/services/PubChem.py +++ b/libs/services/PubChem.py @@ -1,3 +1,5 @@ +import json + from libs.services.Converter import Converter @@ -7,7 +9,7 @@ def __init__(self): # service URLs self.services = {'PubChem': 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/'} - def name_to_inchi(self, name): + async def name_to_inchi(self, name): """ Convert Chemical name to InChi using PubChem service More info: https://pubchemdocs.ncbi.nlm.nih.gov/pug-rest @@ -16,13 +18,14 @@ def name_to_inchi(self, name): :return: found InChi """ args = f'name/{name}/JSON' - response = self.query_the_service('PubChem', args) - if response.status_code == 200: - for prop in response.json()['PC_Compounds'][0]['props']: + response = await self.query_the_service('PubChem', args) + if response: + response_json = json.loads(response) + for prop in response_json['PC_Compounds'][0]['props']: if prop['urn']['label'] == 'InChI': return prop['value']['sval'] - def inchi_to_inchikey(self, inchi): + async def inchi_to_inchikey(self, inchi): """ Convert InChi to InChiKey using PubChem service More info: https://pubchemdocs.ncbi.nlm.nih.gov/pug-rest @@ -30,13 +33,13 @@ def inchi_to_inchikey(self, inchi): :param inchi: given InChi :return: found InChiKey """ - props = self.get_props_from_inchi(inchi) + props = await self.get_props_from_inchi(inchi) if props: for prop in props: if prop['urn']['label'] == 'InChIKey': return prop['value']['sval'] - def inchi_to_iupac_name(self, inchi): + async def inchi_to_iupac_name(self, inchi): """ Convert InChi to IUPAC name using PubChem service More info: https://pubchemdocs.ncbi.nlm.nih.gov/pug-rest @@ -44,13 +47,13 @@ def inchi_to_iupac_name(self, inchi): :param inchi: given InChi :return: found IUPAC name """ - props = self.get_props_from_inchi(inchi) + props = await self.get_props_from_inchi(inchi) if props: for prop in props: if prop['urn']['label'] == 'IUPAC Name' and prop['urn']['name'] == 'Preferred': return prop['value']['sval'] - def inchi_to_formula(self, inchi): + async def inchi_to_formula(self, inchi): """ Convert InChi to chemical formula using PubChem service More info: https://pubchemdocs.ncbi.nlm.nih.gov/pug-rest @@ -58,13 +61,13 @@ def inchi_to_formula(self, inchi): :param inchi: given InChi :return: found chemical formula """ - props = self.get_props_from_inchi(inchi) + props = await self.get_props_from_inchi(inchi) if props: for prop in props: if prop['urn']['label'] == 'Molecular Formula': return prop['value']['sval'] - def inchi_to_smiles(self, inchi): + async def inchi_to_smiles(self, inchi): """ Convert InChi to SMILES using PubChem service More info: https://pubchemdocs.ncbi.nlm.nih.gov/pug-rest @@ -72,13 +75,13 @@ def inchi_to_smiles(self, inchi): :param inchi: given InChi :return: found SMILES """ - props = self.get_props_from_inchi(inchi) + props = await self.get_props_from_inchi(inchi) if props: for prop in props: if prop['urn']['label'] == 'SMILES' and prop['urn']['name'] == 'Canonical': return prop['value']['sval'] - def get_props_from_inchi(self, inchi): + async def get_props_from_inchi(self, inchi): """ General methods to obtain all possible data based on InChi. @@ -86,6 +89,7 @@ def get_props_from_inchi(self, inchi): :return: obtained properties associated to the given InChi """ args = "inchi/JSON" - response = self.query_the_service('PubChem', args, method='POST', data={'inchi': inchi}) - if response.status_code == 200: - return response.json()['PC_Compounds'][0]['props'] + response = await self.query_the_service('PubChem', args, method='POST', data={'inchi': inchi}) + if response: + response_json = json.loads(response) + return response_json['PC_Compounds'][0]['props'] diff --git a/requirements.txt b/requirements.txt index 517fd57..adc3d5b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ matchms~=0.9.0 pandas~=1.2.4 requests~=2.25.1 -mock~=4.0.3 \ No newline at end of file +mock~=4.0.3 +aiohttp~=3.7.4.post0 \ No newline at end of file diff --git a/tests/test_CIR.py b/tests/test_CIR.py index c3c7d2b..6a629ca 100644 --- a/tests/test_CIR.py +++ b/tests/test_CIR.py @@ -1,6 +1,8 @@ +import asyncio import unittest from libs.services.CIR import CIR +from tests.utils import wrap_with_session class TestCIR(unittest.TestCase): @@ -11,63 +13,63 @@ def test_connect_to_service(self): # test basic CIR service cas_number = '7783-89-3' args = '{}/smiles?resolver=cas_number'.format(cas_number) - response = self.converter.query_the_service('CIR', args) - self.assertEqual(response.status_code, 200) - self.assertTrue(type(response.text) == str) + response = asyncio.run(wrap_with_session(self.converter, 'query_the_service', ['CIR', args])) + self.assertTrue(type(response) == str) # incorrect CAS number cas_number = '7783893' args = '{}/smiles?resolver=cas_number'.format(cas_number) - response = self.converter.query_the_service('CIR', args) - self.assertEqual(response.status_code, 500) + response = asyncio.run(wrap_with_session(self.converter, 'query_the_service', ['CIR', args])) + self.assertIsNone(response) def test_cas_to_smiles(self): smiles = '[Ag+].[O-][Br](=O)=O' cas_number = '7783-89-3' - self.assertEqual(self.converter.cas_to_smiles(cas_number), smiles) + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'cas_to_smiles', [cas_number])), smiles) cas_number = '7783893' - self.assertIsNone(self.converter.cas_to_smiles(cas_number)) + self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'cas_to_smiles', [cas_number]))) def test_inchikey_to_smiles(self): inchikey = 'XQLMNMQWVCXIKR-UHFFFAOYSA-M' smiles = '[Ag+].[O-][Br](=O)=O' - self.assertEqual(self.converter.inchikey_to_smiles(inchikey), smiles) + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_smiles', [inchikey])), smiles) inchikey = 'XQLMNVCXIKR-UHFFFAOYSA-M' - self.assertIsNone(self.converter.inchikey_to_smiles(inchikey)) + self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_smiles', [inchikey]))) def test_inchikey_to_inchi(self): inchikey = 'XQLMNMQWVCXIKR-UHFFFAOYSA-M' inchi = 'InChI=1S/Ag.BrHO3/c;2-1(3)4/h;(H,2,3,4)/q+1;/p-1' - self.assertEqual(self.converter.inchikey_to_inchi(inchikey), inchi) + + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_inchi', [inchikey])), inchi) inchikey = 'XQLMNVCXIKR-UHFFFAOYSA-M' - self.assertIsNone(self.converter.inchikey_to_inchi(inchikey)) + self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_inchi', [inchikey]))) def test_inchikey_to_cas(self): inchikey = 'XQLMNMQWVCXIKR-UHFFFAOYSA-M' cas_number = '7783-89-3' - self.assertEqual(self.converter.inchikey_to_cas(inchikey), cas_number) + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_cas', [inchikey])), cas_number) inchikey = 'XQLMNVCXIKR-UHFFFAOYSA-M' - self.assertIsNone(self.converter.inchikey_to_cas(inchikey)) + self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_cas', [inchikey]))) def test_inchikey_to_formula(self): inchikey = 'XQLMNMQWVCXIKR-UHFFFAOYSA-M' formula = 'AgBrO3' - self.assertEqual(self.converter.inchikey_to_formula(inchikey), formula) + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_formula', [inchikey])), formula) inchikey = 'XQLMNVCXIKR-UHFFFAOYSA-M' - self.assertIsNone(self.converter.inchikey_to_formula(inchikey)) + self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_formula', [inchikey]))) def test_smiles_to_inchikey(self): inchikey = 'XQLMNMQWVCXIKR-UHFFFAOYSA-M' smiles = '[Ag+].[O-][Br](=O)=O' - self.assertEqual(self.converter.smiles_to_inchikey(smiles), inchikey) + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'smiles_to_inchikey', [smiles])), inchikey) smiles = '[Ag+].O-][Br](=O)=O' - self.assertIsNone(self.converter.smiles_to_inchikey(smiles)) + self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'smiles_to_inchikey', [smiles]))) diff --git a/tests/test_CTS.py b/tests/test_CTS.py index fde639a..d269fd3 100644 --- a/tests/test_CTS.py +++ b/tests/test_CTS.py @@ -1,6 +1,10 @@ +import asyncio import unittest +import json +from aiohttp.client_exceptions import ClientConnectorError from libs.services.CTS import CTS +from tests.utils import wrap_with_session class TestCTS(unittest.TestCase): @@ -11,77 +15,71 @@ def test_connect_to_service(self): # test basic CTS service cas_number = '7783-89-3' args = 'CAS/InChIKey/{}'.format(cas_number) - response = self.converter.query_the_service('CTS', args) - self.assertEqual(response.status_code, 200) - json = response.json() - self.assertTrue(type(json == list)) - self.assertEqual(len(json), 1) - self.assertIn('results', json[0]) - self.assertEqual(len(json[0]['results']), 1) + response = asyncio.run(wrap_with_session(self.converter, 'query_the_service', ['CTS', args])) + response_json = json.loads(response) + self.assertTrue(type(response_json == list)) + self.assertEqual(len(response_json), 1) + self.assertIn('results', response_json[0]) + self.assertEqual(len(response_json[0]['results']), 1) # incorrect CAS number cas_number = '7783893' args = 'CAS/InChIKey/{}'.format(cas_number) - response = self.converter.query_the_service('CTS', args) - self.assertEqual(response.status_code, 200) - json = response.json() - self.assertTrue(type(json == list)) - self.assertEqual(len(json), 1) - self.assertIn('results', json[0]) - self.assertEqual(len(json[0]['results']), 0) + response = asyncio.run(wrap_with_session(self.converter, 'query_the_service', ['CTS', args])) + response_json = json.loads(response) + self.assertTrue(type(response_json == list)) + self.assertEqual(len(response_json), 1) + self.assertIn('results', response_json[0]) + self.assertEqual(len(response_json[0]['results']), 0) # test incorrect service (simulates unavailable service) self.converter.services['random'] = 'https://random_strange_url.com' - self.assertRaises(ConnectionError, self.converter.query_the_service, 'random', '') + try: + asyncio.run(wrap_with_session(self.converter, 'query_the_service', ['random', ''])) + except ClientConnectorError: + pass + else: + self.fail('ConnectionError not raised') def test_cas_to_inchikey(self): inchikey = 'XQLMNMQWVCXIKR-UHFFFAOYSA-M' cas_number = '7783-89-3' - self.assertEqual(self.converter.cas_to_inchikey(cas_number), inchikey) + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'cas_to_inchikey', [cas_number])), inchikey) cas_number = '7783893' - self.assertIsNone(self.converter.cas_to_inchikey(cas_number)) + self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'cas_to_inchikey', [cas_number]))) def test_inchikey_to_inchi(self): inchikey = 'XQLMNMQWVCXIKR-UHFFFAOYSA-M' inchi = 'InChI=1S/Ag.BrHO3/c;2-1(3)4/h;(H,2,3,4)/q+1;/p-1' - self.assertEqual(self.converter.inchikey_to_inchi(inchikey), inchi) + + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_inchi', [inchikey])), inchi) inchikey = 'XQLMNMQIKR-UHFFFAOYSA-M' - self.assertIsNone(self.converter.inchikey_to_inchi(inchikey)) + self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_inchi', [inchikey]))) def test_name_to_inchikey(self): name = 'L-Alanine' inchikey = 'QNAYBMKLOCPYGJ-REOHCLBHSA-N' - self.assertEqual(self.converter.name_to_inchikey(name), inchikey) + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'name_to_inchikey', [name])), inchikey) name = 'L-Alalalalanine' - self.assertIsNone(self.converter.name_to_inchikey(name)) + self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'name_to_inchikey', [name]))) def test_inchikey_to_name(self): inchikey = 'QNAYBMKLOCPYGJ-REOHCLBHSA-N' name = 'L-2-Aminopropanoic acid' - self.assertEqual(self.converter.inchikey_to_name(inchikey), name) + + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_name', [inchikey])), name) inchikey = 'XQLMNMQIKR-UHFFFAOYSA-M' - self.assertIsNone(self.converter.inchikey_to_name(inchikey)) + self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_name', [inchikey]))) def test_inchikey_to_IUPAC_name(self): inchikey = 'QNAYBMKLOCPYGJ-REOHCLBHSA-N' uipac_name = '(2S)-2-aminopropanoic acid' - self.assertEqual(self.converter.inchikey_to_iupac_name(inchikey), uipac_name) + asyncio.run(wrap_with_session(self.converter, 'inchikey_to_iupac_name', [inchikey])) + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_iupac_name', [inchikey])), uipac_name) inchikey = 'XQLMNMQIKR-UHFFFAOYSA-M' - self.assertIsNone(self.converter.inchikey_to_iupac_name(inchikey)) - - def test_cache(self): - inchikey = 'XQLMNMQWVCXIKR-UHFFFAOYSA-M' - identification = 'CTS_compound:' + inchikey - _ = self.converter.inchikey_to_inchi(inchikey) - - self.assertIn(identification, self.converter.cache) - - _ = self.converter.inchikey_to_name(inchikey) - _ = self.converter.inchikey_to_iupac_name(inchikey) - - self.assertIn(identification, self.converter.cache) + self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_iupac_name', [inchikey]))) diff --git a/tests/test_NLM.py b/tests/test_NLM.py index 7080f5c..537962b 100644 --- a/tests/test_NLM.py +++ b/tests/test_NLM.py @@ -1,8 +1,10 @@ +import asyncio import unittest from io import StringIO import pandas as pd from libs.services.NLM import NLM +from tests.utils import wrap_with_session class TestNLM(unittest.TestCase): @@ -13,27 +15,28 @@ def test_connect_to_service(self): # test basic NLM service inchikey = 'QNAYBMKLOCPYGJ-REOHCLBHSA-N' args = 'inchikey/equals/{}?data=summary&format=tsv'.format(inchikey) - response = self.converter.query_the_service('NLM', args) - self.assertEqual(response.status_code, 200) + response = asyncio.run(wrap_with_session(self.converter, 'query_the_service', ['NLM', args])) - table = pd.read_csv(StringIO(response.text), sep='\t') + table = pd.read_csv(StringIO(response), sep='\t') self.assertFalse(table.empty) def test_inchikey_to_name(self): inchikey = 'QNAYBMKLOCPYGJ-REOHCLBHSA-N' name = 'Alanine [USAN:INN]' - self.assertEqual(self.converter.inchikey_to_name(inchikey), name) + + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_name', [inchikey])), name) inchikey = 'QNAYBMLOXXXXGJ-REOHCLBHSA-N' - self.assertIsNone(self.converter.inchikey_to_name(inchikey)) + self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_name', [inchikey]))) inchikey = 'QNAYMLGJ-REOLBHSA-N' - self.assertIsNone(self.converter.inchikey_to_name(inchikey)) + self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_name', [inchikey]))) def test_name_to_inchikey(self): name = 'L-Alanine' inchikey = 'QNAYBMKLOCPYGJ-REOHCLBHSA-N' - self.assertEqual(self.converter.name_to_inchikey(name), inchikey) + + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'name_to_inchikey', [name])), inchikey) name = 'L-Alanne' - self.assertIsNone(self.converter.name_to_inchikey(name)) + self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'name_to_inchikey', [name]))) diff --git a/tests/test_annotator.py b/tests/test_annotator.py index 7e6cbc2..9f68cb9 100644 --- a/tests/test_annotator.py +++ b/tests/test_annotator.py @@ -1,7 +1,9 @@ +import asyncio import unittest import mock from libs.Annotator import Annotator +from tests.utils import wrap_with_session class TestAnnotator(unittest.TestCase): @@ -10,29 +12,34 @@ def setUp(self): def test_annotate(self): jobs = [('name', 'inchi', 'PubChem')] - pubchem = self.annotator.services['PubChem'] - pubchem.name_to_inchi = mock.Mock(return_value='a InChi value') + pubchem.name_to_inchi = mock.AsyncMock(return_value='a InChi value') self.annotator.services['PubChem'] = pubchem - metadata = {'name': 'a good name'} - expected_metadata = {'name': 'a good name', 'inchi': 'a InChi value'} - self.assertEqual(self.annotator.annotate(metadata, jobs), expected_metadata) + spectra = mock.Mock() + spectra.metadata = {'name': 'a good name'} + expected_spectra = mock.AsyncMock() + expected_spectra.metadata = {'name': 'a good name', 'inchi': 'a InChi value'} + self.assertEqual(asyncio.run(wrap_with_session(self.annotator, 'annotate', [spectra, jobs])).metadata, + expected_spectra.metadata) def test_service_unknown(self): jobs = [('name', 'inchi', 'Jumbo')] - metadata = {'name': 'a good name'} - self.assertEqual(self.annotator.annotate(metadata, jobs), metadata) + spectra = mock.Mock() + spectra.metadata = {'name': 'a good name'} + self.assertEqual(asyncio.run(wrap_with_session(self.annotator, 'annotate', [spectra, jobs])), spectra) def test_source_unknown(self): jobs = [('random_name', 'inchi', 'Jumbo')] - metadata = {'name': 'a good name'} - self.assertEqual(self.annotator.annotate(metadata, jobs), metadata) + spectra = mock.Mock() + spectra.metadata = {'name': 'a good name'} + self.assertEqual(asyncio.run(wrap_with_session(self.annotator, 'annotate', [spectra, jobs])), spectra) def test_target_unknown(self): jobs = [('name', 'random_name', 'CTS')] - metadata = {'name': 'a good name'} - self.assertEqual(self.annotator.annotate(metadata, jobs), metadata) + spectra = mock.Mock() + spectra.metadata = {'name': 'a good name'} + self.assertEqual(asyncio.run(wrap_with_session(self.annotator, 'annotate', [spectra, jobs])), spectra) def test_get_all_conversions(self): expected_result = [('cas', 'inchikey', 'CTS'), ('inchikey', 'inchi', 'CTS'), ('inchikey', 'iupac_name', 'CTS'), diff --git a/tests/test_pubchem.py b/tests/test_pubchem.py index 8c81a08..a304611 100644 --- a/tests/test_pubchem.py +++ b/tests/test_pubchem.py @@ -1,6 +1,9 @@ +import asyncio import unittest +import json from libs.services.PubChem import PubChem +from tests.utils import wrap_with_session class TestPubChem(unittest.TestCase): @@ -10,62 +13,55 @@ def setUp(self): def test_connect_to_service(self): inchi = 'InChI=1S/C9H10O4/c10-7-3-1-6(2-4-7)5-8(11)9(12)13/h1-4,8,10-11H,5H2,(H,12,13)' args = "inchi/JSON" - response = self.converter.query_the_service('PubChem', args, method='POST', data={'inchi': inchi}) - self.assertEqual(response.status_code, 200) - json = response.json() - self.assertIn('PC_Compounds', json) - self.assertTrue(len(json['PC_Compounds']) == 1) - self.assertIn('props', json['PC_Compounds'][0]) - self.assertTrue(type(json['PC_Compounds'][0]['props']) == list) + response = asyncio.run(wrap_with_session(self.converter, 'query_the_service', + ['PubChem', args, 'POST', {'inchi': inchi}])) + response_json = json.loads(response) + self.assertIn('PC_Compounds', response_json) + self.assertTrue(len(response_json['PC_Compounds']) == 1) + self.assertIn('props', response_json['PC_Compounds'][0]) + self.assertTrue(type(response_json['PC_Compounds'][0]['props']) == list) def test_inchi_to_inchikey(self): inchi = 'InChI=1S/C9H10O4/c10-7-3-1-6(2-4-7)5-8(11)9(12)13/h1-4,8,10-11H,5H2,(H,12,13)' inchikey = 'JVGVDSSUAVXRDY-UHFFFAOYSA-N' - self.assertEqual(self.converter.inchi_to_inchikey(inchi), inchikey) - # clear the cache - self.converter.cache = dict() + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchi_to_inchikey', [inchi])), inchikey) - wrong_inchi = 'InChI=1S/C9H10O4/c102-4-7)5-8(11)9(12)13/h1-4,8,10-11H,5H2,(H,12,13)' - self.assertIsNone(self.converter.inchi_to_inchikey(wrong_inchi)) + inchi = 'InChI=1S/C9H10O4/c102-4-7)5-8(11)9(12)13/h1-4,8,10-11H,5H2,(H,12,13)' + self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'inchi_to_inchikey', [inchi]))) def test_name_to_inchi(self): name = '3-Methyl-5-[p-fluorophenyl]-2H-1,3-[3H]-oxazine-2,6-dione' inchi = 'InChI=1S/C11H8FNO3/c1-13-6-9(10(14)16-11(13)15)7-2-4-8(12)5-3-7/h2-6H,1H3' - self.assertEqual(self.converter.name_to_inchi(name), inchi) + + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'name_to_inchi', [name])), inchi) name = 'HYDROXYPHENYLLACTATE M-H' - self.assertIsNone(self.converter.name_to_inchi(name)) + self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'name_to_inchi', [name]))) def test_inchi_to_IUPAC_name(self): inchi = 'InChI=1S/C11H8FNO3/c1-13-6-9(10(14)16-11(13)15)7-2-4-8(12)5-3-7/h2-6H,1H3' IUPAC_name = '5-(4-fluorophenyl)-3-methyl-1,3-oxazine-2,6-dione' - self.assertEqual(self.converter.inchi_to_iupac_name(inchi), IUPAC_name) - # clear the cache - self.converter.cache = dict() + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchi_to_iupac_name', [inchi])), IUPAC_name) - wrong_inchi = 'InChI=1S/C9H10O4/c102-4-7)5-8(11)93/1-4,8,10-11H,5H2,(H,12,13)' - self.assertIsNone(self.converter.inchi_to_iupac_name(wrong_inchi)) + inchi = 'InChI=1S/C9H10O4/c102-4-7)5-8(11)93/1-4,8,10-11H,5H2,(H,12,13)' + self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'inchi_to_iupac_name', [inchi]))) def test_inchi_to_formula(self): inchi = 'InChI=1S/C11H8FNO3/c1-13-6-9(10(14)16-11(13)15)7-2-4-8(12)5-3-7/h2-6H,1H3' formula = 'C11H8FNO3' - self.assertEqual(self.converter.inchi_to_formula(inchi), formula) - # clear the cache - self.converter.cache = dict() + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchi_to_formula', [inchi])), formula) - wrong_inchi = 'InChI=1S/C9H10O4/c102-4-7)5-8(11)93/1-4,8,10-11H,5H2,(H,12,13)' - self.assertIsNone(self.converter.inchi_to_formula(wrong_inchi)) + inchi = 'InChI=1S/C9H10O4/c102-4-7)5-8(11)93/1-4,8,10-11H,5H2,(H,12,13)' + self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'inchi_to_formula', [inchi]))) def test_inchi_to_smiles(self): inchi = 'InChI=1S/C11H8FNO3/c1-13-6-9(10(14)16-11(13)15)7-2-4-8(12)5-3-7/h2-6H,1H3' smiles = 'CN1C=C(C(=O)OC1=O)C2=CC=C(C=C2)F' - self.assertEqual(self.converter.inchi_to_smiles(inchi), smiles) - # clear the cache - self.converter.cache = dict() + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchi_to_smiles', [inchi])), smiles) - wrong_inchi = 'InChI=1S/C9H10O4/c102-4-7)5-8(11)93/1-4,8,10-11H,5H2,(H,12,13)' - self.assertIsNone(self.converter.inchi_to_smiles(wrong_inchi)) + inchi = 'InChI=1S/C9H10O4/c102-4-7)5-8(11)93/1-4,8,10-11H,5H2,(H,12,13)' + self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'inchi_to_smiles', [inchi]))) diff --git a/tests/utils.py b/tests/utils.py new file mode 100644 index 0000000..d663587 --- /dev/null +++ b/tests/utils.py @@ -0,0 +1,7 @@ +import aiohttp + + +async def wrap_with_session(converter, method, args): + async with aiohttp.ClientSession() as session: + converter.session = session + return await getattr(converter, method)(*args)