diff --git a/libs/Annotator.py b/libs/Annotator.py index 37e7620..cb83de6 100644 --- a/libs/Annotator.py +++ b/libs/Annotator.py @@ -1,4 +1,4 @@ -from libs.utils.Errors import ConversionNotSupported, DataNotRetrieved +from libs.utils.Errors import ConversionNotSupported, DataNotRetrieved, DataNotAvailable class Annotator: @@ -19,33 +19,56 @@ async def annotate(self, spectra, jobs, repeat=False): :return: annotated dictionary """ metadata = spectra.metadata + cache = dict() added_metadata = True while added_metadata: added_metadata = False for job in jobs: - service = self.services.get(job.service, None) - data = metadata.get(job.source, None) - - if job.target in metadata: - pass # TODO: log - data already present - elif service is None: - pass # TODO: log - unknown service - elif data is None: - pass # TODO: log - source data not available for conversion - else: + if job.target not in metadata: try: - result = await service.convert(job.source, job.target, data) - metadata[job.target] = result + metadata, cache = await self.execute_job_with_cache(job, metadata, cache) if repeat: added_metadata = True + except DataNotAvailable: + pass # TODO log data for conversing missing in given metadata except ConversionNotSupported: - pass # TODO log this type of conversion is not supported by the service + pass # TODO log this type of conversion is not supported by the service or service unknown except DataNotRetrieved: pass # TODO log no data were retrieved + else: + pass # TODO: log - data already present + spectra.metadata = metadata return spectra + async def execute_job_with_cache(self, job, metadata, cache): + """ + Execute given job in cached mode. Cache is service specific + and spectra specific. + + Raises DataNotRetrieved + + :param job: given job to be executed + :param metadata: data to be annotated by the job + :param cache: given cache for this spectra + :return: updated metadata and cache + """ + # make sure the job makes sense + service, data = job.validate(self.services, metadata) + + cache[job.service] = cache.get(job.service, dict()) + if job.target in cache[job.service]: + metadata[job.target] = cache[job.service][job.target] + else: + result = await service.convert(job.source, job.target, data) + cache[job.service].update(result) + if job.target in cache[job.service]: + metadata[job.target] = cache[job.service][job.target] + else: + raise DataNotRetrieved('No data obtained from the specified job.') + return metadata, cache + def get_all_conversions(self): """ Method to compute all available conversion functions of all available Services. diff --git a/libs/services/CIR.py b/libs/services/CIR.py index c75230c..e9311af 100644 --- a/libs/services/CIR.py +++ b/libs/services/CIR.py @@ -18,7 +18,7 @@ async def cas_to_smiles(self, cas_number): args = f"{cas_number}/smiles?resolver=cas_number" response = await self.query_the_service('CIR', args) if response: - return response + return {'smiles': response} async def inchikey_to_smiles(self, inchikey): """ @@ -31,7 +31,7 @@ async def inchikey_to_smiles(self, inchikey): args = f'{inchikey}/smiles' response = await self.query_the_service('CIR', args) if response: - return response.split('\n')[0] + return {'smiles': response.split('\n')[0]} async def inchikey_to_inchi(self, inchikey): """ @@ -44,7 +44,7 @@ async def inchikey_to_inchi(self, inchikey): args = f'{inchikey}/stdinchi' response = await self.query_the_service('CIR', args) if response: - return response + return {'inchi': response} async def inchikey_to_cas(self, inchikey): """ @@ -57,7 +57,7 @@ async def inchikey_to_cas(self, inchikey): args = f'{inchikey}/cas' response = await self.query_the_service('CIR', args) if response: - return response + return {'casno': response} async def inchikey_to_formula(self, inchikey): """ @@ -70,7 +70,7 @@ async def inchikey_to_formula(self, inchikey): args = f'{inchikey}/formula' response = await self.query_the_service('CIR', args) if response: - return response + return {'formula': response} async def smiles_to_inchikey(self, smiles): """ @@ -83,4 +83,4 @@ async def smiles_to_inchikey(self, smiles): args = f'{smiles}/stdinchikey' response = await self.query_the_service('CIR', args) if response: - return response[9:] + return {'inchikey': response[9:]} diff --git a/libs/services/CTS.py b/libs/services/CTS.py index 70fead8..5b12618 100644 --- a/libs/services/CTS.py +++ b/libs/services/CTS.py @@ -11,82 +11,89 @@ def __init__(self, session): 'CTS_compound': 'http://cts.fiehnlab.ucdavis.edu/service/compound/' } + # generate top level methods defining allowed conversions + conversions = [('inchikey', 'inchi', 'from_inchikey'), + ('inchikey', 'name', 'from_inchikey'), + ('inchikey', 'iupac_name', 'from_inchikey')] + self.create_top_level_conversion_methods(conversions) + + ### top level methods defining allowed conversions + async def cas_to_inchikey(self, cas_number): """ Convert CAS number to InChiKey using CTS web service More info: http://cts.fiehnlab.ucdavis.edu/services - The method returns first found hit. - :param cas_number: given CAS number :return: obtained InChiKey """ args = f'CAS/InChIKey/{cas_number}' response = await self.query_the_service('CTS', args) if response: - response_json = json.loads(response) - if len(response_json[0]['results']) != 0: - return response_json[0]['results'][0] - - async def inchikey_to_inchi(self, inchikey): - """ - Convert InChiKey to InChi using CTS compound service - More info: http://cts.fiehnlab.ucdavis.edu/services - - :param inchikey: given InChiKey value - :return: obtained InChi - """ - args = inchikey - response = await self.query_the_service('CTS_compound', args) - if response: - response_json = json.loads(response) - return response_json["inchicode"] + return self.parse_inchikey(response) async def name_to_inchikey(self, name): """ Convert Chemical name to InChiKey using CTS service More info: http://cts.fiehnlab.ucdavis.edu/services - :param name: given Chemical name :return: obtained InChiKey """ args = f'Chemical%20Name/InChIKey/{name}' response = await self.query_the_service('CTS', args) if response: - response_json = json.loads(response) - if len(response_json[0]['results']) != 0: - return response_json[0]['results'][0] + return self.parse_inchikey(response) + + ### - async def inchikey_to_name(self, inchikey): + async def from_inchikey(self, inchikey): """ - Convert InChiKey to Chemical name using CTS compound service + Convert InChiKey to all possible attributes using CTS compound service More info: http://cts.fiehnlab.ucdavis.edu/services :param inchikey: given InChiKey value - :return: obtained Chemical name + :return: all found data """ args = inchikey response = await self.query_the_service('CTS_compound', args) if response: - response_json = json.loads(response) - synonyms = response_json['synonyms'] - names = [item['name'] for item in synonyms if item['type'] == 'Synonym'] - if names: - return names[0] + return self.parse_attributes(response) - async def inchikey_to_iupac_name(self, inchikey): + def parse_inchikey(self, response): """ - Convert InChiKey to IUPAC name using CTS compound service - More info: http://cts.fiehnlab.ucdavis.edu/services + Parse InChiKey attribute obtained from given key. - :param inchikey: given InChiKey value - :return: obtained IUPAC name + :param response: CTS conversion response to given key + :return: parsed InChiKey """ - args = inchikey - response = await self.query_the_service('CTS_compound', args) - if response: - response_json = json.loads(response) + response_json = json.loads(response) + if len(response_json[0]['results']) != 0: + return {'inchikey': response_json[0]['results'][0]} + + def parse_attributes(self, response): + """ + Parse all available attributes obtained from InChiKey. + + :param response: CTS compound response to given InChiKey + :return: all parsed data + """ + response_json = json.loads(response) + result = dict() + + if 'inchicode' in response_json: + result['inchi'] = response_json['inchicode'] + + if 'formula' in response_json: + result['formula'] = response_json['formula'] + + if 'synonyms' in response_json: synonyms = response_json['synonyms'] + + names = [item['name'] for item in synonyms if item['type'] == 'Synonym'] + if names: + result['name'] = names[0] + names = [item['name'] for item in synonyms if item['type'] == 'IUPAC Name (Preferred)'] if names: - return names[0] + result['iupac_name'] = names[0] + return result diff --git a/libs/services/Converter.py b/libs/services/Converter.py index ce40c4b..bed9d7b 100644 --- a/libs/services/Converter.py +++ b/libs/services/Converter.py @@ -1,3 +1,6 @@ +from asyncstdlib import lru_cache + + from aiohttp.client_exceptions import ServerDisconnectedError from libs.utils.Errors import DataNotRetrieved, ConversionNotSupported @@ -6,6 +9,7 @@ class Converter: def __init__(self, session): self.session = session + @lru_cache async def query_the_service(self, service, args, method='GET', data=None): """ Make get request to given service with arguments. @@ -79,4 +83,31 @@ async def convert(self, source, target, data): return result raise DataNotRetrieved(f'Target attribute {target} not available.') except AttributeError: - raise ConversionNotSupported(f'Target attribute {target} is not supported.') + raise ConversionNotSupported(f'Conversion from {source} to {target} is not supported.') + + def create_top_level_conversion_methods(self, conversions): + """ + Method to create and set dynamic methods defined in conversions + + :param conversions: triples of form (from, to, method) + """ + for conversion in conversions: + create_top_level_method(self, *conversion) + + +def create_top_level_method(obj, source, target, method): + """ + Assign a new method to {obj} called {source}_to_{target} which calls {method}. + + :param obj: given object (typically a Converter) + :param source: attribute name used as source of data + :param target: attribute name obtained using this dynamic method + :param method: method which is called in the object with single argument + """ + async def conversion(key): + return await getattr(obj, str(method))(key) + + conversion.__doc__ = f'Convert {source} to {target} using {obj.__class__.__name__} service' + conversion.__name__ = f'{source}_to_{target}' + + setattr(obj, conversion.__name__, conversion) diff --git a/libs/services/NLM.py b/libs/services/NLM.py index 312c2dd..c352262 100644 --- a/libs/services/NLM.py +++ b/libs/services/NLM.py @@ -11,35 +11,60 @@ def __init__(self, session): # service URLs self.services = {'NLM': 'https://chem.nlm.nih.gov/api/data/'} - async def inchikey_to_name(self, inchikey): + self.attributes = [{'code': 'casno', 'label': 'RN / ID'}, + {'code': 'inchikey', 'label': 'InChIKey'}, + {'code': 'name', 'label': 'Name'}, + {'code': 'formula', 'label': 'Formula'}] + + # generate top level methods defining allowed conversions + conversions = [('inchikey', 'name', 'from_inchikey'), + ('inchikey', 'formula', 'from_inchikey'), + ('inchikey', 'casno', 'from_inchikey'), + ('name', 'inchikey', 'from_name'), + ('name', 'formula', 'from_name'), + ('name', 'casno', 'from_name')] + self.create_top_level_conversion_methods(conversions) + + async def from_inchikey(self, inchikey): """ - Convert InChiKey to Chemical name using NLM service + Convert InChiKey to all possible attributes using NLM service More info: https://chem.nlm.nih.gov/chemidplus/inchikey :param inchikey: given InChiKey - :return: obtained Chemical name + :return: all found data """ args = f'inchikey/equals/{inchikey}?data=summary&format=tsv' response = await self.query_the_service('NLM', args) if response: - if response != 'EXPRESSION_INVALID': - table = pd.read_csv(StringIO(response), sep='\t') - if not table.empty: - return table['Name'][0] + return self.parse_attributes(response) - async def name_to_inchikey(self, name): + async def from_name(self, name): """ - Convert Chemical name to InChiKey using NLM service + Convert Chemical name to all possible attributes using NLM service More info: https://chem.nlm.nih.gov/chemidplus/inchikey :param name: given Chemical name - :return: obtained InChiKey + :return: all found data """ args = f'name/equals/{name}?data=summary&format=tsv' response = await self.query_the_service('NLM', args) if response: + return self.parse_attributes(response) + + def parse_attributes(self, response): + """ + Parse all available attributes obtained from given key. + + :param response: NLM compound response to given given key + :return: all parsed data + """ + result = dict() + + if response != 'EXPRESSION_INVALID': table = pd.read_csv(StringIO(response), sep='\t') if not table.empty: - inchikey = table['InChIKey'][0] - if type(inchikey) == str: - return inchikey + for att in self.attributes: + value = table[att['label']][0] + if type(value) == str: + result[att['code']] = value + return result diff --git a/libs/services/PubChem.py b/libs/services/PubChem.py index f7d2cd2..f8bef66 100644 --- a/libs/services/PubChem.py +++ b/libs/services/PubChem.py @@ -1,6 +1,7 @@ import json from libs.services.Converter import Converter +from frozendict import frozendict class PubChem(Converter): @@ -9,87 +10,69 @@ def __init__(self, session): # service URLs self.services = {'PubChem': 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/'} - async def name_to_inchi(self, name): + self.attributes = [{'code': 'inchi', 'label': 'InChI', 'extra': None}, + {'code': 'inchikey', 'label': 'InChIKey', 'extra': None}, + {'code': 'iupac_name', 'label': 'IUPAC Name', 'extra': 'Preferred'}, + {'code': 'formula', 'label': 'Molecular Formula', 'extra': None}, + {'code': 'smiles', 'label': 'SMILES', 'extra': 'Canonical'}] + + # generate top level methods defining allowed conversions + conversions = [('name', 'inchi', 'from_name'), + ('name', 'inchikey', 'from_name'), + ('name', 'iupac_name', 'from_name'), + ('name', 'formula', 'from_name'), + ('name', 'smiles', 'from_name'), + ('inchi', 'inchikey', 'from_inchi'), + ('inchi', 'iupac_name', 'from_inchi'), + ('inchi', 'formula', 'from_inchi'), + ('inchi', 'smiles', 'from_inchi')] + self.create_top_level_conversion_methods(conversions) + + async def from_name(self, name): """ - Convert Chemical name to InChi using PubChem service + Convert Chemical name to all possible attributes using PubChem service More info: https://pubchemdocs.ncbi.nlm.nih.gov/pug-rest :param name: given Chemical name - :return: found InChi + :return: all found data """ args = f'name/{name}/JSON' response = await self.query_the_service('PubChem', args) if response: - response_json = json.loads(response) - for prop in response_json['PC_Compounds'][0]['props']: - if prop['urn']['label'] == 'InChI': - return prop['value']['sval'] - - async def inchi_to_inchikey(self, inchi): - """ - Convert InChi to InChiKey using PubChem service - More info: https://pubchemdocs.ncbi.nlm.nih.gov/pug-rest - - :param inchi: given InChi - :return: found InChiKey - """ - props = await self.get_props_from_inchi(inchi) - if props: - for prop in props: - if prop['urn']['label'] == 'InChIKey': - return prop['value']['sval'] + return self.parse_attributes(response) - async def inchi_to_iupac_name(self, inchi): + async def from_inchi(self, inchi): """ - Convert InChi to IUPAC name using PubChem service + Convert InChi to to all possible attributes using PubChem service More info: https://pubchemdocs.ncbi.nlm.nih.gov/pug-rest :param inchi: given InChi - :return: found IUPAC name - """ - props = await self.get_props_from_inchi(inchi) - if props: - for prop in props: - if prop['urn']['label'] == 'IUPAC Name' and prop['urn']['name'] == 'Preferred': - return prop['value']['sval'] - - async def inchi_to_formula(self, inchi): + :return: all found data """ - Convert InChi to chemical formula using PubChem service - More info: https://pubchemdocs.ncbi.nlm.nih.gov/pug-rest - - :param inchi: given InChi - :return: found chemical formula - """ - props = await self.get_props_from_inchi(inchi) - if props: - for prop in props: - if prop['urn']['label'] == 'Molecular Formula': - return prop['value']['sval'] + args = "inchi/JSON" + response = await self.query_the_service('PubChem', args, method='POST', data=frozendict({'inchi': inchi})) + if response: + return self.parse_attributes(response) - async def inchi_to_smiles(self, inchi): + def parse_attributes(self, response): """ - Convert InChi to SMILES using PubChem service - More info: https://pubchemdocs.ncbi.nlm.nih.gov/pug-rest + Parse all available attributes (specified in self.attributes) from given response. - :param inchi: given InChi - :return: found SMILES - """ - props = await self.get_props_from_inchi(inchi) - if props: - for prop in props: - if prop['urn']['label'] == 'SMILES' and prop['urn']['name'] == 'Canonical': - return prop['value']['sval'] + Method does not return anything, instead stores data in local cache. - async def get_props_from_inchi(self, inchi): + :param response: given JSON + :return: all parsed data """ - General methods to obtain all possible data based on InChi. + response_json = json.loads(response) + result = dict() - :param inchi: given InChi - :return: obtained properties associated to the given InChi - """ - args = "inchi/JSON" - response = await self.query_the_service('PubChem', args, method='POST', data={'inchi': inchi}) - if response: - response_json = json.loads(response) - return response_json['PC_Compounds'][0]['props'] + for prop in response_json['PC_Compounds'][0]['props']: + label = prop['urn']['label'] + for att in self.attributes: + if label == att['label']: + if att['extra']: + if prop['urn']['name'] == att['extra']: + result[att['code']] = prop['value']['sval'] + else: + result[att['code']] = prop['value']['sval'] + return result diff --git a/libs/utils/Errors.py b/libs/utils/Errors.py index 5ab8fa6..2407831 100644 --- a/libs/utils/Errors.py +++ b/libs/utils/Errors.py @@ -12,3 +12,7 @@ class UnknownService(Exception): class UnknownSpectraFormat(Exception): pass + + +class DataNotAvailable(Exception): + pass diff --git a/libs/utils/Job.py b/libs/utils/Job.py index e55b953..31a3057 100644 --- a/libs/utils/Job.py +++ b/libs/utils/Job.py @@ -1,3 +1,6 @@ +from libs.utils.Errors import ConversionNotSupported, DataNotAvailable + + class Job: def __init__(self, data): self.source, self.target, self.service = data @@ -8,6 +11,17 @@ def __str__(self): def __repr__(self): return f'Job(({self.source}, {self.target}, {self.service}))' + def validate(self, services, metadata): + service = services.get(self.service, None) + data = metadata.get(self.source, None) + + if service is None: + raise ConversionNotSupported(f'Specified {service} not supported.') + elif data is None: + raise DataNotAvailable(f'Source {self.source} not available in metadata.') + else: + return service, data + def convert_to_jobs(jobs): return [Job(data) for data in jobs] diff --git a/requirements.txt b/requirements.txt index adc3d5b..3f205d4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,6 @@ matchms~=0.9.0 pandas~=1.2.4 requests~=2.25.1 mock~=4.0.3 -aiohttp~=3.7.4.post0 \ No newline at end of file +aiohttp~=3.7.4.post0 +asyncstdlib~=3.9.2 +frozendict~=2.0.3 \ No newline at end of file diff --git a/tests/test_CIR.py b/tests/test_CIR.py index db0b3be..cae5ec4 100644 --- a/tests/test_CIR.py +++ b/tests/test_CIR.py @@ -25,7 +25,8 @@ def test_connect_to_service(self): def test_cas_to_smiles(self): smiles = '[Ag+].[O-][Br](=O)=O' cas_number = '7783-89-3' - self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'cas_to_smiles', [cas_number])), smiles) + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'cas_to_smiles', [cas_number]))['smiles'], + smiles) cas_number = '7783893' self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'cas_to_smiles', [cas_number]))) @@ -33,7 +34,8 @@ def test_cas_to_smiles(self): def test_inchikey_to_smiles(self): inchikey = 'XQLMNMQWVCXIKR-UHFFFAOYSA-M' smiles = '[Ag+].[O-][Br](=O)=O' - self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_smiles', [inchikey])), smiles) + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_smiles', [inchikey]))['smiles'], + smiles) inchikey = 'XQLMNVCXIKR-UHFFFAOYSA-M' self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_smiles', [inchikey]))) @@ -42,7 +44,8 @@ def test_inchikey_to_inchi(self): inchikey = 'XQLMNMQWVCXIKR-UHFFFAOYSA-M' inchi = 'InChI=1S/Ag.BrHO3/c;2-1(3)4/h;(H,2,3,4)/q+1;/p-1' - self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_inchi', [inchikey])), inchi) + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_inchi', [inchikey]))['inchi'], + inchi) inchikey = 'XQLMNVCXIKR-UHFFFAOYSA-M' self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_inchi', [inchikey]))) @@ -51,7 +54,8 @@ def test_inchikey_to_cas(self): inchikey = 'XQLMNMQWVCXIKR-UHFFFAOYSA-M' cas_number = '7783-89-3' - self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_cas', [inchikey])), cas_number) + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_cas', [inchikey]))['casno'], + cas_number) inchikey = 'XQLMNVCXIKR-UHFFFAOYSA-M' self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_cas', [inchikey]))) @@ -60,7 +64,8 @@ def test_inchikey_to_formula(self): inchikey = 'XQLMNMQWVCXIKR-UHFFFAOYSA-M' formula = 'AgBrO3' - self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_formula', [inchikey])), formula) + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_formula', [inchikey]))['formula'], + formula) inchikey = 'XQLMNVCXIKR-UHFFFAOYSA-M' self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_formula', [inchikey]))) @@ -69,7 +74,8 @@ def test_smiles_to_inchikey(self): inchikey = 'XQLMNMQWVCXIKR-UHFFFAOYSA-M' smiles = '[Ag+].[O-][Br](=O)=O' - self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'smiles_to_inchikey', [smiles])), inchikey) + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'smiles_to_inchikey', [smiles]))['inchikey'], + inchikey) smiles = '[Ag+].O-][Br](=O)=O' self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'smiles_to_inchikey', [smiles]))) diff --git a/tests/test_CTS.py b/tests/test_CTS.py index d45b875..02cc485 100644 --- a/tests/test_CTS.py +++ b/tests/test_CTS.py @@ -34,7 +34,8 @@ def test_connect_to_service(self): def test_cas_to_inchikey(self): inchikey = 'XQLMNMQWVCXIKR-UHFFFAOYSA-M' cas_number = '7783-89-3' - self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'cas_to_inchikey', [cas_number])), inchikey) + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'cas_to_inchikey', [cas_number]))['inchikey'], + inchikey) cas_number = '7783893' self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'cas_to_inchikey', [cas_number]))) @@ -43,7 +44,8 @@ def test_inchikey_to_inchi(self): inchikey = 'XQLMNMQWVCXIKR-UHFFFAOYSA-M' inchi = 'InChI=1S/Ag.BrHO3/c;2-1(3)4/h;(H,2,3,4)/q+1;/p-1' - self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_inchi', [inchikey])), inchi) + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_inchi', [inchikey]))['inchi'], + inchi) inchikey = 'XQLMNMQIKR-UHFFFAOYSA-M' self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_inchi', [inchikey]))) @@ -51,7 +53,8 @@ def test_inchikey_to_inchi(self): def test_name_to_inchikey(self): name = 'L-Alanine' inchikey = 'QNAYBMKLOCPYGJ-REOHCLBHSA-N' - self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'name_to_inchikey', [name])), inchikey) + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'name_to_inchikey', [name]))['inchikey'], + inchikey) name = 'L-Alalalalanine' self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'name_to_inchikey', [name]))) @@ -60,16 +63,17 @@ def test_inchikey_to_name(self): inchikey = 'QNAYBMKLOCPYGJ-REOHCLBHSA-N' name = 'L-2-Aminopropanoic acid' - self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_name', [inchikey])), name) + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_name', [inchikey]))['name'], name) inchikey = 'XQLMNMQIKR-UHFFFAOYSA-M' self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_name', [inchikey]))) def test_inchikey_to_IUPAC_name(self): inchikey = 'QNAYBMKLOCPYGJ-REOHCLBHSA-N' - uipac_name = '(2S)-2-aminopropanoic acid' + iupac_name = '(2S)-2-aminopropanoic acid' asyncio.run(wrap_with_session(self.converter, 'inchikey_to_iupac_name', [inchikey])) - self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_iupac_name', [inchikey])), uipac_name) + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_iupac_name', + [inchikey]))['iupac_name'], iupac_name) inchikey = 'XQLMNMQIKR-UHFFFAOYSA-M' self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_iupac_name', [inchikey]))) diff --git a/tests/test_NLM.py b/tests/test_NLM.py index 67721d5..edf068d 100644 --- a/tests/test_NLM.py +++ b/tests/test_NLM.py @@ -24,7 +24,7 @@ def test_inchikey_to_name(self): inchikey = 'QNAYBMKLOCPYGJ-REOHCLBHSA-N' name = 'Alanine [USAN:INN]' - self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_name', [inchikey])), name) + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_name', [inchikey]))['name'], name) inchikey = 'QNAYBMLOXXXXGJ-REOHCLBHSA-N' self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'inchikey_to_name', [inchikey]))) @@ -36,7 +36,8 @@ def test_name_to_inchikey(self): name = 'L-Alanine' inchikey = 'QNAYBMKLOCPYGJ-REOHCLBHSA-N' - self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'name_to_inchikey', [name])), inchikey) + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'name_to_inchikey', [name]))['inchikey'], + inchikey) name = 'L-Alanne' self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'name_to_inchikey', [name]))) diff --git a/tests/test_annotator.py b/tests/test_annotator.py index ede5696..416fcc3 100644 --- a/tests/test_annotator.py +++ b/tests/test_annotator.py @@ -10,7 +10,7 @@ class TestAnnotator(unittest.TestCase): def test_annotate(self): jobs = [Job(('name', 'inchi', 'PubChem'))] pubchem = mock.AsyncMock() - pubchem.convert = mock.AsyncMock(return_value='a InChi value') + pubchem.convert = mock.AsyncMock(return_value={'inchi': 'a InChi value'}) services = {'PubChem': pubchem} annotator = Annotator(services) diff --git a/tests/test_data/sample.msp b/tests/test_data/sample.msp new file mode 100644 index 0000000..20e8745 --- /dev/null +++ b/tests/test_data/sample.msp @@ -0,0 +1,87 @@ +NAME: Hydrogen +FORMULA: H2 +MW: 2 +CASNO: 1333740 +ID: 1 +COMMENT: NIST MS# 245692, Seq# M1 +NUM PEAKS: 2 +STDINCHI: InChI=1S/H +SMILES: [H] +1.0 20.98 +2.0 999.0 + +NAME: Deuterium +FORMULA: D2 +MW: 4 +CASNO: 7782390 +ID: 2 +COMMENT: NIST MS# 61316, Seq# M2 +NUM PEAKS: 2 +STDINCHI: InChI=1S/H2/h1H/i1+1D +SMILES: [2H][2H] +2.0 14.99 +4.0 999.0 + +NAME: Methane +FORMULA: CH4 +MW: 16 +CASNO: 74828 +ID: 3 +COMMENT: Any=100 ; NIST MS# 61313, Seq# M64 +NUM PEAKS: 6 +STDINCHI: InChI=1S/CH4/h1H4 +SMILES: C +12.0 37.97 +13.0 105.9 +14.0 203.82 +15.0 886.2 +16.0 999.0 +17.0 15.99 + +NAME: Methane +FORMULA: CH4 +MW: 16 +CASNO: 74828 +ID: 4 +COMMENT: Any=100 ; NIST MS# 18807, Seq# R26 +NUM PEAKS: 6 +STDINCHI: InChI=1S/CH4/h1H4 +SMILES: C +12.0 25.98 +13.0 85.92 +14.0 170.85 +15.0 855.23 +16.0 999.0 +17.0 10.99 + +NAME: Methane +FORMULA: CH4 +MW: 16 +CASNO: 74828 +ID: 5 +COMMENT: Any=100 ; NIST MS# 18809, Seq# R27 +NUM PEAKS: 6 +STDINCHI: InChI=1S/CH4/h1H4 +SMILES: C +12.0 7.99 +13.0 28.97 +14.0 74.93 +15.0 753.32 +16.0 999.0 +17.0 11.99 + +NAME: Methane +FORMULA: CH4 +MW: 16 +CASNO: 74828 +ID: 6 +COMMENT: Any=100 ; NIST MS# 423924, Seq# R28 +NUM PEAKS: 6 +STDINCHI: InChI=1S/CH4/h1H4 +SMILES: C +12.0 25.98 +13.0 74.93 +14.0 152.86 +15.0 829.25 +16.0 999.0 +17.0 10.99 \ No newline at end of file diff --git a/tests/test_data/sample_out.msp b/tests/test_data/sample_out.msp new file mode 100644 index 0000000..499d8c6 --- /dev/null +++ b/tests/test_data/sample_out.msp @@ -0,0 +1,105 @@ +NAME: Hydrogen +FORMULA: H2 +MW: 2 +CASNO: 1333-74-0 +ID: 1 +COMMENT: NIST MS# 245692, Seq# M1 +STDINCHI: InChI=1S/H +SMILES: [H] +INCHIKEY: UFHFLCQGNIYNRP-UHFFFAOYSA-N +INCHI: InChI=1S/H2/h1H +IUPAC_NAME: molecular hydrogen +NUM PEAKS: 2 +1.0 20.98 +2.0 999.0 + +NAME: Deuterium +FORMULA: D2 +MW: 4 +CASNO: 7782-39-0 +ID: 2 +COMMENT: NIST MS# 61316, Seq# M2 +STDINCHI: InChI=1S/H2/h1H/i1+1D +SMILES: [2H][2H] +INCHIKEY: UFHFLCQGNIYNRP-VVKOMZTBSA-N +INCHI: InChI=1S/H2/h1H/i1+1D +NUM PEAKS: 2 +2.0 14.99 +4.0 999.0 + +NAME: Methane +FORMULA: CH4 +MW: 16 +CASNO: 74-82-8 +ID: 3 +COMMENT: Any=100 ; NIST MS# 61313, Seq# M64 +STDINCHI: InChI=1S/CH4/h1H4 +SMILES: C +INCHIKEY: VNWKTOKETHGBQD-UHFFFAOYSA-N +INCHI: InChI=1S/CH4/h1H4 +IUPAC_NAME: methane +NUM PEAKS: 6 +12.0 37.97 +13.0 105.9 +14.0 203.82 +15.0 886.2 +16.0 999.0 +17.0 15.99 + +NAME: Methane +FORMULA: CH4 +MW: 16 +CASNO: 74-82-8 +ID: 4 +COMMENT: Any=100 ; NIST MS# 18807, Seq# R26 +STDINCHI: InChI=1S/CH4/h1H4 +SMILES: C +INCHIKEY: VNWKTOKETHGBQD-UHFFFAOYSA-N +INCHI: InChI=1S/CH4/h1H4 +IUPAC_NAME: methane +NUM PEAKS: 6 +12.0 25.98 +13.0 85.92 +14.0 170.85 +15.0 855.23 +16.0 999.0 +17.0 10.99 + +NAME: Methane +FORMULA: CH4 +MW: 16 +CASNO: 74-82-8 +ID: 5 +COMMENT: Any=100 ; NIST MS# 18809, Seq# R27 +STDINCHI: InChI=1S/CH4/h1H4 +SMILES: C +INCHIKEY: VNWKTOKETHGBQD-UHFFFAOYSA-N +INCHI: InChI=1S/CH4/h1H4 +IUPAC_NAME: methane +NUM PEAKS: 6 +12.0 7.99 +13.0 28.97 +14.0 74.93 +15.0 753.32 +16.0 999.0 +17.0 11.99 + +NAME: Methane +FORMULA: CH4 +MW: 16 +CASNO: 74-82-8 +ID: 6 +COMMENT: Any=100 ; NIST MS# 423924, Seq# R28 +STDINCHI: InChI=1S/CH4/h1H4 +SMILES: C +INCHIKEY: VNWKTOKETHGBQD-UHFFFAOYSA-N +INCHI: InChI=1S/CH4/h1H4 +IUPAC_NAME: methane +NUM PEAKS: 6 +12.0 25.98 +13.0 74.93 +14.0 152.86 +15.0 829.25 +16.0 999.0 +17.0 10.99 + diff --git a/tests/test_pubchem.py b/tests/test_pubchem.py index 92da28e..05a72fd 100644 --- a/tests/test_pubchem.py +++ b/tests/test_pubchem.py @@ -3,6 +3,7 @@ import json from libs.services.PubChem import PubChem +from frozendict import frozendict from tests.utils import wrap_with_session @@ -14,7 +15,7 @@ def test_connect_to_service(self): inchi = 'InChI=1S/C9H10O4/c10-7-3-1-6(2-4-7)5-8(11)9(12)13/h1-4,8,10-11H,5H2,(H,12,13)' args = "inchi/JSON" response = asyncio.run(wrap_with_session(self.converter, 'query_the_service', - ['PubChem', args, 'POST', {'inchi': inchi}])) + ['PubChem', args, 'POST', frozendict({'inchi': inchi})])) response_json = json.loads(response) self.assertIn('PC_Compounds', response_json) self.assertTrue(len(response_json['PC_Compounds']) == 1) @@ -25,7 +26,8 @@ def test_inchi_to_inchikey(self): inchi = 'InChI=1S/C9H10O4/c10-7-3-1-6(2-4-7)5-8(11)9(12)13/h1-4,8,10-11H,5H2,(H,12,13)' inchikey = 'JVGVDSSUAVXRDY-UHFFFAOYSA-N' - self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchi_to_inchikey', [inchi])), inchikey) + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchi_to_inchikey', [inchi]))['inchikey'], + inchikey) inchi = 'InChI=1S/C9H10O4/c102-4-7)5-8(11)9(12)13/h1-4,8,10-11H,5H2,(H,12,13)' self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'inchi_to_inchikey', [inchi]))) @@ -34,7 +36,7 @@ def test_name_to_inchi(self): name = '3-Methyl-5-[p-fluorophenyl]-2H-1,3-[3H]-oxazine-2,6-dione' inchi = 'InChI=1S/C11H8FNO3/c1-13-6-9(10(14)16-11(13)15)7-2-4-8(12)5-3-7/h2-6H,1H3' - self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'name_to_inchi', [name])), inchi) + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'name_to_inchi', [name]))['inchi'], inchi) name = 'HYDROXYPHENYLLACTATE M-H' self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'name_to_inchi', [name]))) @@ -43,7 +45,8 @@ def test_inchi_to_IUPAC_name(self): inchi = 'InChI=1S/C11H8FNO3/c1-13-6-9(10(14)16-11(13)15)7-2-4-8(12)5-3-7/h2-6H,1H3' IUPAC_name = '5-(4-fluorophenyl)-3-methyl-1,3-oxazine-2,6-dione' - self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchi_to_iupac_name', [inchi])), IUPAC_name) + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchi_to_iupac_name', [inchi]))['iupac_name'], + IUPAC_name) inchi = 'InChI=1S/C9H10O4/c102-4-7)5-8(11)93/1-4,8,10-11H,5H2,(H,12,13)' self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'inchi_to_iupac_name', [inchi]))) @@ -52,7 +55,8 @@ def test_inchi_to_formula(self): inchi = 'InChI=1S/C11H8FNO3/c1-13-6-9(10(14)16-11(13)15)7-2-4-8(12)5-3-7/h2-6H,1H3' formula = 'C11H8FNO3' - self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchi_to_formula', [inchi])), formula) + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchi_to_formula', [inchi]))['formula'], + formula) inchi = 'InChI=1S/C9H10O4/c102-4-7)5-8(11)93/1-4,8,10-11H,5H2,(H,12,13)' self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'inchi_to_formula', [inchi]))) @@ -61,7 +65,7 @@ def test_inchi_to_smiles(self): inchi = 'InChI=1S/C11H8FNO3/c1-13-6-9(10(14)16-11(13)15)7-2-4-8(12)5-3-7/h2-6H,1H3' smiles = 'CN1C=C(C(=O)OC1=O)C2=CC=C(C=C2)F' - self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchi_to_smiles', [inchi])), smiles) + self.assertEqual(asyncio.run(wrap_with_session(self.converter, 'inchi_to_smiles', [inchi]))['smiles'], smiles) inchi = 'InChI=1S/C9H10O4/c102-4-7)5-8(11)93/1-4,8,10-11H,5H2,(H,12,13)' self.assertIsNone(asyncio.run(wrap_with_session(self.converter, 'inchi_to_smiles', [inchi])))