From 388e7da1627a81f248d74638331436fb21be5d4d Mon Sep 17 00:00:00 2001 From: Manuel Lera-Ramirez Date: Tue, 26 Nov 2024 17:39:05 +0000 Subject: [PATCH 1/3] plannotate working version --- dna_functions.py | 23 +++++++ main.py | 41 ++++++++++++ poetry.lock | 22 +++++-- pydantic_models.py | 5 ++ pyproject.toml | 3 +- test_endpoints.py | 63 ++++++++++++++++++- test_files/planottate/input.fasta | 2 + .../planottate/mock_response_success.json | 38 +++++++++++ 8 files changed, 189 insertions(+), 8 deletions(-) create mode 100644 test_files/planottate/input.fasta create mode 100644 test_files/planottate/mock_response_success.json diff --git a/dna_functions.py b/dna_functions.py index 83d8c92..a9b15c9 100644 --- a/dna_functions.py +++ b/dna_functions.py @@ -5,6 +5,7 @@ from pydna.dseqrecord import Dseqrecord from pydna.dseq import Dseq from pydantic_models import TextFileSequence, AddGeneIdSource, SequenceFileFormat +from shareyourcloning_linkml.datamodel import PlannotateAnnotationReport from pydna.parsers import parse as pydna_parse import requests from bs4 import BeautifulSoup @@ -315,3 +316,25 @@ async def get_sequence_from_euroscarf_url(plasmid_id: str) -> Dseqrecord: raise HTTPError(url, 503, msg, msg, None) genbank_url = f'http://www.euroscarf.de/{subpath.get("href")}' return (await get_sequences_from_gb_file_url(genbank_url))[0] + + +async def annotate_with_plannotate( + file_content: str, file_name: str, url: str +) -> tuple[Dseqrecord, PlannotateAnnotationReport, str]: + async with httpx.AsyncClient() as client: + try: + print('aaaaaaaaaaaaaa') + response = await client.post( + url, + files={'file': (file_name, file_content, 'text/plain')}, + ) + if response.status_code != 200: + raise HTTPError(url, response.status_code, 'plannotate server error', 'plannotate server error', None) + data = response.json() + dseqr = custom_file_parser(io.StringIO(data['gb_file']), 'genbank')[0] + report = [PlannotateAnnotationReport.model_validate(r) for r in data['report']] + return dseqr, report, data['version'] + except httpx.ConnectError as e: + raise HTTPError( + url, 500, 'cannot connect to plannotate server', 'cannot connect to plannotate server', None + ) from e diff --git a/main.py b/main.py index f8dc32f..1205ed1 100644 --- a/main.py +++ b/main.py @@ -19,6 +19,7 @@ get_sequence_from_snagene_url, custom_file_parser, get_sequence_from_euroscarf_url, + annotate_with_plannotate as _annotate_with_plannotate, ) from pydantic_models import ( PCRSource, @@ -47,6 +48,7 @@ EuroscarfSource, OverlapExtensionPCRLigationSource, GatewaySource, + AnnotationSource, ) from fastapi.middleware.cors import CORSMiddleware from Bio.Restriction.Restriction import RestrictionBatch @@ -82,6 +84,11 @@ # ENV variables ======================================== RECORD_STUBS = os.environ['RECORD_STUBS'] == '1' if 'RECORD_STUBS' in os.environ else False SERVE_FRONTEND = os.environ['SERVE_FRONTEND'] == '1' if 'SERVE_FRONTEND' in os.environ else False +PLANNOTATE_URL = os.environ['PLANNOTATE_URL'] if 'PLANNOTATE_URL' in os.environ else None + +# Handle trailing slash: +if PLANNOTATE_URL is not None and not PLANNOTATE_URL.endswith('/'): + PLANNOTATE_URL += '/' origins = [] if os.environ.get('ALLOWED_ORIGINS') is not None: @@ -1334,6 +1341,40 @@ async def primer_design_simple_pair( return {'primers': [fwd, rvs]} +if PLANNOTATE_URL is not None: + + @router.post( + '/annotate/plannotate', + summary='Annotate a sequence with Plannotate', + response_model=create_model( + 'PlannotateResponse', + sources=(list[AnnotationSource], ...), + sequences=(list[TextFileSequence], ...), + ), + ) + async def annotate_with_plannotate( + sequence: TextFileSequence, + ): + input_seqr = read_dsrecord_from_json(sequence) + # Make a request submitting sequence as a file: + try: + seqr, annotations, version = await _annotate_with_plannotate( + sequence.file_content, f'{sequence.id}.gb', PLANNOTATE_URL + 'annotate' + ) + except HTTPError as e: + raise HTTPException(e.code, *e.args) from e + + source = AnnotationSource( + id=0, + annotation_report=annotations, + annotation_tool='plannotate', + annotation_tool_version=version, + ) + seqr.name = input_seqr.name + '_annotated' + + return {'sources': [source], 'sequences': [format_sequence_genbank(seqr, source.output_name)]} + + @router.post( '/validate', summary='Validate a cloning strategy', diff --git a/poetry.lock b/poetry.lock index b286b0d..1adba1e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2174,6 +2174,20 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "respx" +version = "0.21.1" +description = "A utility for mocking out the Python HTTPX and HTTP Core libraries." +optional = false +python-versions = ">=3.7" +files = [ + {file = "respx-0.21.1-py2.py3-none-any.whl", hash = "sha256:05f45de23f0c785862a2c92a3e173916e8ca88e4caad715dd5f68584d6053c20"}, + {file = "respx-0.21.1.tar.gz", hash = "sha256:0bd7fe21bfaa52106caa1223ce61224cf30786985f17c63c5d71eff0307ee8af"}, +] + +[package.dependencies] +httpx = ">=0.21.0" + [[package]] name = "rich" version = "13.7.1" @@ -2224,13 +2238,13 @@ testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jar [[package]] name = "shareyourcloning-linkml" -version = "0.1.9a0" +version = "0.1.10a0" description = "A LinkML data model for ShareYourCloning" optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "shareyourcloning_linkml-0.1.9a0-py3-none-any.whl", hash = "sha256:f8c10c57c7cbc21128df4e436dd1d1a42ec8a0a85087b3a258d3723b4e654b45"}, - {file = "shareyourcloning_linkml-0.1.9a0.tar.gz", hash = "sha256:cbd0b8560491306a3c988f0da9b354257694e4b8452f86adf396f2a897386a70"}, + {file = "shareyourcloning_linkml-0.1.10a0-py3-none-any.whl", hash = "sha256:1b39b00195651566ee252dea8a12587bbae31b4c65a30f591ebdd8fe41b36f0c"}, + {file = "shareyourcloning_linkml-0.1.10a0.tar.gz", hash = "sha256:0a3448a4b50bd46909f2b672568368f356b96aca1c5b760640796ffb18b8b1fb"}, ] [package.dependencies] @@ -2703,4 +2717,4 @@ test = ["pytest (>=6.0.0)", "setuptools (>=65)"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "aede493acac225de8818047139d33d280360dc5af0ad25b3de5cdf718f884a57" +content-hash = "c88f08376988b48f9118feefe2f3a884b272e1ada60a2355ff39cc68232a7ae9" diff --git a/pydantic_models.py b/pydantic_models.py index 6fa423c..82d12ba 100644 --- a/pydantic_models.py +++ b/pydantic_models.py @@ -39,6 +39,7 @@ EuroscarfSource as _EuroscarfSource, GatewaySource as _GatewaySource, InFusionSource as _InFusionSource, + AnnotationSource as _AnnotationSource, ) from pydna.utils import shift_location as _shift_location from assembly2 import edge_representation2subfragment_representation, subfragment_representation2edge_representation @@ -131,6 +132,10 @@ class GenomeCoordinatesSource(SourceCommonClass, _GenomeCoordinatesSource): pass +class AnnotationSource(SourceCommonClass, _AnnotationSource): + pass + + class RestrictionSequenceCut(_RestrictionSequenceCut): @classmethod diff --git a/pyproject.toml b/pyproject.toml index 11ffd4d..b91aa0f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ pydna = {git = "https://github.com/BjornFJohansson/pydna", rev = "9d112d71534194 requests = "^2.31.0" regex = "^2023.10.3" pydantic = "^2.7.1" -shareyourcloning-linkml = "0.1.9a0" +shareyourcloning-linkml = "0.1.10a0" pandas = "^2.2.3" openpyxl = "^3.1.5" @@ -33,6 +33,7 @@ pytest = "^7.4.3" pre-commit = "^3.6.2" pytest-cov = "^4.1.0" pytest-rerunfailures = "^14.0" +respx = "^0.21.1" [tool.poetry.group.ipython.dependencies] diff --git a/test_endpoints.py b/test_endpoints.py index 3bc868a..ee83adb 100644 --- a/test_endpoints.py +++ b/test_endpoints.py @@ -1,5 +1,5 @@ -from dna_functions import format_sequence_genbank, read_dsrecord_from_json -from main import app +from dna_functions import format_sequence_genbank, read_dsrecord_from_json, annotate_with_plannotate +import main as _main from fastapi.testclient import TestClient from pydna.parsers import parse as pydna_parse from Bio.Restriction.Restriction import CommOnly @@ -39,6 +39,10 @@ import pytest from Bio.Seq import reverse_complement import os +from importlib import reload +import respx +import httpx +from urllib.error import HTTPError def get_all_feature_labels(seq: Dseqrecord): @@ -60,7 +64,7 @@ def wrapper(*args, **kwargs): return decorator -client = TestClient(app) +client = TestClient(_main.app) class VersionTest(unittest.TestCase): @@ -2629,5 +2633,58 @@ def test_single_input(self): self.assertEqual(str(seqs[0].seq), product) +class PlannotateTest(unittest.TestCase): + def setUp(self): + # Has to be imported here to get the right environment variable + pytest.MonkeyPatch().setenv('PLANNOTATE_URL', 'http://dummy/url') + + reload(_main) + self.client = TestClient(_main.app) + + def tearDown(self): + pytest.MonkeyPatch().setenv('PLANNOTATE_URL', '') + reload(_main) + + @respx.mock + def test_plannotate(self): + seq = Dseqrecord( + 'AAAAttgagatcctttttttctgcgcgtaatctgctgcttgcaaacaaaaaaaccaccgctaccagcggtggtttgtttgccggatcaagagctaccaactctttttccgaaggtaactggcttcagcagagcgcagataccaaatactgttcttctagtgtagccgtagttaggccaccacttcaagaactctgtagcaccgcctacatacctcgctctgctaatcctgttaccagtggctgctgccagtggcgataagtcgtgtcttaccgggttggactcaagacgatagttaccggataaggcgcagcggtcgggctgaacggggggttcgtgcacacagcccagcttggagcgaacgacctacaccgaactgagatacctacagcgtgagctatgagaaagcgccacgcttcccgaagggagaaaggcggacaggtatccggtaagcggcagggtcggaacaggagagcgcacgagggagcttccagggggaaacgcctggtatctttatagtcctgtcgggtttcgccacctctgacttgagcgtcgatttttgtgatgctcgtcaggggggcggagcctatggaaaAAAA' + ) + seq = format_sequence_genbank(seq) + mock_response_success = json.load(open('test_files/planottate/mock_response_success.json')) + # Mock the HTTPX GET request + respx.post('http://dummy/url/annotate').respond(200, json=mock_response_success) + + response = self.client.post('/annotate/plannotate', json=seq.model_dump()) + self.assertEqual(response.status_code, 200) + payload = response.json() + seq = read_dsrecord_from_json(TextFileSequence.model_validate(payload['sequences'][0])) + source = payload['sources'][0] + self.assertEqual(source['annotation_tool'], 'plannotate') + self.assertEqual(source['annotation_tool_version'], '1.2.2') + self.assertEqual(len(source['annotation_report']), 2) + feature_names = [f.qualifiers['label'][0] for f in seq.features] + self.assertIn('ori', feature_names) + self.assertIn('RNAI', feature_names) + + @respx.mock + def test_plannotate_down(self): + respx.post('http://dummy/url/annotate').mock(side_effect=httpx.ConnectError('Connection error')) + seq = Dseqrecord('aaa') + seq = format_sequence_genbank(seq) + response = self.client.post('/annotate/plannotate', json=seq.model_dump()) + self.assertEqual(response.status_code, 500) + + @respx.mock + async def test_plannotate_other_error(self): + # This is tested here because it's impossible to send a malformed request from the backend + respx.post('http://dummy/url/annotate').respond(400, json={'error': 'bad request'}) + + with pytest.raises(HTTPError) as e: + await annotate_with_plannotate('hello', 'hello.blah', 'http://dummy/url/annotate') + + self.assertEqual(e.code, 400) + + if __name__ == '__main__': unittest.main() diff --git a/test_files/planottate/input.fasta b/test_files/planottate/input.fasta new file mode 100644 index 0000000..d3a604c --- /dev/null +++ b/test_files/planottate/input.fasta @@ -0,0 +1,2 @@ +> blah +AAAAttgagatcctttttttctgcgcgtaatctgctgcttgcaaacaaaaaaaccaccgctaccagcggtggtttgtttgccggatcaagagctaccaactctttttccgaaggtaactggcttcagcagagcgcagataccaaatactgttcttctagtgtagccgtagttaggccaccacttcaagaactctgtagcaccgcctacatacctcgctctgctaatcctgttaccagtggctgctgccagtggcgataagtcgtgtcttaccgggttggactcaagacgatagttaccggataaggcgcagcggtcgggctgaacggggggttcgtgcacacagcccagcttggagcgaacgacctacaccgaactgagatacctacagcgtgagctatgagaaagcgccacgcttcccgaagggagaaaggcggacaggtatccggtaagcggcagggtcggaacaggagagcgcacgagggagcttccagggggaaacgcctggtatctttatagtcctgtcgggtttcgccacctctgacttgagcgtcgatttttgtgatgctcgtcaggggggcggagcctatggaaaAAAA diff --git a/test_files/planottate/mock_response_success.json b/test_files/planottate/mock_response_success.json new file mode 100644 index 0000000..1ea646a --- /dev/null +++ b/test_files/planottate/mock_response_success.json @@ -0,0 +1,38 @@ +{ + "version": "1.2.2", + "gb_file": "LOCUS plasmid 597 bp DNA linear SYN 26-NOV-2024\nDEFINITION .\nACCESSION .\nVERSION .\nKEYWORDS .\nSOURCE .\n ORGANISM .\n .\nCOMMENT Annotated with pLannotate v1.2.2\nFEATURES Location/Qualifiers\n rep_origin 5..593\n /note=\"pLannotate\"\n /label=\"ori\"\n /database=\"snapgene\"\n /identity=\"99.8\"\n /match_length=\"100.0\"\n /fragment=\"False\"\n /other=\"rep_origin\"\n ncRNA complement(44..148)\n /note=\"pLannotate\"\n /label=\"RNAI\"\n /database=\"Rfam\"\n /identity=\"100.0\"\n /match_length=\"102.9\"\n /fragment=\"False\"\n /other=\"ncRNA\"\nORIGIN\n 1 aaaattgaga tccttttttt ctgcgcgtaa tctgctgctt gcaaacaaaa aaaccaccgc\n 61 taccagcggt ggtttgtttg ccggatcaag agctaccaac tctttttccg aaggtaactg\n 121 gcttcagcag agcgcagata ccaaatactg ttcttctagt gtagccgtag ttaggccacc\n 181 acttcaagaa ctctgtagca ccgcctacat acctcgctct gctaatcctg ttaccagtgg\n 241 ctgctgccag tggcgataag tcgtgtctta ccgggttgga ctcaagacga tagttaccgg\n 301 ataaggcgca gcggtcgggc tgaacggggg gttcgtgcac acagcccagc ttggagcgaa\n 361 cgacctacac cgaactgaga tacctacagc gtgagctatg agaaagcgcc acgcttcccg\n 421 aagggagaaa ggcggacagg tatccggtaa gcggcagggt cggaacagga gagcgcacga\n 481 gggagcttcc agggggaaac gcctggtatc tttatagtcc tgtcgggttt cgccacctct\n 541 gacttgagcg tcgatttttg tgatgctcgt caggggggcg gagcctatgg aaaaaaa\n//\n", + "report": [ + { + "sseqid": "ori", + "start_location": 4, + "end_location": 593, + "strand": 1, + "percent_identity": 99.83, + "full_length_of_feature_in_db": 589, + "length_of_found_feature": 589, + "percent_match_length": 100, + "fragment": false, + "database": "snapgene", + "Feature": "ori", + "Type": "rep_origin", + "Description": "high-copy-number ColE1/pMB1/pBR322/pUC origin of replication ", + "sequence": "TTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAA" + }, + { + "sseqid": "1", + "start_location": 43, + "end_location": 148, + "strand": -1, + "percent_identity": 100, + "full_length_of_feature_in_db": 102, + "length_of_found_feature": 105, + "percent_match_length": 97.05882352941177, + "fragment": false, + "database": "Rfam", + "Feature": "RNAI", + "Type": "ncRNA", + "Description": "Accession: RF00106 - RNAI", + "sequence": "AGTATTTGGTATCTGCGCTCTGCTGAAGCCAGTTACCTTCGGAAAAAGAGTTGGTAGCTCTTGATCCGGCAAACAAACCACCGCTGGTAGCGGTGGTTTTTTTGT" + } + ] +} From d65e95673d56ebf11df2748625fdb6efa5b4c093 Mon Sep 17 00:00:00 2001 From: Manuel Lera-Ramirez Date: Tue, 26 Nov 2024 18:35:56 +0000 Subject: [PATCH 2/3] closes #223 --- dna_functions.py | 7 +++++-- main.py | 12 +++++------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/dna_functions.py b/dna_functions.py index a9b15c9..28de145 100644 --- a/dna_functions.py +++ b/dna_functions.py @@ -323,17 +323,20 @@ async def annotate_with_plannotate( ) -> tuple[Dseqrecord, PlannotateAnnotationReport, str]: async with httpx.AsyncClient() as client: try: - print('aaaaaaaaaaaaaa') response = await client.post( url, files={'file': (file_name, file_content, 'text/plain')}, + timeout=20, ) if response.status_code != 200: - raise HTTPError(url, response.status_code, 'plannotate server error', 'plannotate server error', None) + detail = response.json().get('detail', 'plannotate server error') + raise HTTPError(url, response.status_code, detail, detail, None) data = response.json() dseqr = custom_file_parser(io.StringIO(data['gb_file']), 'genbank')[0] report = [PlannotateAnnotationReport.model_validate(r) for r in data['report']] return dseqr, report, data['version'] + except httpx.TimeoutException as e: + raise HTTPError(url, 504, 'plannotate server timeout', 'plannotate server timeout', None) from e except httpx.ConnectError as e: raise HTTPError( url, 500, 'cannot connect to plannotate server', 'cannot connect to plannotate server', None diff --git a/main.py b/main.py index 1205ed1..0353bc6 100644 --- a/main.py +++ b/main.py @@ -1354,6 +1354,7 @@ async def primer_design_simple_pair( ) async def annotate_with_plannotate( sequence: TextFileSequence, + source: AnnotationSource, ): input_seqr = read_dsrecord_from_json(sequence) # Make a request submitting sequence as a file: @@ -1362,14 +1363,11 @@ async def annotate_with_plannotate( sequence.file_content, f'{sequence.id}.gb', PLANNOTATE_URL + 'annotate' ) except HTTPError as e: - raise HTTPException(e.code, *e.args) from e + raise HTTPException(e.code, e.msg) from e - source = AnnotationSource( - id=0, - annotation_report=annotations, - annotation_tool='plannotate', - annotation_tool_version=version, - ) + source.annotation_report = annotations + source.annotation_tool = 'plannotate' + source.annotation_tool_version = version seqr.name = input_seqr.name + '_annotated' return {'sources': [source], 'sequences': [format_sequence_genbank(seqr, source.output_name)]} From e26273c42364ebbd3a4f0aefb0480eb3adf86e2d Mon Sep 17 00:00:00 2001 From: Manuel Lera-Ramirez Date: Tue, 26 Nov 2024 18:41:24 +0000 Subject: [PATCH 3/3] fix test --- test_endpoints.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/test_endpoints.py b/test_endpoints.py index ee83adb..8b7c2b6 100644 --- a/test_endpoints.py +++ b/test_endpoints.py @@ -28,6 +28,7 @@ EuroscarfSource, SnapGenePlasmidSource, GatewaySource, + AnnotationSource, ) from pydna.dseqrecord import Dseqrecord import unittest @@ -2655,7 +2656,10 @@ def test_plannotate(self): # Mock the HTTPX GET request respx.post('http://dummy/url/annotate').respond(200, json=mock_response_success) - response = self.client.post('/annotate/plannotate', json=seq.model_dump()) + source = AnnotationSource(id=0, annotation_tool='plannotate') + response = self.client.post( + '/annotate/plannotate', json={'sequence': seq.model_dump(), 'source': source.model_dump()} + ) self.assertEqual(response.status_code, 200) payload = response.json() seq = read_dsrecord_from_json(TextFileSequence.model_validate(payload['sequences'][0])) @@ -2672,7 +2676,10 @@ def test_plannotate_down(self): respx.post('http://dummy/url/annotate').mock(side_effect=httpx.ConnectError('Connection error')) seq = Dseqrecord('aaa') seq = format_sequence_genbank(seq) - response = self.client.post('/annotate/plannotate', json=seq.model_dump()) + source = AnnotationSource(id=0, annotation_tool='plannotate') + response = self.client.post( + '/annotate/plannotate', json={'sequence': seq.model_dump(), 'source': source.model_dump()} + ) self.assertEqual(response.status_code, 500) @respx.mock