Skip to content

Commit

Permalink
Plannotate (#224)
Browse files Browse the repository at this point in the history
* plannotate working version

* closes #223

* fix test
  • Loading branch information
manulera authored Nov 26, 2024
1 parent 80ab2c1 commit f8e3c82
Show file tree
Hide file tree
Showing 8 changed files with 197 additions and 8 deletions.
26 changes: 26 additions & 0 deletions dna_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pydna.dseqrecord import Dseqrecord
from pydna.dseq import Dseq
from pydantic_models import TextFileSequence, AddGeneIdSource, SequenceFileFormat
from shareyourcloning_linkml.datamodel import PlannotateAnnotationReport
from pydna.parsers import parse as pydna_parse
import requests
from bs4 import BeautifulSoup
Expand Down Expand Up @@ -315,3 +316,28 @@ async def get_sequence_from_euroscarf_url(plasmid_id: str) -> Dseqrecord:
raise HTTPError(url, 503, msg, msg, None)
genbank_url = f'http://www.euroscarf.de/{subpath.get("href")}'
return (await get_sequences_from_gb_file_url(genbank_url))[0]


async def annotate_with_plannotate(
    file_content: str, file_name: str, url: str
) -> tuple[Dseqrecord, list[PlannotateAnnotationReport], str]:
    """Annotate a sequence by uploading it as a file to a plannotate server.

    Args:
        file_content: Text content of the sequence file to annotate.
        file_name: File name sent along with the upload.
        url: Full URL of the plannotate annotation endpoint.

    Returns:
        A tuple ``(dseqr, report, version)``: the first record parsed from the
        genbank text in the response's ``gb_file`` field, the validated entries
        of its ``report`` field, and the value of its ``version`` field.

    Raises:
        HTTPError: with code 504 if the request times out, 500 if the server
            cannot be reached, or the server's own status code for any
            non-200 response.
    """
    async with httpx.AsyncClient() as client:
        # Only the network call can raise the httpx errors handled below,
        # so keep the try body limited to it.
        try:
            response = await client.post(
                url,
                files={'file': (file_name, file_content, 'text/plain')},
                timeout=20,
            )
        except httpx.TimeoutException as e:
            raise HTTPError(url, 504, 'plannotate server timeout', 'plannotate server timeout', None) from e
        except httpx.ConnectError as e:
            raise HTTPError(
                url, 500, 'cannot connect to plannotate server', 'cannot connect to plannotate server', None
            ) from e

        if response.status_code != 200:
            detail = 'plannotate server error'
            # The error body is not guaranteed to be a JSON object (e.g. an
            # HTML page from a proxy); fall back to the generic message
            # instead of failing while building the error.
            try:
                error_body = response.json()
            except ValueError:
                error_body = None
            if isinstance(error_body, dict):
                detail = error_body.get('detail', detail)
            raise HTTPError(url, response.status_code, detail, detail, None)

        data = response.json()
        dseqr = custom_file_parser(io.StringIO(data['gb_file']), 'genbank')[0]
        report = [PlannotateAnnotationReport.model_validate(r) for r in data['report']]
        return dseqr, report, data['version']
39 changes: 39 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
get_sequence_from_snagene_url,
custom_file_parser,
get_sequence_from_euroscarf_url,
annotate_with_plannotate as _annotate_with_plannotate,
)
from pydantic_models import (
PCRSource,
Expand Down Expand Up @@ -47,6 +48,7 @@
EuroscarfSource,
OverlapExtensionPCRLigationSource,
GatewaySource,
AnnotationSource,
)
from fastapi.middleware.cors import CORSMiddleware
from Bio.Restriction.Restriction import RestrictionBatch
Expand Down Expand Up @@ -82,6 +84,11 @@
# ENV variables ========================================
# Boolean flags: enabled only when the variable is set to exactly '1'.
RECORD_STUBS = os.environ.get('RECORD_STUBS') == '1'
SERVE_FRONTEND = os.environ.get('SERVE_FRONTEND') == '1'

# Base URL of the plannotate server. An unset OR empty variable yields None,
# so that an empty value is not turned into the bogus base URL '/'.
PLANNOTATE_URL = os.environ.get('PLANNOTATE_URL') or None

# Handle trailing slash:
if PLANNOTATE_URL is not None and not PLANNOTATE_URL.endswith('/'):
    PLANNOTATE_URL += '/'

origins = []
if os.environ.get('ALLOWED_ORIGINS') is not None:
Expand Down Expand Up @@ -1334,6 +1341,38 @@ async def primer_design_simple_pair(
return {'primers': [fwd, rvs]}


# The annotation route is only registered when a plannotate server is configured.
if PLANNOTATE_URL is not None:

    @router.post(
        '/annotate/plannotate',
        summary='Annotate a sequence with Plannotate',
        response_model=create_model(
            'PlannotateResponse',
            sources=(list[AnnotationSource], ...),
            sequences=(list[TextFileSequence], ...),
        ),
    )
    async def annotate_with_plannotate(
        sequence: TextFileSequence,
        source: AnnotationSource,
    ):
        # Parse the submitted sequence first; its name is reused for the output.
        original_record = read_dsrecord_from_json(sequence)

        # Submit the sequence to the plannotate server as a file upload.
        upload_name = f'{sequence.id}.gb'
        annotate_url = PLANNOTATE_URL + 'annotate'
        try:
            annotated_record, report, tool_version = await _annotate_with_plannotate(
                sequence.file_content, upload_name, annotate_url
            )
        except HTTPError as err:
            # Relay the helper's status code and message to the API client.
            raise HTTPException(status_code=err.code, detail=err.msg) from err

        # Record on the source how the annotation was produced.
        source.annotation_report = report
        source.annotation_tool = 'plannotate'
        source.annotation_tool_version = tool_version

        annotated_record.name = original_record.name + '_annotated'
        output_sequence = format_sequence_genbank(annotated_record, source.output_name)

        return {'sources': [source], 'sequences': [output_sequence]}


@router.post(
'/validate',
summary='Validate a cloning strategy',
Expand Down
22 changes: 18 additions & 4 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions pydantic_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
EuroscarfSource as _EuroscarfSource,
GatewaySource as _GatewaySource,
InFusionSource as _InFusionSource,
AnnotationSource as _AnnotationSource,
)
from pydna.utils import shift_location as _shift_location
from assembly2 import edge_representation2subfragment_representation, subfragment_representation2edge_representation
Expand Down Expand Up @@ -131,6 +132,10 @@ class GenomeCoordinatesSource(SourceCommonClass, _GenomeCoordinatesSource):
pass


class AnnotationSource(SourceCommonClass, _AnnotationSource):
    # Combines SourceCommonClass with the imported _AnnotationSource model;
    # no fields or methods are added here.
    pass


class RestrictionSequenceCut(_RestrictionSequenceCut):

@classmethod
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ pydna = {git = "https://github.com/BjornFJohansson/pydna", rev = "9d112d71534194
requests = "^2.31.0"
regex = "^2023.10.3"
pydantic = "^2.7.1"
shareyourcloning-linkml = "0.1.9a0"
shareyourcloning-linkml = "0.1.10a0"
pandas = "^2.2.3"
openpyxl = "^3.1.5"

Expand All @@ -33,6 +33,7 @@ pytest = "^7.4.3"
pre-commit = "^3.6.2"
pytest-cov = "^4.1.0"
pytest-rerunfailures = "^14.0"
respx = "^0.21.1"


[tool.poetry.group.ipython.dependencies]
Expand Down
70 changes: 67 additions & 3 deletions test_endpoints.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from dna_functions import format_sequence_genbank, read_dsrecord_from_json
from main import app
from dna_functions import format_sequence_genbank, read_dsrecord_from_json, annotate_with_plannotate
import main as _main
from fastapi.testclient import TestClient
from pydna.parsers import parse as pydna_parse
from Bio.Restriction.Restriction import CommOnly
Expand Down Expand Up @@ -28,6 +28,7 @@
EuroscarfSource,
SnapGenePlasmidSource,
GatewaySource,
AnnotationSource,
)
from pydna.dseqrecord import Dseqrecord
import unittest
Expand All @@ -39,6 +40,10 @@
import pytest
from Bio.Seq import reverse_complement
import os
from importlib import reload
import respx
import httpx
from urllib.error import HTTPError


def get_all_feature_labels(seq: Dseqrecord):
Expand All @@ -60,7 +65,7 @@ def wrapper(*args, **kwargs):
return decorator


client = TestClient(app)
client = TestClient(_main.app)


class VersionTest(unittest.TestCase):
Expand Down Expand Up @@ -2629,5 +2634,64 @@ def test_single_input(self):
self.assertEqual(str(seqs[0].seq), product)


class PlannotateTest(unittest.TestCase):
    """Tests for the /annotate/plannotate endpoint and its request helper.

    All requests to the plannotate server are mocked with respx.
    """

    def setUp(self):
        # main reads PLANNOTATE_URL at import time, so the variable has to be
        # set before the module is reloaded. Keep the MonkeyPatch instance so
        # the change can be undone in tearDown.
        self.monkeypatch = pytest.MonkeyPatch()
        self.monkeypatch.setenv('PLANNOTATE_URL', 'http://dummy/url')

        reload(_main)
        self.client = TestClient(_main.app)

    def tearDown(self):
        # Restore the environment exactly as it was (rather than leaving an
        # empty PLANNOTATE_URL behind), then reload main under it.
        self.monkeypatch.undo()
        reload(_main)

    @respx.mock
    def test_plannotate(self):
        seq = Dseqrecord(
            'AAAAttgagatcctttttttctgcgcgtaatctgctgcttgcaaacaaaaaaaccaccgctaccagcggtggtttgtttgccggatcaagagctaccaactctttttccgaaggtaactggcttcagcagagcgcagataccaaatactgttcttctagtgtagccgtagttaggccaccacttcaagaactctgtagcaccgcctacatacctcgctctgctaatcctgttaccagtggctgctgccagtggcgataagtcgtgtcttaccgggttggactcaagacgatagttaccggataaggcgcagcggtcgggctgaacggggggttcgtgcacacagcccagcttggagcgaacgacctacaccgaactgagatacctacagcgtgagctatgagaaagcgccacgcttcccgaagggagaaaggcggacaggtatccggtaagcggcagggtcggaacaggagagcgcacgagggagcttccagggggaaacgcctggtatctttatagtcctgtcgggtttcgccacctctgacttgagcgtcgatttttgtgatgctcgtcaggggggcggagcctatggaaaAAAA'
        )
        seq = format_sequence_genbank(seq)
        with open('test_files/planottate/mock_response_success.json') as handle:
            mock_response_success = json.load(handle)
        # Mock the HTTPX POST request
        respx.post('http://dummy/url/annotate').respond(200, json=mock_response_success)

        source = AnnotationSource(id=0, annotation_tool='plannotate')
        response = self.client.post(
            '/annotate/plannotate', json={'sequence': seq.model_dump(), 'source': source.model_dump()}
        )
        self.assertEqual(response.status_code, 200)
        payload = response.json()
        seq = read_dsrecord_from_json(TextFileSequence.model_validate(payload['sequences'][0]))
        source = payload['sources'][0]
        self.assertEqual(source['annotation_tool'], 'plannotate')
        self.assertEqual(source['annotation_tool_version'], '1.2.2')
        self.assertEqual(len(source['annotation_report']), 2)
        feature_names = [f.qualifiers['label'][0] for f in seq.features]
        self.assertIn('ori', feature_names)
        self.assertIn('RNAI', feature_names)

    @respx.mock
    def test_plannotate_down(self):
        respx.post('http://dummy/url/annotate').mock(side_effect=httpx.ConnectError('Connection error'))
        seq = Dseqrecord('aaa')
        seq = format_sequence_genbank(seq)
        source = AnnotationSource(id=0, annotation_tool='plannotate')
        response = self.client.post(
            '/annotate/plannotate', json={'sequence': seq.model_dump(), 'source': source.model_dump()}
        )
        self.assertEqual(response.status_code, 500)

    @respx.mock
    def test_plannotate_other_error(self):
        # unittest.TestCase does not await coroutine test methods, so the
        # helper is driven explicitly with asyncio.run from a sync test.
        import asyncio

        # This is tested here because it's impossible to send a malformed request from the backend
        respx.post('http://dummy/url/annotate').respond(400, json={'error': 'bad request'})

        with self.assertRaises(HTTPError) as ctx:
            asyncio.run(annotate_with_plannotate('hello', 'hello.blah', 'http://dummy/url/annotate'))

        self.assertEqual(ctx.exception.code, 400)


if __name__ == '__main__':
unittest.main()
2 changes: 2 additions & 0 deletions test_files/planottate/input.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
> blah
AAAAttgagatcctttttttctgcgcgtaatctgctgcttgcaaacaaaaaaaccaccgctaccagcggtggtttgtttgccggatcaagagctaccaactctttttccgaaggtaactggcttcagcagagcgcagataccaaatactgttcttctagtgtagccgtagttaggccaccacttcaagaactctgtagcaccgcctacatacctcgctctgctaatcctgttaccagtggctgctgccagtggcgataagtcgtgtcttaccgggttggactcaagacgatagttaccggataaggcgcagcggtcgggctgaacggggggttcgtgcacacagcccagcttggagcgaacgacctacaccgaactgagatacctacagcgtgagctatgagaaagcgccacgcttcccgaagggagaaaggcggacaggtatccggtaagcggcagggtcggaacaggagagcgcacgagggagcttccagggggaaacgcctggtatctttatagtcctgtcgggtttcgccacctctgacttgagcgtcgatttttgtgatgctcgtcaggggggcggagcctatggaaaAAAA
38 changes: 38 additions & 0 deletions test_files/planottate/mock_response_success.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{
"version": "1.2.2",
"gb_file": "LOCUS plasmid 597 bp DNA linear SYN 26-NOV-2024\nDEFINITION .\nACCESSION .\nVERSION .\nKEYWORDS .\nSOURCE .\n ORGANISM .\n .\nCOMMENT Annotated with pLannotate v1.2.2\nFEATURES Location/Qualifiers\n rep_origin 5..593\n /note=\"pLannotate\"\n /label=\"ori\"\n /database=\"snapgene\"\n /identity=\"99.8\"\n /match_length=\"100.0\"\n /fragment=\"False\"\n /other=\"rep_origin\"\n ncRNA complement(44..148)\n /note=\"pLannotate\"\n /label=\"RNAI\"\n /database=\"Rfam\"\n /identity=\"100.0\"\n /match_length=\"102.9\"\n /fragment=\"False\"\n /other=\"ncRNA\"\nORIGIN\n 1 aaaattgaga tccttttttt ctgcgcgtaa tctgctgctt gcaaacaaaa aaaccaccgc\n 61 taccagcggt ggtttgtttg ccggatcaag agctaccaac tctttttccg aaggtaactg\n 121 gcttcagcag agcgcagata ccaaatactg ttcttctagt gtagccgtag ttaggccacc\n 181 acttcaagaa ctctgtagca ccgcctacat acctcgctct gctaatcctg ttaccagtgg\n 241 ctgctgccag tggcgataag tcgtgtctta ccgggttgga ctcaagacga tagttaccgg\n 301 ataaggcgca gcggtcgggc tgaacggggg gttcgtgcac acagcccagc ttggagcgaa\n 361 cgacctacac cgaactgaga tacctacagc gtgagctatg agaaagcgcc acgcttcccg\n 421 aagggagaaa ggcggacagg tatccggtaa gcggcagggt cggaacagga gagcgcacga\n 481 gggagcttcc agggggaaac gcctggtatc tttatagtcc tgtcgggttt cgccacctct\n 541 gacttgagcg tcgatttttg tgatgctcgt caggggggcg gagcctatgg aaaaaaa\n//\n",
"report": [
{
"sseqid": "ori",
"start_location": 4,
"end_location": 593,
"strand": 1,
"percent_identity": 99.83,
"full_length_of_feature_in_db": 589,
"length_of_found_feature": 589,
"percent_match_length": 100,
"fragment": false,
"database": "snapgene",
"Feature": "ori",
"Type": "rep_origin",
"Description": "high-copy-number ColE1/pMB1/pBR322/pUC origin of replication ",
"sequence": "TTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAA"
},
{
"sseqid": "1",
"start_location": 43,
"end_location": 148,
"strand": -1,
"percent_identity": 100,
"full_length_of_feature_in_db": 102,
"length_of_found_feature": 105,
"percent_match_length": 97.05882352941177,
"fragment": false,
"database": "Rfam",
"Feature": "RNAI",
"Type": "ncRNA",
"Description": "Accession: RF00106 - RNAI",
"sequence": "AGTATTTGGTATCTGCGCTCTGCTGAAGCCAGTTACCTTCGGAAAAAGAGTTGGTAGCTCTTGATCCGGCAAACAAACCACCGCTGGTAGCGGTGGTTTTTTTGT"
}
]
}

0 comments on commit f8e3c82

Please sign in to comment.