Skip to content

Commit

Permalink
Clustal api (#75)
Browse files Browse the repository at this point in the history
* Refactor alignment notebook to use updated pyeed API and handle API request errors

* updated example

* cleaned

* formatted

* implemented abstract tool with clustalo

* added docstr

* removed deprecated

* removed

* added tofasta

* API update

---------

Co-authored-by: sdRDM Bot <sdRDM@bot.com>
  • Loading branch information
haeussma and sdRDM Bot authored May 11, 2024
1 parent 20a9ead commit a36e97c
Show file tree
Hide file tree
Showing 33 changed files with 12,441 additions and 13,418 deletions.
121 changes: 107 additions & 14 deletions examples/alignment.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,17 @@
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%reload_ext autoreload\n",
"%autoreload 2\n",
"\n",
"import json\n",
"\n",
"from pyeed.core import ProteinInfo, Alignment\n",
"from pyeed.align import ClustalOmega"
"from pyeed.core import ProteinRecord\n",
"from pyeed.align.msa import MSA"
]
},
{
Expand All @@ -21,13 +24,13 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "cd77c7df525540a6af5b3805541b1d59",
"model_id": "d86f74adb4e945eca8aed1567ea40590",
"version_major": 2,
"version_minor": 0
},
Expand All @@ -38,6 +41,48 @@
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Request to https://www.ebi.ac.uk/ena/browser/api/xml/3144227 failed with status code 404\n",
"</pre>\n"
],
"text/plain": [
"Request to https://www.ebi.ac.uk/ena/browser/api/xml/3144227 failed with status code 404\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Response: &lt;?xml version=\"1.0\" encoding=\"UTF-8\"?&gt;\n",
"&lt;ErrorDetails&gt;\n",
" &lt;timestamp&gt;1715461558378&lt;/timestamp&gt;\n",
" &lt;status&gt;404&lt;/status&gt;\n",
" &lt;error&gt;Not Found&lt;/error&gt;\n",
" &lt;message&gt;3144227 not found.&lt;/message&gt;\n",
" &lt;path&gt;/ena/browser/api/xml/3144227&lt;/path&gt;\n",
"&lt;/ErrorDetails&gt;\n",
"\n",
"</pre>\n"
],
"text/plain": [
"Response: <?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
"<ErrorDetails>\n",
" <timestamp>1715461558378</timestamp>\n",
" <status>404</status>\n",
" <error>Not Found</error>\n",
" <message>3144227 not found.</message>\n",
" <path>/ena/browser/api/xml/3144227</path>\n",
"</ErrorDetails>\n",
"\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
Expand Down Expand Up @@ -67,14 +112,38 @@
"with open(\"ids.json\", \"r\") as f:\n",
" ids = json.load(f)\n",
"\n",
"sequences = ProteinInfo.get_ids(ids)"
"sequences = ProteinRecord.get_ids(ids)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e2838d7c343140f5856fa627ea1b303a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Output()"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n"
],
"text/plain": []
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
Expand All @@ -84,20 +153,44 @@
}
],
"source": [
"alignment = Alignment.from_sequences(sequences=sequences, aligner=ClustalOmega)"
"alignment = MSA(sequences=sequences).clustalo()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Q97CT6\n",
"------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------MERNISVEELNQ-IPTPKKEVEIVERKGIGHPDSVADGIAEAVSRSLSKYYLEHYG--R-ILHHNTDQVEVVGGQSAPKY----------G-GGLV--LEPTYILLSGRATTKVGN---------DRVPYKSITIKAAKDYLRNNFS-HLDVDADVMIDSRIGQ-------G-SVDLVEVYDT--S-K-------------------------------LEANDTSFGVGFAPLSETENIVLKTERYLNGS-----LK--KKLPMVGYDIKVMG-FRQKDTINLTVAAAFVDKYIKDADEYFNLKDQLKDLVLD-NA-VE-ET-DK--------EV---KVYINTAD-----IR------EN-----SKSVGYLTVTGMSMENGDDGSVGRGNRVNGLITPYRAMSMEAAAGKNPVTHVGKLYNVLANKIANDIVQEE--GN--DIAEVLVRIVSQIGRPIDDP-HVASVQVIYEGNV----DHS--KH-KNNIRNLVNDRL--AHVS--D-LTMQFVEGK--ITV--F---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n"
"data": {
"text/plain": [
"1046"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(alignment.aligned_sequences)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "'Sequence' object has no attribute 'source_id'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[5], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# print the first aligned sequence string and the ID\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43malignment\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43maligned_sequences\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msource_id\u001b[49m)\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28mprint\u001b[39m(alignment\u001b[38;5;241m.\u001b[39maligned_sequences[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39msequence)\n",
"File \u001b[0;32m~/miniconda3/envs/pye/lib/python3.10/site-packages/pydantic/main.py:811\u001b[0m, in \u001b[0;36mBaseModel.__getattr__\u001b[0;34m(self, item)\u001b[0m\n\u001b[1;32m 808\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__getattribute__\u001b[39m(item) \u001b[38;5;66;03m# Raises AttributeError if appropriate\u001b[39;00m\n\u001b[1;32m 809\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 810\u001b[0m \u001b[38;5;66;03m# this is the current error\u001b[39;00m\n\u001b[0;32m--> 811\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(\u001b[38;5;28mself\u001b[39m)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m object has no attribute \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mitem\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n",
"\u001b[0;31mAttributeError\u001b[0m: 'Sequence' object has no attribute 'source_id'"
]
}
],
Expand Down
Loading

0 comments on commit a36e97c

Please sign in to comment.