diff --git a/graph-network/graph-network/populate.ipynb b/graph-network/graph-network/populate.ipynb index 1f42f095..49d8066f 100644 --- a/graph-network/graph-network/populate.ipynb +++ b/graph-network/graph-network/populate.ipynb @@ -2,10 +2,12 @@ "cells": [ { "cell_type": "code", - "execution_count": 24, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ + "%reload_ext autoreload\n", + "%autoreload 2\n", "import json\n", "from pathlib import Path\n", "from rdflib import Graph\n", @@ -14,6 +16,26 @@ "from pyeed.core import ProteinRecord" ] }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# get pyeed logger to log to file\n", + "import logging\n", + "\n", + "logging.getLogger(\"pyeed\").setLevel(logging.DEBUG)\n", + "# logger to log to file\n", + "logger = logging.getLogger(\"pyeed\")\n", + "logger.setLevel(logging.DEBUG)\n", + "# create file handler which logs even debug messages\n", + "fh = logging.FileHandler(\"pyeed.log\")\n", + "fh.setLevel(logging.ERROR)\n", + "# connect logger with file handler\n", + "logger.addHandler(fh)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -29,7 +51,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1d6eff9b03b84ff99d3d478709c896ce", + "model_id": "ac245aa1b6714381bf5db13c78080c0d", "version_major": 2, "version_minor": 0 }, @@ -62,6 +84,37 @@ }, "metadata": {}, "output_type": "display_data" + }, + { + "ename": "AttributeError", + "evalue": "'Region' object has no attribute '_repo'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[3], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mopen\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m../../docs/examples/ids.json\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[1;32m 2\u001b[0m ids \u001b[38;5;241m=\u001b[39m json\u001b[38;5;241m.\u001b[39mload(f)\n\u001b[0;32m----> 4\u001b[0m seqs \u001b[38;5;241m=\u001b[39m \u001b[43mProteinRecord\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_ids\u001b[49m\u001b[43m(\u001b[49m\u001b[43mids\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Documents/GitHub/pyeed/pyeed/core/proteinrecord.py:165\u001b[0m, in \u001b[0;36mProteinRecord.get_ids\u001b[0;34m(cls, accession_ids)\u001b[0m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mnest_asyncio\u001b[39;00m\n\u001b[1;32m 163\u001b[0m nest_asyncio\u001b[38;5;241m.\u001b[39mapply()\n\u001b[0;32m--> 165\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43masyncio\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 166\u001b[0m \u001b[43m \u001b[49m\u001b[43mProteinFetcher\u001b[49m\u001b[43m(\u001b[49m\u001b[43mids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maccession_ids\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfetch\u001b[49m\u001b[43m(\u001b[49m\u001b[43mforce_terminal\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 167\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/nest_asyncio.py:30\u001b[0m, in \u001b[0;36m_patch_asyncio..run\u001b[0;34m(main, debug)\u001b[0m\n\u001b[1;32m 28\u001b[0m task \u001b[38;5;241m=\u001b[39m asyncio\u001b[38;5;241m.\u001b[39mensure_future(main)\n\u001b[1;32m 29\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 30\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mloop\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_until_complete\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtask\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 31\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m task\u001b[38;5;241m.\u001b[39mdone():\n", + "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/nest_asyncio.py:98\u001b[0m, in \u001b[0;36m_patch_loop..run_until_complete\u001b[0;34m(self, future)\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m f\u001b[38;5;241m.\u001b[39mdone():\n\u001b[1;32m 96\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[1;32m 97\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mEvent loop stopped before Future completed.\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m---> 98\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/asyncio/futures.py:203\u001b[0m, in \u001b[0;36mFuture.result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 201\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__log_traceback \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 202\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 203\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception\u001b[38;5;241m.\u001b[39mwith_traceback(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception_tb)\n\u001b[1;32m 204\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_result\n", + "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/asyncio/tasks.py:267\u001b[0m, in \u001b[0;36mTask.__step\u001b[0;34m(***failed resolving arguments***)\u001b[0m\n\u001b[1;32m 263\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 264\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m exc \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 265\u001b[0m \u001b[38;5;66;03m# We use the `send` method directly, because coroutines\u001b[39;00m\n\u001b[1;32m 266\u001b[0m \u001b[38;5;66;03m# don't have `__iter__` and `__next__` methods.\u001b[39;00m\n\u001b[0;32m--> 267\u001b[0m result \u001b[38;5;241m=\u001b[39m coro\u001b[38;5;241m.\u001b[39msend(\u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m 268\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 269\u001b[0m result \u001b[38;5;241m=\u001b[39m coro\u001b[38;5;241m.\u001b[39mthrow(exc)\n", + "File \u001b[0;32m~/Documents/GitHub/pyeed/pyeed/fetch/proteinfetcher.py:185\u001b[0m, in \u001b[0;36mProteinFetcher.fetch\u001b[0;34m(self, **console_kwargs)\u001b[0m\n\u001b[1;32m 182\u001b[0m \u001b[38;5;66;03m# map data to objects\u001b[39;00m\n\u001b[1;32m 183\u001b[0m ncbi_responses, uniprot_response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39midentify_data_source(responses)\n\u001b[0;32m--> 185\u001b[0m ncbi_entries \u001b[38;5;241m=\u001b[39m \u001b[43mNCBIProteinMapper\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap\u001b[49m\u001b[43m(\u001b[49m\u001b[43mncbi_responses\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 187\u001b[0m uniprot_entries \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 188\u001b[0m UniprotMapper()\u001b[38;5;241m.\u001b[39mmap(\u001b[38;5;241m*\u001b[39mresp) \u001b[38;5;28;01mfor\u001b[39;00m resp \u001b[38;5;129;01min\u001b[39;00m uniprot_response\u001b[38;5;241m.\u001b[39mvalues()\n\u001b[1;32m 189\u001b[0m ]\n\u001b[1;32m 191\u001b[0m uniprot_entries\u001b[38;5;241m.\u001b[39mextend(ncbi_entries)\n", + "File \u001b[0;32m~/Documents/GitHub/pyeed/pyeed/fetch/ncbiproteinmapper.py:69\u001b[0m, in \u001b[0;36mNCBIProteinMapper.map\u001b[0;34m(self, responses)\u001b[0m\n\u001b[1;32m 65\u001b[0m protein_info \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmap_regions(record, protein_info)\n\u001b[1;32m 67\u001b[0m protein_info \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmap_sites(record, protein_info)\n\u001b[0;32m---> 69\u001b[0m protein_info \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap_cds\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrecord\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mprotein_info\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 71\u001b[0m protein_infos\u001b[38;5;241m.\u001b[39mappend(protein_info)\n\u001b[1;32m 73\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m protein_infos\n", + "File \u001b[0;32m~/Documents/GitHub/pyeed/pyeed/fetch/ncbiproteinmapper.py:232\u001b[0m, in \u001b[0;36mNCBIProteinMapper.map_cds\u001b[0;34m(self, seq_record, protein_record)\u001b[0m\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m protein_record\n\u001b[1;32m 231\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 232\u001b[0m \u001b[43mprotein_record\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcoding_sequence\u001b[49m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_cds_regions(\n\u001b[1;32m 233\u001b[0m cds\u001b[38;5;241m.\u001b[39mqualifiers[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcoded_by\u001b[39m\u001b[38;5;124m\"\u001b[39m][\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 234\u001b[0m )\n\u001b[1;32m 235\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mIndexError\u001b[39;00m:\n\u001b[1;32m 236\u001b[0m LOGGER\u001b[38;5;241m.\u001b[39mdebug(\n\u001b[1;32m 237\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo coding sequence reference found for \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mseq_record\u001b[38;5;241m.\u001b[39mid\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcds\u001b[38;5;241m.\u001b[39mqualifiers\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 238\u001b[0m )\n", + "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/sdRDM/base/datamodel.py:1048\u001b[0m, in \u001b[0;36mDataModel.__setattr__\u001b[0;34m(self, name, value)\u001b[0m\n\u001b[1;32m 1045\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_set_parent_instances(value)\n\u001b[1;32m 1046\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_references(name, value)\n\u001b[0;32m-> 1048\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__setattr__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1050\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(value, (\u001b[38;5;28mlist\u001b[39m, ListPlus)):\n\u001b[1;32m 1051\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__dict__\u001b[39m[name]\u001b[38;5;241m.\u001b[39m_parent \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\n", + "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/pydantic/main.py:836\u001b[0m, in \u001b[0;36mBaseModel.__setattr__\u001b[0;34m(self, name, value)\u001b[0m\n\u001b[1;32m 834\u001b[0m attr\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__set__\u001b[39m(\u001b[38;5;28mself\u001b[39m, value)\n\u001b[1;32m 835\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_config\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mvalidate_assignment\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m--> 836\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__pydantic_validator__\u001b[38;5;241m.\u001b[39mvalidate_assignment(\u001b[38;5;28mself\u001b[39m, name, value)\n\u001b[1;32m 837\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_config\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mextra\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mallow\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_fields:\n\u001b[1;32m 838\u001b[0m \u001b[38;5;66;03m# TODO - matching error\u001b[39;00m\n\u001b[1;32m 839\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m object has no field \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", + "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/sdRDM/base/datamodel.py:882\u001b[0m, in \u001b[0;36mDataModel._convert_extended_list_and_numpy_strings\u001b[0;34m(cls, value)\u001b[0m\n\u001b[1;32m 879\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Validator used to convert any list into a ListPlus.\"\"\"\u001b[39;00m\n\u001b[1;32m 881\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(value, \u001b[38;5;28mlist\u001b[39m):\n\u001b[0;32m--> 882\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mListPlus\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_convert_numpy_type\u001b[49m\u001b[43m(\u001b[49m\u001b[43mv\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43min_setup\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 883\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(value, np\u001b[38;5;241m.\u001b[39mstr_):\n\u001b[1;32m 884\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mstr\u001b[39m(value)\n", + "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/sdRDM/base/listplus.py:22\u001b[0m, in \u001b[0;36mListPlus.__init__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_attribute \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 21\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m arg \u001b[38;5;129;01min\u001b[39;00m args:\n\u001b[0;32m---> 22\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgenerator object\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrepr\u001b[39m(arg):\n\u001b[1;32m 23\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m element \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mlist\u001b[39m(arg):\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mappend(element)\n", + "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/pydantic/main.py:1006\u001b[0m, in \u001b[0;36mBaseModel.__repr__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1005\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__repr__\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mstr\u001b[39m:\n\u001b[0;32m-> 1006\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__repr_name__()\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__repr_str__\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m, \u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m'\u001b[39m\n", + "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/pydantic/_internal/_repr.py:55\u001b[0m, in \u001b[0;36mRepresentation.__repr_str__\u001b[0;34m(self, join_str)\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__repr_str__\u001b[39m(\u001b[38;5;28mself\u001b[39m, join_str: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mstr\u001b[39m:\n\u001b[0;32m---> 55\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mjoin_str\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mrepr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mv\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43ma\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43ma\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m=\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mv\u001b[49m\u001b[38;5;132;43;01m!r}\u001b[39;49;00m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__repr_args__\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/pydantic/_internal/_repr.py:55\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__repr_str__\u001b[39m(\u001b[38;5;28mself\u001b[39m, join_str: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mstr\u001b[39m:\n\u001b[0;32m---> 55\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m join_str\u001b[38;5;241m.\u001b[39mjoin\u001b[43m(\u001b[49m\u001b[38;5;28;43mrepr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mv\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43ma\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43ma\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m=\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mv\u001b[49m\u001b[38;5;132;43;01m!r}\u001b[39;49;00m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__repr_args__\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/pydantic/main.py:1025\u001b[0m, in \u001b[0;36mBaseModel.__repr_args__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1023\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pydantic_extra \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 1024\u001b[0m \u001b[38;5;28;01myield from\u001b[39;00m ((k, v) \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m pydantic_extra\u001b[38;5;241m.\u001b[39mitems())\n\u001b[0;32m-> 1025\u001b[0m \u001b[38;5;28;01myield from\u001b[39;00m ((k, \u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m, k)) \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_computed_fields\u001b[38;5;241m.\u001b[39mitems() \u001b[38;5;28;01mif\u001b[39;00m v\u001b[38;5;241m.\u001b[39mrepr)\n", + "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/pydantic/main.py:1025\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 1023\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pydantic_extra \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 1024\u001b[0m \u001b[38;5;28;01myield from\u001b[39;00m ((k, v) \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m pydantic_extra\u001b[38;5;241m.\u001b[39mitems())\n\u001b[0;32m-> 1025\u001b[0m \u001b[38;5;28;01myield from\u001b[39;00m ((k, \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[43m)\u001b[49m) \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_computed_fields\u001b[38;5;241m.\u001b[39mitems() \u001b[38;5;28;01mif\u001b[39;00m v\u001b[38;5;241m.\u001b[39mrepr)\n", + "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/pydantic/main.py:808\u001b[0m, in \u001b[0;36mBaseModel.__getattr__\u001b[0;34m(self, item)\u001b[0m\n\u001b[1;32m 806\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 807\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m, item):\n\u001b[0;32m--> 808\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__getattribute__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mitem\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# Raises AttributeError if appropriate\u001b[39;00m\n\u001b[1;32m 809\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 810\u001b[0m \u001b[38;5;66;03m# this is the current error\u001b[39;00m\n\u001b[1;32m 811\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(\u001b[38;5;28mself\u001b[39m)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m object has no attribute \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mitem\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n", + "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/sdRDM/base/datamodel.py:156\u001b[0m, in \u001b[0;36mDataModel.json_ld_context\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 152\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m term:\n\u001b[1;32m 153\u001b[0m sub_annots[attr] \u001b[38;5;241m=\u001b[39m term\n\u001b[1;32m 155\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m {\n\u001b[0;32m--> 156\u001b[0m cls_name: \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_repo\u001b[49m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcls_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 157\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39msub_annots\n\u001b[1;32m 158\u001b[0m }\n", + "File \u001b[0;32m~/miniconda3/envs/pyeed/lib/python3.11/site-packages/pydantic/main.py:811\u001b[0m, in \u001b[0;36mBaseModel.__getattr__\u001b[0;34m(self, item)\u001b[0m\n\u001b[1;32m 808\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__getattribute__\u001b[39m(item) \u001b[38;5;66;03m# Raises AttributeError if appropriate\u001b[39;00m\n\u001b[1;32m 809\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 810\u001b[0m \u001b[38;5;66;03m# this is the current error\u001b[39;00m\n\u001b[0;32m--> 811\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(\u001b[38;5;28mself\u001b[39m)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m object has no attribute \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mitem\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n", + "\u001b[0;31mAttributeError\u001b[0m: 'Region' object has no attribute '_repo'" + ] } ], "source": [ @@ -73,10 +126,15 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ + "# write json-ld files\n", + "for seq in seqs:\n", + " with open(f\"../json/{seq.id}.json\", \"w\") as f:\n", + " f.write(seq.json())\n", + "\n", "# write ttl files\n", "for seq in seqs:\n", " g = Graph()\n", @@ -93,7 +151,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "metadata": {}, "outputs": [ { diff --git a/pyeed/core/abstractannotation.py b/pyeed/core/abstractannotation.py index ec548405..34853756 100644 --- a/pyeed/core/abstractannotation.py +++ b/pyeed/core/abstractannotation.py @@ -47,11 +47,6 @@ class AbstractAnnotation( json_schema_extra=dict(), ) - _repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed") - _commit: Optional[str] = PrivateAttr( - default="6cb7b2b9f86383fe9659fc93e0802306fd288462" - ) - _raw_xml_data: Dict = PrivateAttr(default_factory=dict) @model_validator(mode="after") diff --git a/pyeed/core/alignmentresult.py b/pyeed/core/alignmentresult.py index 3ca642f8..d9cce044 100644 --- a/pyeed/core/alignmentresult.py +++ b/pyeed/core/alignmentresult.py @@ -59,11 +59,6 @@ class AlignmentResult( json_schema_extra=dict(), ) - _repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed") - _commit: Optional[str] = PrivateAttr( - default="6cb7b2b9f86383fe9659fc93e0802306fd288462" - ) - _raw_xml_data: Dict = PrivateAttr(default_factory=dict) @model_validator(mode="after") diff --git a/pyeed/core/blastdata.py b/pyeed/core/blastdata.py index f9160ccb..b024b1d5 100644 --- a/pyeed/core/blastdata.py +++ b/pyeed/core/blastdata.py @@ -88,11 +88,6 @@ class BlastData( json_schema_extra=dict(), ) - _repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed") - _commit: Optional[str] = PrivateAttr( - default="6cb7b2b9f86383fe9659fc93e0802306fd288462" - ) - _raw_xml_data: Dict = PrivateAttr(default_factory=dict) @model_validator(mode="after") diff --git a/pyeed/core/clustalomegaresult.py b/pyeed/core/clustalomegaresult.py index 7583ceea..13bfda76 100644 --- a/pyeed/core/clustalomegaresult.py +++ b/pyeed/core/clustalomegaresult.py @@ -1,16 +1,17 @@ from typing import Dict, Optional from uuid import uuid4 -import sdRDM from lxml.etree import _Element from pydantic import PrivateAttr, model_validator from pydantic_xml import attr, element from sdRDM.base.listplus import ListPlus from sdRDM.tools.utils import elem2dict +from .alignmentresult import AlignmentResult + class ClustalOmegaResult( - sdRDM.DataModel, + AlignmentResult, search_mode="unordered", ): """""" @@ -29,11 +30,6 @@ class ClustalOmegaResult( json_schema_extra=dict(), ) - _repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed") - _commit: Optional[str] = PrivateAttr( - default="6cb7b2b9f86383fe9659fc93e0802306fd288462" - ) - _raw_xml_data: Dict = PrivateAttr(default_factory=dict) @model_validator(mode="after") diff --git a/pyeed/core/dnarecord.py b/pyeed/core/dnarecord.py index 2e284ef3..96e88757 100644 --- a/pyeed/core/dnarecord.py +++ b/pyeed/core/dnarecord.py @@ -1,16 +1,17 @@ -from typing import Dict, Optional, Set +from typing import Dict, Optional from uuid import uuid4 -import sdRDM from lxml.etree import _Element from pydantic import PrivateAttr, model_validator from pydantic_xml import attr, element from sdRDM.base.listplus import ListPlus from sdRDM.tools.utils import elem2dict +from .sequencerecord import SequenceRecord + class DNARecord( - sdRDM.DataModel, + SequenceRecord, search_mode="unordered", ): """A nucleic acid sequence and associated metadata 🧬""" @@ -29,15 +30,6 @@ class DNARecord( json_schema_extra=dict(), ) - _repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed") - _commit: Optional[str] = PrivateAttr( - default="6cb7b2b9f86383fe9659fc93e0802306fd288462" - ) - - _object_terms: Set[str] = PrivateAttr( - default={"http://semanticscience.org/resource/SIO_010008"} - ) - _raw_xml_data: Dict = PrivateAttr(default_factory=dict) @model_validator(mode="after") diff --git a/pyeed/core/numberedsequence.py b/pyeed/core/numberedsequence.py index b82ed2de..3369ab18 100644 --- a/pyeed/core/numberedsequence.py +++ b/pyeed/core/numberedsequence.py @@ -38,11 +38,6 @@ class NumberedSequence( ), ) - _repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed") - _commit: Optional[str] = PrivateAttr( - default="6cb7b2b9f86383fe9659fc93e0802306fd288462" - ) - _raw_xml_data: Dict = PrivateAttr(default_factory=dict) @model_validator(mode="after") diff --git a/pyeed/core/organism.py b/pyeed/core/organism.py index 093386f2..fc98ca77 100644 --- a/pyeed/core/organism.py +++ b/pyeed/core/organism.py @@ -106,11 +106,6 @@ class Organism( ), ) - _repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed") - _commit: Optional[str] = PrivateAttr( - default="6cb7b2b9f86383fe9659fc93e0802306fd288462" - ) - _raw_xml_data: Dict = PrivateAttr(default_factory=dict) @model_validator(mode="after") diff --git a/pyeed/core/pairwisealignmentresult.py b/pyeed/core/pairwisealignmentresult.py index ad91298e..ed806ae3 100644 --- a/pyeed/core/pairwisealignmentresult.py +++ b/pyeed/core/pairwisealignmentresult.py @@ -1,16 +1,17 @@ from typing import Dict, Optional from uuid import uuid4 -import sdRDM from lxml.etree import _Element from pydantic import PrivateAttr, model_validator from pydantic_xml import attr, element from sdRDM.base.listplus import ListPlus from sdRDM.tools.utils import elem2dict +from .alignmentresult import AlignmentResult + class PairwiseAlignmentResult( - sdRDM.DataModel, + AlignmentResult, search_mode="unordered", ): """""" @@ -57,11 +58,6 @@ class PairwiseAlignmentResult( json_schema_extra=dict(), ) - _repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed") - _commit: Optional[str] = PrivateAttr( - default="6cb7b2b9f86383fe9659fc93e0802306fd288462" - ) - _raw_xml_data: Dict = PrivateAttr(default_factory=dict) @model_validator(mode="after") diff --git a/pyeed/core/proteinrecord.py b/pyeed/core/proteinrecord.py index 6faf718f..9517e47f 100644 --- a/pyeed/core/proteinrecord.py +++ b/pyeed/core/proteinrecord.py @@ -1,11 +1,10 @@ import asyncio import warnings from concurrent.futures import ThreadPoolExecutor -from typing import Dict, List, Optional, Set +from typing import Dict, List, Optional from uuid import uuid4 import nest_asyncio -import sdRDM from Bio.Blast import NCBIXML from IPython.display import clear_output from lxml.etree import _Element @@ -21,10 +20,11 @@ from .dnarecord import DNARecord from .region import Region +from .sequencerecord import SequenceRecord class ProteinRecord( - sdRDM.DataModel, + SequenceRecord, search_mode="unordered", ): """A protein sequence and associated metadata.""" @@ -73,15 +73,6 @@ class ProteinRecord( ), ) - _repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed") - _commit: Optional[str] = PrivateAttr( - default="6cb7b2b9f86383fe9659fc93e0802306fd288462" - ) - - _object_terms: Set[str] = PrivateAttr( - default={"http://semanticscience.org/resource/SIO_010043"} - ) - _raw_xml_data: Dict = PrivateAttr(default_factory=dict) @model_validator(mode="after") @@ -100,6 +91,9 @@ def add_to_coding_sequence( self, start: Optional[int] = None, end: Optional[int] = None, + url: Optional[str] = None, + accession_id: Optional[str] = None, + name: Optional[str] = None, id: Optional[str] = None, **kwargs, ) -> Region: @@ -110,11 +104,17 @@ def add_to_coding_sequence( id (str): Unique identifier of the 'Region' object. Defaults to 'None'. start (): Start position of the site.. Defaults to None end (): End position of the site.. Defaults to None + url (): URI of the annotation.. Defaults to None + accession_id (): Accession ID of the annotation.. Defaults to None + name (): A name of a sequence feature, e.g. the name of a feature. Defaults to None """ params = { "start": start, "end": end, + "url": url, + "accession_id": accession_id, + "name": name, } if id is not None: diff --git a/pyeed/core/region.py b/pyeed/core/region.py index 1b195d1b..547e9224 100644 --- a/pyeed/core/region.py +++ b/pyeed/core/region.py @@ -1,16 +1,17 @@ -from typing import Dict, Optional, Set +from typing import Dict, Optional from uuid import uuid4 -import sdRDM from lxml.etree import _Element from pydantic import PrivateAttr, model_validator from pydantic_xml import attr, element from sdRDM.base.listplus import ListPlus from sdRDM.tools.utils import elem2dict +from .abstractannotation import AbstractAnnotation + class Region( - sdRDM.DataModel, + AbstractAnnotation, search_mode="unordered", ): """Regional annotation of a feature within a sequence. The direction of the region is defined by the start and end positions.""" @@ -40,15 +41,6 @@ class Region( ), ) - _repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed") - _commit: Optional[str] = PrivateAttr( - default="6cb7b2b9f86383fe9659fc93e0802306fd288462" - ) - - _object_terms: Set[str] = PrivateAttr( - default={"http://semanticscience.org/resource/SIO_000370"} - ) - _raw_xml_data: Dict = PrivateAttr(default_factory=dict) @model_validator(mode="after") diff --git a/pyeed/core/regionset.py b/pyeed/core/regionset.py index 3f477417..b8497be8 100644 --- a/pyeed/core/regionset.py +++ b/pyeed/core/regionset.py @@ -33,11 +33,6 @@ class RegionSet( ), ) - _repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed") - _commit: Optional[str] = PrivateAttr( - default="6cb7b2b9f86383fe9659fc93e0802306fd288462" - ) - _object_terms: Set[str] = PrivateAttr( default={"http://semanticscience.org/resource/SIO_000370"} ) @@ -60,6 +55,9 @@ def add_to_regions( self, start: Optional[int] = None, end: Optional[int] = None, + url: Optional[str] = None, + accession_id: Optional[str] = None, + name: Optional[str] = None, id: Optional[str] = None, **kwargs, ) -> Region: @@ -70,11 +68,17 @@ def add_to_regions( id (str): Unique identifier of the 'Region' object. Defaults to 'None'. start (): Start position of the site.. Defaults to None end (): End position of the site.. Defaults to None + url (): URI of the annotation.. Defaults to None + accession_id (): Accession ID of the annotation.. Defaults to None + name (): A name of a sequence feature, e.g. the name of a feature. Defaults to None """ params = { "start": start, "end": end, + "url": url, + "accession_id": accession_id, + "name": name, } if id is not None: diff --git a/pyeed/core/sequence.py b/pyeed/core/sequence.py index 40813a82..91d85cd7 100644 --- a/pyeed/core/sequence.py +++ b/pyeed/core/sequence.py @@ -36,11 +36,6 @@ class Sequence( json_schema_extra=dict(), ) - _repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed") - _commit: Optional[str] = PrivateAttr( - default="6cb7b2b9f86383fe9659fc93e0802306fd288462" - ) - _raw_xml_data: Dict = PrivateAttr(default_factory=dict) @model_validator(mode="after") diff --git a/pyeed/core/sequencerecord.py b/pyeed/core/sequencerecord.py index 2f0a0f60..9e738ac9 100644 --- a/pyeed/core/sequencerecord.py +++ b/pyeed/core/sequencerecord.py @@ -101,11 +101,6 @@ class SequenceRecord( ), ) - _repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed") - _commit: Optional[str] = PrivateAttr( - default="6cb7b2b9f86383fe9659fc93e0802306fd288462" - ) - _raw_xml_data: Dict = PrivateAttr(default_factory=dict) @model_validator(mode="after") @@ -123,6 +118,9 @@ def _parse_raw_xml_data(self): def add_to_sites( self, positions: List[int] = ListPlus(), + url: Optional[str] = None, + accession_id: Optional[str] = None, + name: Optional[str] = None, id: Optional[str] = None, **kwargs, ) -> Site: @@ -132,10 +130,16 @@ def add_to_sites( Args: id (str): Unique identifier of the 'Site' object. Defaults to 'None'. positions (): Position of the site(s) within the sequence.. Defaults to ListPlus() + url (): URI of the annotation.. Defaults to None + accession_id (): Accession ID of the annotation.. Defaults to None + name (): A name of a sequence feature, e.g. the name of a feature. Defaults to None """ params = { "positions": positions, + "url": url, + "accession_id": accession_id, + "name": name, } if id is not None: @@ -151,6 +155,9 @@ def add_to_regions( self, start: Optional[int] = None, end: Optional[int] = None, + url: Optional[str] = None, + accession_id: Optional[str] = None, + name: Optional[str] = None, id: Optional[str] = None, **kwargs, ) -> Region: @@ -161,11 +168,17 @@ def add_to_regions( id (str): Unique identifier of the 'Region' object. Defaults to 'None'. start (): Start position of the site.. Defaults to None end (): End position of the site.. Defaults to None + url (): URI of the annotation.. Defaults to None + accession_id (): Accession ID of the annotation.. Defaults to None + name (): A name of a sequence feature, e.g. the name of a feature. Defaults to None """ params = { "start": start, "end": end, + "url": url, + "accession_id": accession_id, + "name": name, } if id is not None: diff --git a/pyeed/core/site.py b/pyeed/core/site.py index 06b665c0..0e3d8002 100644 --- a/pyeed/core/site.py +++ b/pyeed/core/site.py @@ -1,16 +1,17 @@ -from typing import Dict, List, Optional, Set +from typing import Dict, List, Optional from uuid import uuid4 -import sdRDM from lxml.etree import _Element from pydantic import PrivateAttr, model_validator from pydantic_xml import attr, element from sdRDM.base.listplus import ListPlus from sdRDM.tools.utils import elem2dict +from .abstractannotation import AbstractAnnotation + class Site( - sdRDM.DataModel, + AbstractAnnotation, search_mode="unordered", ): """Position(s) constituting a site within a sequence.""" @@ -32,15 +33,6 @@ class Site( ), ) - _repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed") - _commit: Optional[str] = PrivateAttr( - default="6cb7b2b9f86383fe9659fc93e0802306fd288462" - ) - - _object_terms: Set[str] = PrivateAttr( - default={"http://semanticscience.org/resource/sio:010049"} - ) - _raw_xml_data: Dict = PrivateAttr(default_factory=dict) @model_validator(mode="after") diff --git a/pyeed/core/standardnumbering.py b/pyeed/core/standardnumbering.py index e72580b9..9438ed3d 100644 --- a/pyeed/core/standardnumbering.py +++ b/pyeed/core/standardnumbering.py @@ -40,11 +40,6 @@ class StandardNumbering( ), ) - _repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed") - _commit: Optional[str] = PrivateAttr( - default="6cb7b2b9f86383fe9659fc93e0802306fd288462" - ) - _raw_xml_data: Dict = PrivateAttr(default_factory=dict) @model_validator(mode="after") diff --git a/pyeed/fetch/ncbiproteinmapper.py b/pyeed/fetch/ncbiproteinmapper.py index dd1ebcf8..f8eef8a5 100644 --- a/pyeed/fetch/ncbiproteinmapper.py +++ b/pyeed/fetch/ncbiproteinmapper.py @@ -46,10 +46,13 @@ def map(self, responses: List[str]) -> List[ProteinRecord]: protein_infos = [] for record in seq_records: - protein_info = ProteinRecord(id=record.id, sequence=str(record.seq)) try: + # print(str(record.seq)) + # print(protein_info.sequence) + # print(self.map_organism(record)) + # print(protein_info) protein_info.organism = Organism(**self.map_organism(record)) except ValidationError as e: LOGGER.error( @@ -90,13 +93,16 @@ def map_organism(self, seq_record: SeqRecord) -> dict: return {} try: - taxonomy_id = next(feature for feature in feature.qualifiers["db_xref"] if "taxon" in feature) + taxonomy_id = next( + feature + for feature in feature.qualifiers["db_xref"] + if "taxon" in feature + ) if ":" in taxonomy_id: taxonomy_id = taxonomy_id.split(":")[1] except StopIteration: taxonomy_id = None - except KeyError: LOGGER.debug(f"No taxonomy ID found for {seq_record.id}: {feature}") return {} diff --git a/pyeed/schemes/pyeed_schema.md b/pyeed/schemes/pyeed_schema.md index c136922e..f21b22e9 100644 --- a/pyeed/schemes/pyeed_schema.md +++ b/pyeed/schemes/pyeed_schema.md @@ -1,5 +1,11 @@ ```mermaid classDiagram + SequenceRecord <-- ProteinRecord + SequenceRecord <-- DNARecord + AbstractAnnotation <-- Site + AbstractAnnotation <-- Region + AlignmentResult <-- PairwiseAlignmentResult + AlignmentResult <-- ClustalOmegaResult SequenceRecord *-- Site SequenceRecord *-- Region SequenceRecord *-- RegionSet @@ -67,9 +73,9 @@ classDiagram class BlastData { +float identity +float evalue - +integer n_hits + +int n_hits +string substitution_matrix - +integer word_size + +int word_size +float gap_open +float gap_extend +float threshold @@ -92,12 +98,12 @@ classDiagram +float score +float identity +float similarity - +integer gaps - +integer mismatches + +int gaps + +int mismatches } class StandardNumbering { - +string reference_id + +str reference_id +NumberedSequence[0..*] numberd_sequences } diff --git a/specifications/sequence_record.md b/specifications/sequence_record.md index 775d32ab..f806b994 100644 --- a/specifications/sequence_record.md +++ b/specifications/sequence_record.md @@ -42,7 +42,7 @@ A molecular sequence and associated annotation data. - Type: RegionSet[] - Description: Multiple regions forming a higher order structure or feature of a sequence. -### ProteinRecord (sio:SIO_010043) [SequenceRecord] +### ProteinRecord(_SequenceRecord_) (sio:SIO_010043) A protein sequence and associated metadata. @@ -63,7 +63,7 @@ A protein sequence and associated metadata. - Description: Calculated molecular weight of the protein based on the sequence. - Term: edam:data_1505 -### DNARecord (sio:SIO_010008) [SequenceRecord] +### DNARecord(_SequenceRecord_) (sio:SIO_010008) A nucleic acid sequence and associated metadata 🧬 @@ -85,7 +85,7 @@ A nucleic acid sequence and associated metadata 🧬 - Type: string - Description: A name of a sequence feature, e.g. the name of a feature -### Site (sio:sio:010049) [AbstractAnnotation] +### Site(_AbstractAnnotation_) (sio:sio:010049) Position(s) constituting a site within a sequence. @@ -94,7 +94,7 @@ Position(s) constituting a site within a sequence. - Description: Position of the site(s) within the sequence. - Term: sio:SIO_000056 -### Region (sio:SIO_000370) [AbstractAnnotation] +### Region(_AbstractAnnotation_) (sio:SIO_000370) Regional annotation of a feature within a sequence. The direction of the region is defined by the start and end positions. @@ -169,7 +169,7 @@ Description of an organism 🦠. - Description: Expectation value (E) to safe hits. - Default: 10.0 - n_hits - - Type: integer + - Type: int - Description: Number of hits to return. - Default: 100 - substitution_matrix @@ -177,7 +177,7 @@ Description of an organism 🦠. - Description: Substitution matrix to use. - Default: "BLOSUM62" - word_size - - Type: integer + - Type: int - Description: Word size of the initial match. - Default: 3 - Inclusivminimum: 2 @@ -224,7 +224,7 @@ Description of an organism 🦠. - Type: StandardNumbering - Description: Standard numbering of the aligned sequences. -### PairwiseAlignmentResult [AlignmentResult] +### PairwiseAlignmentResult(_AlignmentResult_) - score - Type: float @@ -236,16 +236,16 @@ Description of an organism 🦠. - Type: float - Description: Ratio of similar residues in the alignment - gaps - - Type: integer + - Type: int - Description: Number of gaps in the alignment - mismatches - - Type: integer + - Type: int - Description: Number of mismatches in the alignment ### StandardNumbering - reference_id - - Type: string + - Type: str - Description: Standard numbering of the reference sequence - numberd_sequences - Type: NumberedSequence[] @@ -260,7 +260,7 @@ Description of an organism 🦠. - Type: string[] - Description: Standard numbering of the aligned sequence -### ClustalOmegaResult [AlignmentResult] +### ClustalOmegaResult(_AlignmentResult_) - version - Type: string