PyEED · haeussma · Jun 10, 2024 · Jun 10, 2024 · Jun 10, 2024
diff --git a/docs/examples/basics.ipynb b/docs/examples/basics.ipynb
@@ -37,13 +37,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "5352b6f5fa4c46119cd65bfc4de44c0d",
+       "model_id": "68d545568bf04841be62573b15776e10",
        "version_major": 2,
        "version_minor": 0
       },
@@ -106,7 +106,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -122,7 +122,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "f759e3211ef44d2b81a0f820d2f49e2d",
+       "model_id": "93adf4bcad844963b3815a8288e2083b",
        "version_major": 2,
        "version_minor": 0
       },
@@ -178,43 +178,236 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "\u001b[4mProteinRecord\u001b[0m\n",
-      "├── \u001b[94mid\u001b[0m = WP_068323110.1\n",
-      "├── \u001b[94mname\u001b[0m = methionine adenosyltransferase\n",
+      "├── \u001b[94mid\u001b[0m = Q9YBK2\n",
+      "├── \u001b[94mname\u001b[0m = S-adenosylmethionine synthase\n",
       "├── \u001b[94morganism\u001b[0m\n",
       "│   └── \u001b[4mOrganism\u001b[0m\n",
-      "│       ├── \u001b[94mid\u001b[0m = 95652fe4-fb9a-40e7-b98a-1dfad46d8e56\n",
-      "│       ├── \u001b[94mtaxonomy_id\u001b[0m = 1609559\n",
-      "│       ├── \u001b[94mname\u001b[0m = Pyrococcus kukulkanii\n",
+      "│       ├── \u001b[94mid\u001b[0m = cb2acb39-692e-4424-a4ed-1b97f2351a83\n",
+      "│       ├── \u001b[94mtaxonomy_id\u001b[0m = 272557\n",
+      "│       ├── \u001b[94mname\u001b[0m = Aeropyrum pernix K1\n",
       "│       ├── \u001b[94mdomain\u001b[0m = Archaea\n",
-      "│       ├── \u001b[94mphylum\u001b[0m = Euryarchaeota\n",
-      "│       ├── \u001b[94mtax_class\u001b[0m = Thermococci\n",
-      "│       ├── \u001b[94morder\u001b[0m = Thermococcales\n",
-      "│       ├── \u001b[94mfamily\u001b[0m = Thermococcaceae\n",
-      "│       └── \u001b[94mgenus\u001b[0m = Pyrococcus\n",
-      "├── \u001b[94msequence\u001b[0m = MARNIVVEEIVRTPVEMQKVELVERKGIGHPDSIADGIAEAVSRALCREYIKRYGVILHHNTDQVEVVGGRAYPKFGGGEVVKPIYILLSGRAVELVDQELFPVHEVAIRAAKEYLKKNIRHLDVENHVVIDSRIGQGSVDLVSVFNKAKENPIPLANDTSFGVGFAPLTETERLVLETERLLNSEKFKKEYPAVGEDIKVMGLRKGDEIDLTIAAAIVDSEVANPKEYMEVKDKIKETVEELAKDITSRKVNIYVNTADDPKKDIYYITVTGTSAEAGDDGSVGRGNRVNGLITPNRHMSMEAAAGKNPVSHVGKIYNILAMFIANDIAKALPVEEVYVRILSQIGKPIDQPLVASIQVIPKQGHTVKEFEKDAYAIADEWLANITKIQKMILEDKITVF\n",
+      "│       ├── \u001b[94mphylum\u001b[0m = Thermoproteota\n",
+      "│       ├── \u001b[94mtax_class\u001b[0m = Thermoprotei\n",
+      "│       ├── \u001b[94morder\u001b[0m = Desulfurococcales\n",
+      "│       ├── \u001b[94mfamily\u001b[0m = Desulfurococcaceae\n",
+      "│       └── \u001b[94mgenus\u001b[0m = Aeropyrum\n",
+      "├── \u001b[94msequence\u001b[0m = MARRIVVESYPYPRVEDLQVELVERKGLGHPDTICDAAAEAVSRELSKYYLERFGKILHHNVDKVLLVGGQAAPRLGGGEVLQPIYILVSGRVTTEVRTGGGVESVPVGPIILRAVKNYIRENFRFLDPEEHVIVDYRVGRGSVDLVGIFEAEDKVPLANDTSIGSGHAPLSTLERLVLETERILNSRETKERLPAVGEDVKVMGVRDGKSITLTVAMAVVSSQVGSVSDYLAVKEEAESLILDLASRIAPDYDVRVNINTGDIPEKKILYLTVTGTSAEHGDDGATGRGNRVNGLITPMRPMSMEAAAGKNPVNHVGKIYNVVANEMAALIHREVKGVEEVYVKLVSQIGKPIDRPRIVDVKVRMEGGREVTADAKREIEAIANSVLDGITGYTEKLVRGDITVY\n",
       "├── \u001b[94mregions\u001b[0m\n",
-      "│   └── 0\n",
+      "│   ├── 0\n",
+      "│   │   └── \u001b[4mRegion\u001b[0m\n",
+      "│   │       ├── \u001b[94mid\u001b[0m = 0f3c9ab0-dde3-4cb1-aa12-c25c5fbb514c\n",
+      "│   │       ├── \u001b[94mname\u001b[0m = S-adenosylmethionine synthetase, archaea\n",
+      "│   │       ├── \u001b[94mstart\u001b[0m = 2\n",
+      "│   │       └── \u001b[94mend\u001b[0m = 406\n",
+      "│   ├── 1\n",
+      "│   │   └── \u001b[4mRegion\u001b[0m\n",
+      "│   │       ├── \u001b[94mid\u001b[0m = 4d0151b8-22bb-4695-ac0b-f70f0e7bb7c9\n",
+      "│   │       ├── \u001b[94mname\u001b[0m = S-adenosylmethionine synthase\n",
+      "│   │       ├── \u001b[94mstart\u001b[0m = 3\n",
+      "│   │       └── \u001b[94mend\u001b[0m = 406\n",
+      "│   └── 2\n",
       "│       └── \u001b[4mRegion\u001b[0m\n",
-      "│           ├── \u001b[94mid\u001b[0m = MetK2\n",
-      "│           ├── \u001b[94mstart\u001b[0m = 2\n",
-      "│           └── \u001b[94mend\u001b[0m = 401\n",
+      "│           ├── \u001b[94mid\u001b[0m = a9a6dce1-248d-4b51-bb37-69a6e1b1ed52\n",
+      "│           ├── \u001b[94mname\u001b[0m = S-adenosylmethionine synthetase, domain 3\n",
+      "│           ├── \u001b[94mstart\u001b[0m = 144\n",
+      "│           └── \u001b[94mend\u001b[0m = 248\n",
       "├── \u001b[94mec_number\u001b[0m = 2.5.1.6\n",
-      "└── \u001b[94mmol_weight\u001b[0m = 44273.0\n",
+      "└── \u001b[94mmol_weight\u001b[0m = 44235.0\n",
       "\n"
      ]
     }
    ],
    "source": [
     "print(blast_results[3])"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "💾 Sequence saved to protein.fasta\n"
+     ]
+    }
+   ],
+   "source": [
+    "from pyeed.core import ProteinRecord\n",
+    "\n",
+    "protein = ProteinRecord(name=\"test_protein\", sequence=\"MTEITAAMVKELREDKAVQLLREKGLGK\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from neo4j import GraphDatabase\n",
+    "\n",
+    "# import environment variables\n",
+    "import os\n",
+    "from dotenv import load_dotenv\n",
+    "\n",
+    "load_dotenv()\n",
+    "\n",
+    "\n",
+    "# URI examples: \"neo4j://localhost\", \"neo4j+s://xxx.databases.neo4j.io\"\n",
+    "URI = \"neo4j+s://ecd986f5.databases.neo4j.io\"\n",
+    "AUTH = (os.getenv(\"NEO4J_USER\"), os.getenv(\"NEO4J_PASSWORD\"))\n",
+    "\n",
+    "with GraphDatabase.driver(URI, auth=AUTH) as driver:\n",
+    "    driver.verify_connectivity()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def run_query(query):\n",
+    "    with driver.session() as session:\n",
+    "        result = session.run(query)\n",
+    "        records = list(result)  # Fetch all records within the session context\n",
+    "        return records"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "GO terms and their relationships have been created.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Sample GO terms data\n",
+    "go_terms = [\n",
+    "    {\n",
+    "        \"id\": \"GO:0008150\",\n",
+    "        \"name\": \"biological_process\",\n",
+    "        \"namespace\": \"biological_process\",\n",
+    "    },\n",
+    "    {\n",
+    "        \"id\": \"GO:0003674\",\n",
+    "        \"name\": \"molecular_function\",\n",
+    "        \"namespace\": \"molecular_function\",\n",
+    "    },\n",
+    "    {\n",
+    "        \"id\": \"GO:0005575\",\n",
+    "        \"name\": \"cellular_component\",\n",
+    "        \"namespace\": \"cellular_component\",\n",
+    "    },\n",
+    "    {\"id\": \"GO:0007049\", \"name\": \"cell cycle\", \"namespace\": \"biological_process\"},\n",
+    "    {\"id\": \"GO:0009987\", \"name\": \"cellular process\", \"namespace\": \"biological_process\"},\n",
+    "]\n",
+    "\n",
+    "# Create GO Term nodes\n",
+    "for term in go_terms:\n",
+    "    query = f\"\"\"\n",
+    "    CREATE (g:GO_Term {{id: '{term['id']}', name: '{term['name']}', namespace: '{term['namespace']}'}})\n",
+    "    \"\"\"\n",
+    "    run_query(query)\n",
+    "\n",
+    "# Define relationships between GO terms\n",
+    "relationships = [\n",
+    "    {\"parent_id\": \"GO:0008150\", \"child_id\": \"GO:0007049\", \"type\": \"IS_A\"},\n",
+    "    {\"parent_id\": \"GO:0008150\", \"child_id\": \"GO:0009987\", \"type\": \"IS_A\"},\n",
+    "]\n",
+    "\n",
+    "# Create relationships\n",
+    "for rel in relationships:\n",
+    "    query = f\"\"\"\n",
+    "    MATCH (p:GO_Term {{id: '{rel['parent_id']}'}}), (c:GO_Term {{id: '{rel['child_id']}'}})\n",
+    "    CREATE (c)-[:{rel['type']}]->(p)\n",
+    "    \"\"\"\n",
+    "    run_query(query)\n",
+    "\n",
+    "print(\"GO terms and their relationships have been created.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Proteins and their relationships to GO terms have been created.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Sample proteins data\n",
+    "proteins = [\n",
+    "    {\"name\": \"BRCA1 Protein\", \"sequence\": \"M1...1863\", \"length\": 1863},\n",
+    "    {\"name\": \"TP53 Protein\", \"sequence\": \"M1...393\", \"length\": 393},\n",
+    "]\n",
+    "\n",
+    "# Create Protein nodes\n",
+    "for protein in proteins:\n",
+    "    query = f\"\"\"\n",
+    "    CREATE (p:Protein {{name: '{protein['name']}', sequence: '{protein['sequence']}', length: {protein['length']}}})\n",
+    "    \"\"\"\n",
+    "    run_query(query)\n",
+    "\n",
+    "# Define relationships between proteins and GO terms\n",
+    "protein_go_relationships = [\n",
+    "    {\n",
+    "        \"protein_name\": \"BRCA1 Protein\",\n",
+    "        \"go_id\": \"GO:0007049\",\n",
+    "        \"relationship\": \"INVOLVED_IN\",\n",
+    "    },\n",
+    "    {\n",
+    "        \"protein_name\": \"TP53 Protein\",\n",
+    "        \"go_id\": \"GO:0009987\",\n",
+    "        \"relationship\": \"INVOLVED_IN\",\n",
+    "    },\n",
+    "]\n",
+    "\n",
+    "# Create relationships\n",
+    "for rel in protein_go_relationships:\n",
+    "    query = f\"\"\"\n",
+    "    MATCH (p:Protein {{name: '{rel['protein_name']}'}}), (g:GO_Term {{id: '{rel['go_id']}'}})\n",
+    "    CREATE (p)-[:{rel['relationship']}]->(g)\n",
+    "    \"\"\"\n",
+    "    run_query(query)\n",
+    "\n",
+    "print(\"Proteins and their relationships to GO terms have been created.\")"
+   ]
   }
  ],
  "metadata": {

diff --git a/docs/quick_start/basics.md b/docs/quick_start/basics.md
@@ -7,17 +7,20 @@ A sequence object can be created by passing a sequence string to the constructor
 === "Protein"
 
     ``` py
-    from pyeed.core import ProteinInfo
+    from pyeed.core import ProteinRecord
 
-    protein = ProteinInfo(sequence="MTEITAAMVKELREDKAVQLLREKGLGK")
+    protein = ProteinRecord(
+        name="My Protein",
+        sequence="MTEITAAMVKELREDKAVQLLREKGLGK"
+    )
     ```
 
 === "DNA"
 
     ``` py
-    from pyeed.core import DNAInfo
+    from pyeed.core import DNARecord
 
-    dna = DNAInfo(sequence="ATGCGTACGTCGATCGATCGATCGATCGATCGATCGATCGATCGTAGTC")
+    dna = DNARecord(sequence="ATGCGTACGTCGATCGATCGATCGATCGATCGATCGATCGATCGTAGTC")
     ```
 
 
@@ -28,27 +31,13 @@ Besides adding sequence information manually, PyEED also allows searching for se
 === "Protein"
 
     ``` py
-    protein = ProteinInfo.get_id("UCS38941.1")
+    protein = ProteinRecord.get_id("UCS38941.1")
     ```
 
 === "DNA"
 
     ``` py
-    dna = DNAInfo.get_id("NC_000913.3")
-    ```
-
-Alternatively, the sequence can be initiated from a sequence string, triggering a BLAST search in the NCBI database. If the sequence is found, the sequence object is filled with the corresponding information.
-
-=== "Protein"
-
-    ``` py
-    # Not yet implemented
-    ```
-
-=== "DNA"
-
-    ``` py
-    # Not yet implemented
+    # Not implemented
     ```
 
 ## ⬇️ Save a sequence
@@ -58,12 +47,6 @@ Alternatively, the sequence can be initiated from a sequence string, triggering
 The sequence can be stored in a `FASTA`, `JSON`, `YAML`, or `XML` file format. Therefore, the respective method can be used.
 The file path is passed as an argument to the method.
 
-=== "FASTA"
-
-    ``` py
-    protein.to_fasta("protein.fasta")
-    ```
-
 === "JSON"
 
     ``` py
@@ -82,8 +65,14 @@ The file path is passed as an argument to the method.
     protein.to_xml("protein.xml")
     ```
 
+=== "FASTA"
+
+    ``` py
+    protein.to_fasta("protein.fasta")
+    ```
+
 ### To database
-Alternatively, sequence data can be stored in a `PostgreSQL` database. Therefore, the `to_db()` method can be used.
+Alternatively, sequence data can be stored in a graph database by calling the `to_db()` method.
 
 ```py
 # Feature is currently implemented