-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
130 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 46, | ||
"id": "collectible-letter", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import os\n", | ||
"import sqlite3\n", | ||
"import random\n", | ||
"\n", | ||
"random.seed(1234)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 59, | ||
"id": "preliminary-tobago", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"2.MED1 SAMN03941711\n", | ||
"0.ANT1 SAMN07722859\n", | ||
"4.ANT1 SAMN05219223\n", | ||
"0.PE2 SAMN05225434\n", | ||
"2.MED0 SAMN07176224\n", | ||
"0.PE5 SAMN12138635\n", | ||
"1.ORI1 SAMN01831179\n", | ||
"0.ANT3 SAMN02403038\n", | ||
"2.ANT3 SAMN02403007\n", | ||
"1.ANT1 SAMN10228581\n", | ||
"1.ORI3 SAMN02470617\n", | ||
"2.MED2 SAMN02403012\n", | ||
"0.ANT2 SAMN02404401\n", | ||
"1.ORI2 SAMN14943533\n", | ||
"1.IN3 SAMN02403021\n", | ||
"3.ANT2 SAMN02403094\n", | ||
"0.PE4 SAMN03861481\n", | ||
"0.ANT5 SAMN08866640\n", | ||
"2.ANT1 SAMN13021974\n", | ||
"1.IN2 SAMN02403011\n", | ||
"2.MED3 SAMN13907443\n", | ||
"1.IN1 SAMN02403014\n", | ||
"2.ANT2 SAMN02403001\n", | ||
"3.ANT1 SAMN02403051\n", | ||
"0.PE7 SAMN02403027\n", | ||
"0.PE10 SAMN02403015\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"project_dir = os.path.dirname(os.path.dirname(os.getcwd()))\n", | ||
"database = os.path.join(project_dir, \"results\", \"sqlite_db\", \"yersinia_pestis_db.sqlite\")\n", | ||
"\n", | ||
"query = \"\"\"\n", | ||
" SELECT\n", | ||
" BioSampleAccession,\n", | ||
" BioSampleBranch,\n", | ||
" BioSampleComment\n", | ||
" FROM\n", | ||
" BioSample\n", | ||
" LEFT Join\n", | ||
" Assembly ON BioSampleAccession==AssemblyBioSampleAccession\n", | ||
" WHERE\n", | ||
" BioSampleComment LIKE '%KEEP%Assembly%Modern%'\n", | ||
" \"\"\"\n", | ||
" \n", | ||
"conn = sqlite3.connect(database)\n", | ||
"cur = conn.cursor()\n", | ||
"result = cur.execute(query).fetchall()\n", | ||
"\n", | ||
"# Create a mapping of branches to samples\n", | ||
"branch_dict = {}\n", | ||
"for rec in result:\n", | ||
" accession = rec[0]\n", | ||
" branch = rec[1]\n", | ||
" # Strip sub branches that are letter designations\n", | ||
" while branch[-1].isalpha():\n", | ||
" branch = branch[:-1]\n", | ||
" if branch not in branch_dict:\n", | ||
" branch_dict[branch] = [] \n", | ||
" branch_dict[branch].append(accession) \n", | ||
"\n", | ||
"random_samples = {}\n", | ||
"# Randomly sample\n", | ||
"for branch in branch_dict:\n", | ||
" sample = random.choice(branch_dict[branch])\n", | ||
" random_samples[branch] = sample\n", | ||
"\n", | ||
"# print out\n", | ||
"for branch in random_samples:\n", | ||
" print(branch, random_samples[branch])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "abandoned-jackson", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.7.3" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |