-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
673 additions
and
754 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,245 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"id": "proprietary-spice", | ||
"metadata": {}, | ||
"source": [ | ||
"# BEAST Analysis Notebook" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "gross-cutting", | ||
"metadata": {}, | ||
"source": [ | ||
"---\n", | ||
"\n", | ||
"# 0. SETUP" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"id": "expanded-saudi", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import os\n", | ||
"import pandas as pd\n", | ||
"import seaborn as sns" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"id": "linear-burden", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# ------------------------------------------\n", | ||
"BRANCH_LIST = {\n", | ||
" \"0.PRE\": [\"0.PRE1\", \"0.PRE2\"], \n", | ||
" \"0.ANT4\" : [\"0.ANT4\"], \n", | ||
" \"0.PE\": [\"0.PE2\", \"0.PE4m\", \"0.PE4m\", \"0.PE4t\", \"0.PE4a\", \"0.PE5\", \"0.PE7\", \"0.PE8\", \"0.PE10\"], \n", | ||
" \"0.ANT\": [\"0.ANT1\", \"0.ANT2\",\"0.ANT3\",\"0.ANT5\"], \n", | ||
" \"1.PRE\" : [\"1.PRE0\",\"1.PRE1\", \"1.PRE2\", \"1.PRE3\"], \n", | ||
" \"1.ANT\": [\"1.ANT1\"], \n", | ||
" \"1.IN\": [\"1.IN1\",\"1.IN2\",\"1.IN3\"], \n", | ||
" \"1.ORI\" : [\"1.ORI1\", \"1.ORI2\", \"1.ORI3\"],\n", | ||
" \"2.ANT\": [\"2.ANT1\",\"2.ANT2\",\"2.ANT3\" ], \n", | ||
" \"2.MED\": [\"2.MED0\", \"2.MED1\",\"2.MED2\",\"2.MED3\" ], \n", | ||
" \"3.ANT\": [\"3.ANT1\", \"3.ANT2\" ], \n", | ||
" \"4.ANT\": [\"4.ANT1\" ], \n", | ||
"}\n", | ||
"\n", | ||
"NUM_STATES = 10" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "mechanical-monte", | ||
"metadata": {}, | ||
"source": [ | ||
"---\n", | ||
"\n", | ||
"# 1. IMPORT" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "special-clause", | ||
"metadata": {}, | ||
"source": [ | ||
"## Import Log Files" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"id": "jewish-australian", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"0.PRE\n", | ||
"0.ANT4\n", | ||
"0.PE\n", | ||
"0.ANT\n", | ||
"1.PRE\n", | ||
"1.ANT\n", | ||
"1.IN\n", | ||
"1.ORI\n", | ||
"2.ANT\n", | ||
"2.MED\n", | ||
"3.ANT\n", | ||
"4.ANT\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"log_dir = \"/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/beast/all/chromosome/clade\"\n", | ||
"\n", | ||
"# Construct a dictionary to hold the dataframes\n", | ||
"log_dict = {branch:{\"strict\": {}, \"relaxed\": {}} for branch in BRANCH_LIST}\n", | ||
"\n", | ||
"for branch in BRANCH_LIST:\n", | ||
" print(branch)\n", | ||
" for filename in os.listdir(log_dir):\n", | ||
" filepath = os.path.join(log_dir, filename)\n", | ||
" if branch in filename:\n", | ||
" # Find Strict Clock log\n", | ||
" if \"SC\" in filename:\n", | ||
" strict_log = filepath\n", | ||
" # Find relaxed clock log\n", | ||
" elif \"UCLN\" in filename:\n", | ||
" relaxed_log = filepath\n", | ||
" \n", | ||
" # Add log files to dict\n", | ||
" log_dict[branch][\"strict\"][\"logfile\"] = strict_log\n", | ||
" log_dict[branch][\"relaxed\"][\"logfile\"] = relaxed_log" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "aboriginal-intent", | ||
"metadata": {}, | ||
"source": [ | ||
"## Initialize the Parameter data frame" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "absolute-receiver", | ||
"metadata": {}, | ||
"source": [ | ||
"## Parse Log Files to DataFrames" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 10, | ||
"id": "lyric-alert", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"0.PRE strict\n" | ||
] | ||
}, | ||
{ | ||
"ename": "TypeError", | ||
"evalue": "unsupported operand type(s) for +: 'dict' and 'dict'", | ||
"output_type": "error", | ||
"traceback": [ | ||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | ||
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", | ||
"\u001b[0;32m<ipython-input-10-a924a7098954>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[0mline_dict\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0mcol\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mval\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mcol\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mval\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcol_names\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msplit_line\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[0;31m# add the clade and col values\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 31\u001b[0;31m \u001b[0mparam_dict\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m\"clade\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mbranch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"clock\"\u001b[0m \u001b[0;34m:\u001b[0m \u001b[0mclock\u001b[0m\u001b[0;34m}\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mline_dict\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 32\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparam_dict\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 33\u001b[0m \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | ||
"\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for +: 'dict' and 'dict'" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"for branch in BRANCH_LIST:\n", | ||
" for clock in log_dict[branch]:\n", | ||
" print(branch, clock)\n", | ||
" log_filename = log_dict[branch][clock][\"logfile\"]\n", | ||
" with open(log_filename) as infile:\n", | ||
" read_file = infile.read().strip().split(\"\\n\")\n", | ||
" \n", | ||
" # Detect how many lines are headers with \"#\"\n", | ||
" for i in range (0, len(read_file)):\n", | ||
" # These are the column names\n", | ||
" if not read_file[i].startswith(\"#\"):\n", | ||
" col_names = read_file[i].split(\"\\t\")\n", | ||
" # Add branch and clock to col_names\n", | ||
" col_names = [\"clade\",\"clock\"] + col_names\n", | ||
" break\n", | ||
" \n", | ||
" # Create dataframe from column names\n", | ||
" param_df = pd.DataFrame({col:[] for col in col_names})\n", | ||
" \n", | ||
" # Remove the header lines\n", | ||
" read_file = read_file[i+1:]\n", | ||
" \n", | ||
" # Sample the desired number of states, from the end!\n", | ||
" sample_file = read_file[len(read_file)-NUM_STATES:]\n", | ||
"\n", | ||
" # Parse values into dict\n", | ||
" for line in sample_file:\n", | ||
" split_line = line.split(\"\\t\")\n", | ||
" line_dict = {col:float(val) for col,val in zip(col_names, split_line)}\n", | ||
" # add the clade and col values\n", | ||
" param_dict = {\"clade\": branch, \"clock\" : clock} + line_dict\n", | ||
" print(param_dict)\n", | ||
" break\n", | ||
" break" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "environmental-length", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"df = log_dict[\"0.PRE\"][\"strict\"][\"df\"]\n", | ||
"\n", | ||
"sns.kdeplot(x=df[\"meanRate\"])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "outdoor-advance", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.7.3" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
Large diffs are not rendered by default.
Oops, something went wrong.