Commit

add mantel test to iqtree stats
ktmeaton committed Jun 2, 2021
1 parent fc321f4 commit faafa25
Showing 3 changed files with 673 additions and 754 deletions.
1 change: 1 addition & 0 deletions workflow/envs/merge/environment.yaml
@@ -57,6 +57,7 @@ dependencies:
 - conda-forge::pymdown-extensions=7.1
 - conda-forge::python=3.7.3
 - conda-forge::r-ape
+- conda-forge::scikit-bio=0.5.6
 - conda-forge::xopen=0.9.0
 - contextily=1.0.1
 - descartes=1.1.0
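
scikit-bio=0.5.6 is presumably pinned here for its Mantel test implementation (skbio.stats.distance.mantel), which the commit title refers to. A minimal sanity check, assuming the workflow/envs/merge environment has been rebuilt with this change:

# Hypothetical check that the newly pinned dependency resolves in the rebuilt environment.
import skbio
from skbio.stats.distance import mantel  # the function this commit presumably relies on

print(skbio.__version__)  # expected to match the pin above: 0.5.6
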
245 changes: 245 additions & 0 deletions workflow/notebooks/beast.py.ipynb
@@ -0,0 +1,245 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "proprietary-spice",
"metadata": {},
"source": [
"# BEAST Analysis Notebook"
]
},
{
"cell_type": "markdown",
"id": "gross-cutting",
"metadata": {},
"source": [
"---\n",
"\n",
"# 0. SETUP"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "expanded-saudi",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import pandas as pd\n",
"import seaborn as sns"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "linear-burden",
"metadata": {},
"outputs": [],
"source": [
"# ------------------------------------------\n",
"BRANCH_LIST = {\n",
" \"0.PRE\": [\"0.PRE1\", \"0.PRE2\"], \n",
" \"0.ANT4\" : [\"0.ANT4\"], \n",
" \"0.PE\": [\"0.PE2\", \"0.PE4m\", \"0.PE4m\", \"0.PE4t\", \"0.PE4a\", \"0.PE5\", \"0.PE7\", \"0.PE8\", \"0.PE10\"], \n",
" \"0.ANT\": [\"0.ANT1\", \"0.ANT2\",\"0.ANT3\",\"0.ANT5\"], \n",
" \"1.PRE\" : [\"1.PRE0\",\"1.PRE1\", \"1.PRE2\", \"1.PRE3\"], \n",
" \"1.ANT\": [\"1.ANT1\"], \n",
" \"1.IN\": [\"1.IN1\",\"1.IN2\",\"1.IN3\"], \n",
" \"1.ORI\" : [\"1.ORI1\", \"1.ORI2\", \"1.ORI3\"],\n",
" \"2.ANT\": [\"2.ANT1\",\"2.ANT2\",\"2.ANT3\" ], \n",
" \"2.MED\": [\"2.MED0\", \"2.MED1\",\"2.MED2\",\"2.MED3\" ], \n",
" \"3.ANT\": [\"3.ANT1\", \"3.ANT2\" ], \n",
" \"4.ANT\": [\"4.ANT1\" ], \n",
"}\n",
"\n",
"NUM_STATES = 10"
]
},
{
"cell_type": "markdown",
"id": "mechanical-monte",
"metadata": {},
"source": [
"---\n",
"\n",
"# 1. IMPORT"
]
},
{
"cell_type": "markdown",
"id": "special-clause",
"metadata": {},
"source": [
"## Import Log Files"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "jewish-australian",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.PRE\n",
"0.ANT4\n",
"0.PE\n",
"0.ANT\n",
"1.PRE\n",
"1.ANT\n",
"1.IN\n",
"1.ORI\n",
"2.ANT\n",
"2.MED\n",
"3.ANT\n",
"4.ANT\n"
]
}
],
"source": [
"log_dir = \"/mnt/c/Users/ktmea/Projects/plague-phylogeography-projects/main/beast/all/chromosome/clade\"\n",
"\n",
"# Construct a dictionary to hold the dataframes\n",
"log_dict = {branch:{\"strict\": {}, \"relaxed\": {}} for branch in BRANCH_LIST}\n",
"\n",
"for branch in BRANCH_LIST:\n",
" print(branch)\n",
" for filename in os.listdir(log_dir):\n",
" filepath = os.path.join(log_dir, filename)\n",
" if branch in filename:\n",
" # Find Strict Clock log\n",
" if \"SC\" in filename:\n",
" strict_log = filepath\n",
" # Find relaxed clock log\n",
" elif \"UCLN\" in filename:\n",
" relaxed_log = filepath\n",
" \n",
" # Add log files to dict\n",
" log_dict[branch][\"strict\"][\"logfile\"] = strict_log\n",
" log_dict[branch][\"relaxed\"][\"logfile\"] = relaxed_log"
]
},
{
"cell_type": "markdown",
"id": "aboriginal-intent",
"metadata": {},
"source": [
"## Initialize the Parameter data frame"
]
},
{
"cell_type": "markdown",
"id": "absolute-receiver",
"metadata": {},
"source": [
"## Parse Log Files to DataFrames"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "lyric-alert",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.PRE strict\n"
]
},
{
"ename": "TypeError",
"evalue": "unsupported operand type(s) for +: 'dict' and 'dict'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-10-a924a7098954>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[0mline_dict\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0mcol\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mval\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mcol\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mval\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcol_names\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msplit_line\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[0;31m# add the clade and col values\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 31\u001b[0;31m \u001b[0mparam_dict\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m\"clade\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mbranch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"clock\"\u001b[0m \u001b[0;34m:\u001b[0m \u001b[0mclock\u001b[0m\u001b[0;34m}\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mline_dict\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 32\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparam_dict\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 33\u001b[0m \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for +: 'dict' and 'dict'"
]
}
],
"source": [
"for branch in BRANCH_LIST:\n",
" for clock in log_dict[branch]:\n",
" print(branch, clock)\n",
" log_filename = log_dict[branch][clock][\"logfile\"]\n",
" with open(log_filename) as infile:\n",
" read_file = infile.read().strip().split(\"\\n\")\n",
" \n",
" # Detect how many lines are headers with \"#\"\n",
" for i in range (0, len(read_file)):\n",
" # These are the column names\n",
" if not read_file[i].startswith(\"#\"):\n",
" col_names = read_file[i].split(\"\\t\")\n",
" # Add branch and clock to col_names\n",
" col_names = [\"clade\",\"clock\"] + col_names\n",
" break\n",
" \n",
" # Create dataframe from column names\n",
" param_df = pd.DataFrame({col:[] for col in col_names})\n",
" \n",
" # Remove the header lines\n",
" read_file = read_file[i+1:]\n",
" \n",
" # Sample the desired number of states, from the end!\n",
" sample_file = read_file[len(read_file)-NUM_STATES:]\n",
"\n",
" # Parse values into dict\n",
" for line in sample_file:\n",
" split_line = line.split(\"\\t\")\n",
" line_dict = {col:float(val) for col,val in zip(col_names, split_line)}\n",
" # add the clade and col values\n",
" param_dict = {\"clade\": branch, \"clock\" : clock} + line_dict\n",
" print(param_dict)\n",
" break\n",
" break"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "environmental-length",
"metadata": {},
"outputs": [],
"source": [
"df = log_dict[\"0.PRE\"][\"strict\"][\"df\"]\n",
"\n",
"sns.kdeplot(x=df[\"meanRate\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "outdoor-advance",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
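
The log-parsing cell in beast.py.ipynb above stops after printing a single merged record and never populates the "df" entries that the later plotting cell reads. A minimal sketch of the same idea as a reusable function, assuming tab-delimited BEAST .log files with '#' comment headers (the layout the cell itself expects); the function name, paths, and usage are illustrative only:

import pandas as pd

def parse_beast_log(log_path, clade, clock, num_states=10):
    """Return the last `num_states` sampled states of a BEAST log as a DataFrame."""
    with open(log_path) as infile:
        lines = infile.read().strip().split("\n")

    # Drop '#' comment lines; the first remaining line holds the column names.
    lines = [line for line in lines if not line.startswith("#")]
    col_names, data_lines = lines[0].split("\t"), lines[1:]

    rows = []
    for line in data_lines[-num_states:]:  # sample states from the end of the chain
        values = {col: float(val) for col, val in zip(col_names, line.split("\t"))}
        # Merge the clade/clock labels with the sampled values via dict unpacking.
        rows.append({"clade": clade, "clock": clock, **values})
    return pd.DataFrame(rows)

# Hypothetical usage mirroring the notebook's loop:
# log_dict["0.PRE"]["strict"]["df"] = parse_beast_log(
#     log_dict["0.PRE"]["strict"]["logfile"], "0.PRE", "strict")
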
1,181 changes: 427 additions & 754 deletions workflow/notebooks/iqtree_stats.py.ipynb

Large diffs are not rendered by default.
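
The iqtree_stats diff is not shown here, but per the commit title it adds a Mantel test, presumably using the scikit-bio dependency introduced in environment.yaml above. A minimal sketch of such a test, assuming two square distance matrices over the same samples (for example patristic vs. geographic distances); the matrices and IDs below are made up for illustration:

# Minimal Mantel test sketch with scikit-bio; matrices and sample IDs are invented.
import numpy as np
from skbio import DistanceMatrix
from skbio.stats.distance import mantel

ids = ["sampleA", "sampleB", "sampleC", "sampleD"]

# Two symmetric, zero-diagonal distance matrices over the same samples,
# e.g. patristic distances from the tree and pairwise geographic distances.
tree_dists = DistanceMatrix(np.array([[0, 1, 2, 3],
                                      [1, 0, 4, 5],
                                      [2, 4, 0, 6],
                                      [3, 5, 6, 0]], dtype=float), ids)
geo_dists = DistanceMatrix(np.array([[0, 2, 1, 4],
                                     [2, 0, 3, 5],
                                     [1, 3, 0, 2],
                                     [4, 5, 2, 0]], dtype=float), ids)

# Spearman-based Mantel test with 999 permutations; returns the correlation
# coefficient, the permutation p-value, and the number of samples compared.
coeff, p_value, n = mantel(tree_dists, geo_dists, method="spearman", permutations=999)
print(f"Mantel r = {coeff:.3f}, p = {p_value:.3f}, n = {n}")
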
