Skip to content

Commit

Permalink
add more date col to metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
ktmeaton committed Apr 27, 2021
1 parent 2d5b754 commit 8fba8a6
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 34 deletions.
40 changes: 16 additions & 24 deletions workflow/rules/phylogeny.smk
Original file line number Diff line number Diff line change
Expand Up @@ -136,36 +136,28 @@ rule lsd:

rule beast:
"""
Continuous phylogeography with BEAST
Prepare input files for beast1 and beast2
"""
input:
tsv = results_dir + "/lsd/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/metadata.tsv",
dates = results_dir + "/lsd/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/lsd.dates.txt",
tree = results_dir + "/lsd/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/lsd.filter.nex",
tsv = results_dir + "/iqtree/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/filter-taxa/metadata.tsv",
divtree = results_dir + "/iqtree/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/filter-taxa/iqtree.treefile",
timetree = results_dir + "/lsd/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/lsd.nex",
aln = results_dir + "/iqtree/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/filter-sites/snippy-multi.snps.aln",
constant_sites = results_dir + "/snippy_multi/{reads_origin}/{locus_name}/full/snippy-multi.constant_sites.txt",
output:
latlon = results_dir + "/beast/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/beast.latlon.txt",
tree = results_dir + "/beast/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/beast.nex",
timetree = results_dir + "/beast/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/beast.timetree.nwk",
divtree = results_dir + "/beast/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/beast.divtree.nwk",
dates = results_dir + "/beast/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/beast.dates.txt",
aln = results_dir + "/beast/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/beast.fasta",
constant_sites = results_dir + "/beast/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/beast.constant-sites.txt",

shell:
"""
echo -e "traits\tlat\tlon" > {output.latlon};
echo -e "Reference\t"{config[reference_lat]}"\t"{config[reference_lon]} >> {output.latlon};
echo -e "Reference\t"{config[reference_date_bp]} > {output.dates}
tail -n+2 {input.tsv} | while read line; \
do
sample=`echo "$line" | cut -f 1`;
lat=`echo "$line" | cut -f 9`;
lon=`echo "$line" | cut -f 10`;
date=`echo "$line" | cut -f 4 | sed "s/\[\|\]//g" | tr ":" "\n" | awk '{{sum+=$0}}END{{print 0-sum/NR}}'`;
if [[ $lat == "NA" ]]; then
lat=`echo "$line" | cut -f 7`;
lon=`echo "$line" | cut -f 8`;
fi;
echo -e $sample"\t"$lat"\t"$lon >> {output.latlon};
echo -e $sample"\t"$date >> {output.dates};
done
{scripts_dir}/multi2bi.py --tree {input.tree} --out {output.tree}
tail -n+2 {input.tsv} | cut -f 1,19,20 > {output.dates};
cut -f 1,21,22 {input.tsv} > {output.latlon};
cp {input.aln} {output.aln};
cp {input.divtree} {output.divtree};
cp {input.timetree} {output.timetree};
cp {input.constant_sites} {output.constant_sites};
"""
23 changes: 15 additions & 8 deletions workflow/rules/targets.smk
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ rule snippy_multi_all:
#------------------------------------------------------------------------------#
# Phylogeny
#------------------------------------------------------------------------------#
iqtree_all_input = expand(results_dir + "/iqtree/all/{locus_name}/full/filter{missing_data}/iqtree.nex",
iqtree_all_input = expand(results_dir + "/iqtree/all/{locus_name}/full/filter{missing_data}/iqtree.treefile",
locus_name=config["reference_locus_name"],
missing_data = config["snippy_missing_data"])
iqtree_local_input = [ x.replace("all", "local") for x in iqtree_all_input ]
Expand All @@ -280,6 +280,13 @@ rule iqtree_all:
input:
iqtree_all_input

iqtree_prune_all_input = expand(results_dir + "/iqtree/all/{locus_name}/prune/filter{missing_data}/iqtree.treefile",
locus_name=config["reference_locus_name"],
missing_data = config["snippy_missing_data"])
rule iqtree_prune_all:
input:
iqtree_prune_all_input

#------------------------------------------------------------------------------#
# Remove outgroups
iqtree_filter_all_input = expand(results_dir + "/iqtree/all/{locus_name}/full/filter{missing_data}/filter-sites/snippy-multi.snps.aln",
Expand Down Expand Up @@ -327,20 +334,20 @@ rule lsd_remove_outliers_prune_all:
input:
lsd_remove_outliers_prune_all_input
#------------------------------------------------------------------------------#
# Beast geo
beast_geo_all_input = expand(results_dir + "/beast/all/{locus_name}/full/filter{missing_data}/beast.nex",
# Beast
beast_all_input = expand(results_dir + "/beast/all/{locus_name}/full/filter{missing_data}/beast.dates.txt",
locus_name=config["reference_locus_name"],
missing_data = config["snippy_missing_data"])
rule beast_geo_all:
rule beast_all:
input:
beast_geo_all_input
beast_all_input

beast_geo_prune_all_input = expand(results_dir + "/beast/all/{locus_name}/prune/filter{missing_data}/beast.nex",
beast_prune_all_input = expand(results_dir + "/beast/all/{locus_name}/prune/filter{missing_data}/beast.dates.txt",
locus_name=config["reference_locus_name"],
missing_data = config["snippy_missing_data"])
rule beast_geo_prune_all:
rule beast_prune_all:
input:
beast_geo_prune_all_input
beast_prune_all_input
#------------------------------------------------------------------------------#
# Plot
#------------------------------------------------------------------------------#
Expand Down
24 changes: 22 additions & 2 deletions workflow/scripts/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,10 @@
"branch_number",
"continent",
"date_mean",
"date_bp_mean",
"date_err",
"lat",
"lon",
]

output_ref_vals = [
Expand All @@ -161,7 +164,10 @@
1,
"North America",
1992,
CURRENT_YEAR - 1992,
0,
38.7251776,
-105.607716,
]


Expand Down Expand Up @@ -241,7 +247,10 @@
"NA", # branch_number [15]
"NA", # continent [16]
"NA", # date Mean [17]
"NA", # date Err [18]
"NA", # date vp Mean [18]
"NA", # date err [19]
"NA", # lat [20]
"NA", # lon [21]
]

if result:
Expand Down Expand Up @@ -272,10 +281,13 @@
date_format = split_date[0]
date_bp_format = -(CURRENT_YEAR - int(date_format))

date_bp_mean = CURRENT_YEAR - date_mean

output_main_vals[2] = date_format
output_main_vals[3] = date_bp_format
output_main_vals[17] = date_mean
output_main_vals[18] = date_err
output_main_vals[18] = date_bp_mean
output_main_vals[19] = date_err

# Location Parsing
location = result[3] # Country:Province
Expand All @@ -294,6 +306,10 @@
output_main_vals[6] = geocode_dict[country_name][0]
output_main_vals[7] = geocode_dict[country_name][1]

# Set lat,lon first to country
output_main_vals[20] = geocode_dict[country_name][0]
output_main_vals[21] = geocode_dict[country_name][1]

# province processing (if exists)
if len(split_location) > 1:
province_name = split_location[1]
Expand All @@ -309,6 +325,10 @@
output_main_vals[8] = geocode_dict[province_name][0]
output_main_vals[9] = geocode_dict[province_name][1]

# Update to province
output_main_vals[20] = geocode_dict[province_name][0]
output_main_vals[21] = geocode_dict[province_name][1]

# continent parsing
output_main_vals[16] = continent_dict[country_name]

Expand Down

0 comments on commit 8fba8a6

Please sign in to comment.