Skip to content

Commit

Permalink
add new rule to filter outgroup from iqtree
Browse files Browse the repository at this point in the history
  • Loading branch information
ktmeaton committed Apr 27, 2021
1 parent f36ae76 commit 2169f03
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 53 deletions.
8 changes: 5 additions & 3 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,13 @@ rule all:
metadata_all_input,
# Multiqc
#multiqc_all_input,
# Alignment
snippy_multi_filter_prune_all_input,
# Phylo
#iqtree_all_input,
# Post-Phylo
lsd_all_input,
snippy_multi_filter_prune_all_input,
iqtree_filter_all_input,
# Post-Phylo (no lsd2 for gh actions)
#lsd_all_input,
# Plot
plot_missing_data_all_input,
plot_snp_matrix_all_input,
Expand Down
28 changes: 0 additions & 28 deletions workflow/rules/filter_mask.smk
Original file line number Diff line number Diff line change
Expand Up @@ -167,31 +167,3 @@ rule snippy_multi_filter:
--output {output.filter_snp_aln} \
--log {output.log};
"""


rule filter_taxa:
"""
Remove taxa from an alignment based on a tree.
"""
input:
tree = results_dir + "/{rule}/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/{rule}.nex",
tsv = results_dir + "/metadata/{reads_origin}/metadata.tsv",
taxa = results_dir + "/{rule}/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/{rule}.filter-taxa.txt",
aln = results_dir + "/snippy_multi/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/snippy-multi.snps.aln",
output:
nex = results_dir + "/{rule}/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/{rule}.filter.nex",
nwk = results_dir + "/{rule}/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/{rule}.filter.nwk",
tsv = results_dir + "/{rule}/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/metadata.tsv",
aln = results_dir + "/{rule}/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/{rule}.filter.aln",
params:
taxa = config["iqtree_outgroup"],
outdir = results_dir + "/{rule}/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/",
shell:
"""
workflow/scripts/filter_alignment.py \
--tree {input.tree} \
--aln {input.aln} \
--outdir {params.outdir} \
--metadata {input.tsv} \
--prune-tips {input.taxa}
"""
39 changes: 38 additions & 1 deletion workflow/rules/phylogeny.smk
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,42 @@ rule iqtree:
{scripts_dir}/newick2nexus.py {output.nwk} {output.nex}
"""

rule iqtree_filter:
"""
Filter IQTREE output to remove the output and sync alignment and metadata.
"""
input:
aln = results_dir + "/snippy_multi/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/snippy-multi.snps.aln",
tsv = results_dir + "/metadata/{reads_origin}/metadata.tsv",
tree = results_dir + "/iqtree/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/iqtree.treefile",
taxa = results_dir + "/iqtree/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/iqtree.filter-taxa.txt",
output:
taxa_aln = results_dir + "/iqtree/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/filter-taxa/snippy-multi.snps.aln",
taxa_tsv = results_dir + "/iqtree/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/filter-taxa/metadata.tsv",
taxa_tree = results_dir + "/iqtree/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/filter-taxa/iqtree.treefile",
sites_aln = results_dir + "/iqtree/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/filter-sites/snippy-multi.snps.aln",
sites_log = results_dir + "/iqtree/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/filter-sites/snippy-multi.snps.log",
params:
taxa_outdir = results_dir + "/iqtree/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/filter-taxa/",
sites_outdir = results_dir + "/iqtree/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/filter-sites/",
keep_singleton = config["snippy_keep_singleton"],
shell:
"""
python3 {scripts_dir}/filter_taxa.py \
--tree {input.tree} \
--aln {input.aln} \
--outdir {params.taxa_outdir} \
--metadata {input.tsv} \
--prune-tips {input.taxa};
python3 {scripts_dir}/filter_sites.py \
--fasta {output.taxa_aln} \
--missing 100 \
{params.keep_singleton} \
--output {output.sites_aln} \
--log {output.sites_log};
"""

rule lsd:
"""
Estimate a time-scaled phylogeny using LSD2 in IQTREE.
Expand Down Expand Up @@ -97,7 +133,8 @@ rule lsd:
touch {output.taxa}
fi
"""
rule beast_geo:

rule beast:
"""
Continuous phylogeography with BEAST
"""
Expand Down
22 changes: 9 additions & 13 deletions workflow/rules/targets.smk
Original file line number Diff line number Diff line change
Expand Up @@ -302,28 +302,24 @@ rule iqtree_all:
iqtree_all_input

#------------------------------------------------------------------------------#
iqtree_prune_all_input = expand(results_dir + "/iqtree/all/{locus_name}/prune/filter{missing_data}/iqtree.nex",
locus_name=config["reference_locus_name"],
missing_data = config["snippy_missing_data"])
rule iqtree_prune_all:
input:
iqtree_prune_all_input

#------------------------------------------------------------------------------#
# Remove outgroups
remove_outgroup_all_input = expand(results_dir + "/iqtree/all/{locus_name}/full/filter{missing_data}/iqtree.filter.nex",
iqtree_filter_all_input = expand(results_dir + "/iqtree/all/{locus_name}/full/filter{missing_data}/filter-sites/snippy-multi.snps.aln",
locus_name=config["reference_locus_name"],
missing_data = config["snippy_missing_data"])
rule remove_outgroup_all:
rule iqtree_filter_all:
input:
remove_outgroup_all_input
iqtree_filter_all_input

remove_outgroup_prune_all_input = expand(results_dir + "/iqtree/all/{locus_name}/prune/filter{missing_data}/iqtree.filter.nex",


#------------------------------------------------------------------------------#
iqtree_prune_all_input = expand(results_dir + "/iqtree/all/{locus_name}/prune/filter{missing_data}/iqtree.nex",
locus_name=config["reference_locus_name"],
missing_data = config["snippy_missing_data"])
rule remove_outgroup_prune_all:
rule iqtree_prune_all:
input:
remove_outgroup_prune_all_input
iqtree_prune_all_input

#------------------------------------------------------------------------------#
# LSD Dating
Expand Down
7 changes: 2 additions & 5 deletions workflow/scripts/filter_taxa.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,8 @@ def main(
tree_type = "newick"
elif tree_ext == ".nex" or ".nexus":
tree_type = "nexus"
tree_prefix = os.path.splitext(tree_basename)[0]
if prune_tips_path or keep_tips_path:
filter_tree_path = os.path.join(
outdir, tree_prefix + ".filter-taxa" + tree_ext
)
filter_tree_path = os.path.join(outdir, tree_basename)

# -------------------------------------------------------------------------
# Load Metadata
Expand Down Expand Up @@ -153,7 +150,7 @@ def main(
# Get minimum branch length for output precision
min_branch_length = 1e10

for c in tree.find_clades():
for c in tree.get_terminals():
# Prune taxa
if c.name not in keep_taxa:
print("Pruning taxa", c.name, "from the tree.")
Expand Down
6 changes: 3 additions & 3 deletions workflow/scripts/prune_taxa.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,13 +83,13 @@
if geo_val == "NA":
geo_val = metadata_df[GEO_ALT][sample]

date = metadata_df["date"][sample].lstrip("[").rstrip("]")
date = str(metadata_df["date"][sample]).lstrip("[").rstrip("]")
if date != "NA":
# If it's a range, take the mean
date_split = [int(d) for d in date.split(":")]
date_split = [float(d) for d in date.split(":")]
if len(date_split) > 1:
date = sum(date_split) / len(date_split)
date = int(date)
date = float(date)

# Get the shortest pairwise distance (not to iteself)
snp_diffs = snp_mat_df.loc[sample]
Expand Down

0 comments on commit 2169f03

Please sign in to comment.