Skip to content

Commit

Permalink
first attempt at fixing eager CI
Browse files Browse the repository at this point in the history
  • Loading branch information
ktmeaton committed Apr 27, 2021
1 parent 3acb65f commit 3790bdd
Show file tree
Hide file tree
Showing 12 changed files with 317 additions and 287 deletions.
2 changes: 1 addition & 1 deletion results/config/auspice_config.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"title": "Plague Phylogeography Test",
"build_url": "https://github.com/ktmeaton/plague-phylogeography-projects/tree/main/test",
"build_url": "https://github.com/ktmeaton/plague-phylogeography-projects/tree/tree",
"maintainers": [
{
"name": "Katherine Eaton",
Expand Down
36 changes: 18 additions & 18 deletions results/config/snakemake.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@ sqlite_select_command_asm : SELECT
LEFT JOIN Assembly
ON AssemblyBioSampleAccession = BioSampleAccession
WHERE
BioSampleComment LIKE '%KEEP%Assembly%Modern%Placement%'
(BioSampleComment LIKE '%KEEP%Assembly%Modern%'
AND length(AssemblyFTPGenbank) > 0
AND length(BioSampleCollectionDate) > 0)
OR (BioSampleComment LIKE '%KEEP%Assembly%Modern%Outgroup%')

sqlite_select_command_sra : SELECT
BioSampleAccession,
Expand All @@ -21,8 +24,7 @@ sqlite_select_command_sra : SELECT
LEFT JOIN SRA
ON SRABioSampleAccession = BioSampleAccession
WHERE
(SRARunAccession = 'SRR1048902' OR
SRARunAccession = 'SRR1048905')
(BioSampleComment LIKE '%SRA%Test%')

sqlite_select_command_local : SELECT
BioSampleAccession
Expand Down Expand Up @@ -85,28 +87,26 @@ snippy_snp_density : 10
snippy_keep_singleton : "--keep-singleton"
snippy_multi_plot_missing_data:
- 0
- 5
- 10
- 15
- 20
- 25
- 50
- 75
- 100

# IQTREE
iqtree_model: "-m MFP"
#iqtree_model : "-m K3Pu+F+I"
iqtree_seed : "674947425" # keeping it consistent in a config file allows for checkpointing
#iqtree_model: "-m MFP"
iqtree_model : "-m K3Pu+F+I"
iqtree_seed : "47321424" # keeping it consistent in a config file allows for checkpointing
iqtree_runs : 1
iqtree_other : "--ufboot 1000"

# Outgroup Option #1: Reference
iqtree_outgroup : "Reference"
#iqtree_outgroup : "Reference"

# Outgroup Option #2: Basal modern clade
#iqtree_outgroup : "GCA_000323485.1_ASM32348v1_genomic"
#iqtree_outgroup : "GCA_000323485.1_ASM32348v1_genomic,GCA_000323845.1_ASM32384v1_genomic"

# Outgroup Option #3: Basal ancient clade
#iqtree_outgroup : "SAMEA3541826,SAMEA3541827"

# Outgroup Option #4: Other species
#iqtree_outgroup : "GCA_900637475.1_51108_B01_genomic,GCA_000834295.1_ASM83429v1_genomic"
#iqtree_outgroup : "SAMEA3541826"

iqtree_other : "--ufboot 1000"
iqtree_runs : 1
# Outgroup Option #4: Other species
iqtree_outgroup : "GCA_900637475.1_51108_B01_genomic,GCA_000834295.1_ASM83429v1_genomic"
Binary file modified results/sqlite_db/yersinia_pestis_db.sqlite
Binary file not shown.
1 change: 1 addition & 0 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ wildcard_constraints:
missing_data="([0-9]*)",
locus_name="([A-Z,a-z]*)",


# -----------------------------------------------------------------------------#
# Main Target #
# -----------------------------------------------------------------------------#
Expand Down
14 changes: 8 additions & 6 deletions workflow/rules/alignment.smk
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,11 @@ rule eager:
cpus=workflow.global_resources["cpus"] if ("cpus" in workflow.global_resources) else 1,
mem_mb=workflow.global_resources["mem_mb"] if ("mem_mb" in workflow.global_resources) else 4000,
shell:
"export NXF_OPTS='-Xms50m -Xmx{resources.mem_mb}m'; "
"python {scripts_dir}/eager_tsv.py --files \"{input.fastq}\" --organism \"{config[organism]}\" --tsv {output.eager_tsv}; "
"cd {results_dir}/eager/{wildcards.reads_origin}/{wildcards.sample}; "
"nextflow \
"""
export NXF_OPTS='-Xms50m -Xmx{resources.mem_mb}m';
python {scripts_dir}/eager_tsv.py --files \"{input.fastq}\" --organism \"{config[organism]}\" --tsv {output.eager_tsv};
cd {results_dir}/eager/{wildcards.reads_origin}/{wildcards.sample};
nextflow \
-c {config_dir}/eager.config \
run nf-core/eager \
-r {config[eager_rev]} \
Expand All @@ -57,8 +58,9 @@ rule eager:
--max_memory {resources.mem_mb}.MB \
--max_time {resources.time_min}m \
{config[eager_other]} \
-resume > {output.log_txt}"
"{scripts_dir}/eager_cleanup.sh {results_dir} {wildcards.reads_origin} {wildcards.sample}; "
-resume > {output.log_txt};
{scripts_dir}/eager_cleanup.sh {results_dir} {wildcards.reads_origin} {wildcards.sample};
"""

# -----------------------------------------------------------------------------#
rule snippy_pairwise:
Expand Down
45 changes: 40 additions & 5 deletions workflow/rules/filter_mask.smk
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ rule snippy_multi_extract:
resources:
cpus = 1,
params:
outdir = lambda wildcards: os.path.join(results_dir, "snippy_multi", wildcards.reads_origin, wildcards.locus_name, "full"),
outdir = results_dir + "/snippy_multi/{reads_origin}/{locus_name}/full/",
shell:
"""
{scripts_dir}/extract_locus.sh \
Expand All @@ -109,7 +109,6 @@ rule snippy_multi_extract:
cp {input.tsv} {output.tsv};
"""


#------------------------------------------------------------------------------#
rule snippy_multi_prune:
"""
Expand All @@ -121,19 +120,27 @@ rule snippy_multi_prune:
dist = results_dir + "/snippy_multi/{reads_origin}/{locus_name}/full/snippy-multi.snps.dist",
output:
aln = results_dir + "/snippy_multi/{reads_origin}/{locus_name}/prune/snippy-multi.snps.aln",
taxa = results_dir + "/snippy_multi/{reads_origin}/{locus_name}/prune/taxa-keep.txt",
tsv = results_dir + "/snippy_multi/{reads_origin}/{locus_name}/prune/metadata.tsv",
log = results_dir + "/snippy_multi/{reads_origin}/{locus_name}/prune/snippy-multi.snps.log",
resources:
cpus = 1,
params:
outdir = lambda wildcards: os.path.join(results_dir, "snippy_multi", wildcards.reads_origin, wildcards.locus_name, "prune"),
outdir = results_dir + "/snippy_multi/{reads_origin}/{locus_name}/prune/",
shell:
"""
python3 {scripts_dir}/prune_alignment.py \
# Identify taxa to remove
python3 {scripts_dir}/prune_taxa.py \
--metadata {input.tsv} \
--matrix {input.dist} \
--aln {input.aln} \
--outdir {params.outdir} > {output.log}
# Filter the taxa out of the alignment
python3 {scripts_dir}/filter_taxa.py \
--metadata {input.tsv} \
--aln {input.aln} \
--keep-tips {output.taxa} \
--outdir {params.outdir}
"""

#------------------------------------------------------------------------------#
Expand All @@ -160,3 +167,31 @@ rule snippy_multi_filter:
--output {output.filter_snp_aln} \
--log {output.log};
"""


rule filter_taxa:
"""
Remove taxa from an alignment based on a tree.

Calls workflow/scripts/filter_alignment.py with the tree (--tree), the
filtered snippy-multi SNP alignment (--aln), the metadata table (--metadata),
the list of tips to prune (--prune-tips) and the output directory (--outdir).
"""
# {rule} is used as a wildcard in these path patterns, so the same rule can
# serve any upstream directory that provides <rule>.nex and
# <rule>.filter-taxa.txt alongside each other.
input:
tree = results_dir + "/{rule}/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/{rule}.nex",
tsv = results_dir + "/metadata/{reads_origin}/metadata.tsv",
taxa = results_dir + "/{rule}/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/{rule}.filter-taxa.txt",
aln = results_dir + "/snippy_multi/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/snippy-multi.snps.aln",
# NOTE(review): none of these output files is named in the shell command;
# presumably filter_alignment.py derives them from --outdir — confirm.
output:
nex = results_dir + "/{rule}/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/{rule}.filter.nex",
nwk = results_dir + "/{rule}/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/{rule}.filter.nwk",
tsv = results_dir + "/{rule}/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/metadata.tsv",
aln = results_dir + "/{rule}/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/{rule}.filter.aln",
# NOTE(review): params.taxa (the configured iqtree_outgroup) is not referenced
# in the shell command below; the tips to prune come from input.taxa instead.
params:
taxa = config["iqtree_outgroup"],
outdir = results_dir + "/{rule}/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/",
# NOTE(review): the script path is written relative to the working directory,
# whereas other rules invoke scripts via {scripts_dir}/... — confirm intended.
shell:
"""
workflow/scripts/filter_alignment.py \
--tree {input.tree} \
--aln {input.aln} \
--outdir {params.outdir} \
--metadata {input.tsv} \
--prune-tips {input.taxa}
"""
34 changes: 3 additions & 31 deletions workflow/rules/phylogeny.smk
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ rule iqtree:
constant_sites = results_dir + "/snippy_multi/{reads_origin}/{locus_name}/full/snippy-multi.constant_sites.txt",
aln = results_dir + "/snippy_multi/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/snippy-multi.snps.aln",
output:
tree = results_dir + "/iqtree/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/iqtree.nex",
nwk = results_dir + "/iqtree/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/iqtree.treefile",
nex = results_dir + "/iqtree/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/iqtree.nex",
iqtree = results_dir + "/iqtree/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/iqtree.iqtree",
log = results_dir + "/iqtree/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/iqtree.log",
outgroup = results_dir + "/iqtree/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/iqtree.filter-taxa.txt",
Expand Down Expand Up @@ -43,38 +44,9 @@ rule iqtree:
{params.other} \
-redo \
-pre {params.prefix} > {output.log};
{scripts_dir}/newick2nexus.py {params.prefix}.treefile {output.tree}
{scripts_dir}/newick2nexus.py {output.nwk} {output.nex}
"""

rule filter_taxa:
"""
Remove taxa from an alignment based on a tree.
"""
input:
tree = results_dir + "/{rule}/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/{rule}.nex",
tsv = results_dir + "/metadata/{reads_origin}/metadata.tsv",
taxa = results_dir + "/{rule}/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/{rule}.filter-taxa.txt",
aln = results_dir + "/snippy_multi/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/snippy-multi.snps.aln",
output:
nex = results_dir + "/{rule}/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/{rule}.filter.nex",
nwk = results_dir + "/{rule}/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/{rule}.filter.nwk",
tsv = results_dir + "/{rule}/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/metadata.tsv",
aln = results_dir + "/{rule}/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/{rule}.filter.aln",
params:
taxa = config["iqtree_outgroup"],
outdir = results_dir + "/{rule}/{reads_origin}/{locus_name}/{prune}/filter{missing_data}/",
shell:
"""
workflow/scripts/filter_alignment.py \
--tree {input.tree} \
--aln {input.aln} \
--outdir {params.outdir} \
--metadata {input.tsv} \
--prune-tips {input.taxa}
"""


rule lsd:
"""
Estimate a time-scaled phylogeny using LSD2 in IQTREE.
Expand Down
1 change: 0 additions & 1 deletion workflow/rules/targets.smk
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,6 @@ rule snippy_multi_extract_sra:
input:
snippy_multi_extract_sra_input


# -----------------------------------------------------------------------------#
snippy_multi_prune_all_input = expand(results_dir + "/snippy_multi/all/{locus_name}/prune/snippy-multi.snps.aln",
locus_name=config["reference_locus_name"],
Expand Down
Loading

0 comments on commit 3790bdd

Please sign in to comment.