From f36ae76e53b8255a9b78be9cbfb2207e8075c81f Mon Sep 17 00:00:00 2001 From: Katherine Eaton Date: Tue, 27 Apr 2021 12:47:31 -0400 Subject: [PATCH] fix typo in eager_tsv script --- results/config/snakemake.yaml | 6 +++--- workflow/Snakefile | 5 +++-- workflow/rules/alignment.smk | 4 ++-- workflow/scripts/eager_tsv.py | 4 ++++ 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/results/config/snakemake.yaml b/results/config/snakemake.yaml index 77c64a938..131042bd4 100644 --- a/results/config/snakemake.yaml +++ b/results/config/snakemake.yaml @@ -14,7 +14,7 @@ sqlite_select_command_asm : SELECT (BioSampleComment LIKE '%KEEP%Assembly%Modern%' AND length(AssemblyFTPGenbank) > 0 AND length(BioSampleCollectionDate) > 0) - OR (BioSampleComment LIKE '%KEEP%Assembly%Modern%Outgroup%') + sqlite_select_command_sra : SELECT BioSampleAccession, @@ -100,7 +100,7 @@ iqtree_runs : 1 iqtree_other : "--ufboot 1000" # Outgroup Option #1: Reference -#iqtree_outgroup : "Reference" +iqtree_outgroup : "Reference" # Outgroup Option #2: Basal modern clade #iqtree_outgroup : "GCA_000323485.1_ASM32348v1_genomic,GCA_000323845.1_ASM32384v1_genomic" @@ -109,4 +109,4 @@ iqtree_other : "--ufboot 1000" #iqtree_outgroup : "SAMEA3541826" # Outgroup Option #4: Outgroup -iqtree_outgroup : "GCA_900637475.1_51108_B01_genomic,GCA_000834295.1_ASM83429v1_genomic" +#iqtree_outgroup : "GCA_900637475.1_51108_B01_genomic,GCA_000834295.1_ASM83429v1_genomic" diff --git a/workflow/Snakefile b/workflow/Snakefile index 448f0ed33..4cba2e8ad 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -114,9 +114,10 @@ rule all: # Multiqc #multiqc_all_input, # Phylo - iqtree_all_input, + #iqtree_all_input, # Post-Phylo - #lsd_all_input, + lsd_all_input, + snippy_multi_filter_prune_all_input, # Plot plot_missing_data_all_input, plot_snp_matrix_all_input, diff --git a/workflow/rules/alignment.smk b/workflow/rules/alignment.smk index 114169a3c..575d61473 100644 --- a/workflow/rules/alignment.smk +++ b/workflow/rules/alignment.smk @@ -21,7 +21,7 @@ rule eager: final_bam = results_dir + "/eager/{reads_origin}/{sample}/final_bams/{sample}.bam", eager_tsv = results_dir + "/eager/{reads_origin}/{sample}/metadata_{sample}.tsv", log_html = results_dir + "/eager/{reads_origin}/{sample}/{sample}.html", - #log_txt = results_dir + "/eager/{reads_origin}/{sample}/{sample}.log", + log_txt = results_dir + "/eager/{reads_origin}/{sample}/{sample}.log", wildcard_constraints: reads_origin = "(sra|local)", resources: @@ -58,7 +58,7 @@ rule eager: --max_memory {resources.mem_mb}.MB \ --max_time {resources.time_min}m \ {config[eager_other]} \ - -resume; + -resume > {output.log_txt}; {scripts_dir}/eager_cleanup.sh {results_dir} {wildcards.reads_origin} {wildcards.sample}; """ diff --git a/workflow/scripts/eager_tsv.py b/workflow/scripts/eager_tsv.py index 06125a331..bfa25e178 100755 --- a/workflow/scripts/eager_tsv.py +++ b/workflow/scripts/eager_tsv.py @@ -86,6 +86,10 @@ sample_file_dict[sample_name] = {} # Iterate through the single or paired fastq files for library_file in sample_files: + # Catch random log files + library_ext = os.path.splitext(library_file)[1] + if library_ext != ".fastq" and library_ext != ".gz": + continue library_id = library_file.split("_")[0] if library_id not in sample_file_dict[sample_name]: sample_file_dict[sample_name][library_id] = []