Skip to content

Commit

Permalink
add table plot
Browse files Browse the repository at this point in the history
  • Loading branch information
ktmeaton committed Oct 8, 2020
1 parent f653b0b commit 3ecb83b
Show file tree
Hide file tree
Showing 21 changed files with 777 additions and 93 deletions.
14 changes: 11 additions & 3 deletions config/multiqc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@ report_comment: >
analysis pipeline. For information about how to interpret these results, please see the
<a href="https://plague-phylogeography.readthedocs.io/en/latest/" target="_blank">documentation</a>.
top_modules:
- 'qualimap'

qualimap_config:
general_stats_coverage:
- 1
Expand All @@ -28,5 +25,16 @@ table_columns_visible:
total_reads: False
mapped_reads: True
general_error_rate: True
snippy:
Percent_Aligned: True
Percent_Het: True
Length: False
Aligned: False
Unaligned: False
Variant: False
Het: False
Masked: False
Lowcov: False


decimalPoint_format: '.'
12 changes: 11 additions & 1 deletion workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ include: rules_dir + "/qc.smk"
include: rules_dir + "/filter_mask.smk"
include: rules_dir + "/functions.smk"
include: rules_dir + "/targets.smk"
include: rules_dir + "/test.smk"

# Report file
report: report_dir + "/workflow.rst"
Expand All @@ -62,8 +63,17 @@ rule all:
The default pipeline targets.
"""
input:
# Multiqc
results_dir + "/multiqc/multiqc_report.html",
results_dir + "/iqtree/iqtree.core-filter" + str(config['snippy_missing_data']) + ".treefile",
# IQTREE
expand(results_dir + "/iqtree/iqtree.core-{locus_name}.filter{missing_data}.treefile",
locus_name=config["reference_locus_name"],
missing_data=config['snippy_missing_data']),
# Tables of downloaded data
results_dir + "/data/assembly/table_assembly_fna.pdf",
results_dir + "/data/reference/table_reference_fna.pdf",
results_dir + "/data/local/table_local_fastq-gz.pdf",
results_dir + "/data/sra/table_sra_fastq-gz.pdf",

# -----------------------------------------------------------------------------#
# Help and Usage #
Expand Down
9 changes: 9 additions & 0 deletions workflow/envs/plot.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
channels:
- bioconda
- conda-forge
- anaconda
- defaults
dependencies:
- matplotlib=3.3.2
- pandas=1.1.2
- numpy=1.19.1
5 changes: 4 additions & 1 deletion workflow/envs/qc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,7 @@ channels:
dependencies:
- samtools=1.9
- qualimap=2.2.2d
- multiqc=1.9
#- multiqc=1.9
- pip=20.2.3
- pip:
- git+https://github.com/ktmeaton/MultiQC.git@snippy
1 change: 1 addition & 0 deletions workflow/report/download/table_assembly_fna.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Table of downloaded assembly fasta files.
1 change: 1 addition & 0 deletions workflow/report/download/table_sra_fastq-gz.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Table of downloaded SRA fastq files.
File renamed without changes.
1 change: 1 addition & 0 deletions workflow/report/qualimap_coverage_hist_plot.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Qualimap coverage histogram.
1 change: 1 addition & 0 deletions workflow/report/qualimap_gc_plot.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Qualimap GC content distribution.
1 change: 1 addition & 0 deletions workflow/report/qualimap_genome_fraction_plot.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Qualimap genome fraction covered by at least X reads.
567 changes: 500 additions & 67 deletions workflow/report/report.html

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions workflow/report/snippy_multi_plot.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Snippy-core alignment statistics.
1 change: 1 addition & 0 deletions workflow/report/snippy_pairwise_plot.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Snippy pairwise variant statistics.
16 changes: 8 additions & 8 deletions workflow/rules/alignment.smk
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,10 @@ rule snippy_pairwise:
filename=wildcards.sample + ".fna" if "assembly" in wildcards.reads_origin else "final_bams/" + wildcards.sample + ".bam"),
ref = expand(results_dir + "/data/reference/{reference}/{reference}.gbff", reference=identify_reference_sample()),
output:
snippy_dir = directory(results_dir + "/snippy_pairwise/{reads_origin}/{sample}"),
snp_txt = results_dir + "/snippy_pairwise/{reads_origin}/{sample}/{sample}_snippy.txt",
snippy_aln = results_dir + "/snippy_pairwise/{reads_origin}/{sample}/{sample}_snippy.aligned.fa",
snps_vcf = results_dir + "/snippy_pairwise/{reads_origin}/{sample}/{sample}_snippy.subs.vcf",
snippy_dir = directory(results_dir + "/snippy_pairwise/{reads_origin}/{sample}/"),
snp_txt = results_dir + "/snippy_pairwise/{reads_origin}/{sample}/{sample}.txt",
snippy_aln = results_dir + "/snippy_pairwise/{reads_origin}/{sample}/{sample}.aligned.fa",
snps_vcf = results_dir + "/snippy_pairwise/{reads_origin}/{sample}/{sample}.subs.vcf",
log:
os.path.join(logs_dir, "snippy_pairwise", "{reads_origin}", "{sample}.log")
wildcard_constraints:
Expand All @@ -78,7 +78,7 @@ rule snippy_pairwise:
shell:
"if [[ {wildcards.reads_origin} == 'assembly' ]]; then \
snippy \
--prefix {wildcards.sample}_snippy \
--prefix {wildcards.sample} \
--reference {input.ref} \
--outdir {output.snippy_dir} \
--ctgs {input.data} \
Expand All @@ -91,7 +91,7 @@ rule snippy_pairwise:
--report 2> {log}; \
else \
snippy \
--prefix {wildcards.sample}_snippy \
--prefix {wildcards.sample} \
--reference {input.ref} \
--outdir {output.snippy_dir} \
--bam {input.data} \
Expand All @@ -110,7 +110,7 @@ rule snippy_multi:
Peform a multiple alignment from pairwise output (assembly, sra, and local).
"""
input:
snippy_asm_dir = expand(results_dir + "/snippy_pairwise/assembly/{sample}", sample=identify_assembly_sample()),
snippy_asm_dir = expand(results_dir + "/snippy_pairwise/assembly/{sample}/", sample=identify_assembly_sample()),
#snippy_sra_dir = expand(results_dir + "/snippy_pairwise_sra/{sample}", sample=identify_sra_sample()),
#snippy_local_dir = expand(results_dir + "/snippy_pairwise_local/{sample}", sample=identify_local_sample()),
ref_fna = expand(results_dir + "/data/reference/{sample}/{sample}.fna",
Expand All @@ -128,7 +128,7 @@ rule snippy_multi:
report(results_dir + "/snippy_multi/snippy-core.txt",
caption=os.path.join(report_dir,"snippy_multi.rst"),
category="Alignment",
subcategory="Snippy"),
subcategory="Snippy Multi"),
snp_aln = results_dir + "/snippy_multi/snippy-core.aln",
full_aln = results_dir + "/snippy_multi/snippy-core.full.aln",
log:
Expand Down
41 changes: 41 additions & 0 deletions workflow/rules/download.smk
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
include: "functions.smk"
import os
import itertools # Chaining list of lists of file accessions

# -----------------------------------------------------------------------------#
# Data Download #
Expand Down Expand Up @@ -55,3 +56,43 @@ rule download_assembly:
# Remove ver number in fasta headers if reference
if wildcards.reads_origin == "reference" and (wildcards.ext == "fna" or wildcards.ext == "gff"):
shell("python {scripts_dir}/rename_headers.py --file {output.file}; ")

rule table_assembly:
"""
Plot a table of all downloaded assemblies.
"""
input:
fna = lambda wildcards: expand(results_dir + "/data/{{reads_origin}}/{sample}/{sample}.fna",
sample=identify_assembly_sample() if wildcards.reads_origin == "assembly" else identify_reference_sample()),
output:
report(results_dir + "/data/{reads_origin}/table_{reads_origin}_fna.pdf",
caption=os.path.join(report_dir,"download","table_a{reads_origin}_fna.rst"),
category="Download",
subcategory="{reads_origin}"),
conda:
os.path.join(envs_dir,"plot.yaml")
shell:
"python workflow/scripts/plot_table.py --indir {results_dir}/data/{wildcards.reads_origin} --outdir {results_dir}/data/{wildcards.reads_origin} --ext fna; "

rule table_fastq:
"""
Plot a table of all downloaded SRA fastq.gz.
"""
input:
sra_fastq = lambda wildcards: expand(results_dir + "/data/{{reads_origin}}/{sample}/{file_acc}_1.fastq.gz",
zip,
sample=list(itertools.chain.from_iterable(
[[key] * len(globals()["identify_" + wildcards.reads_origin + "_sample"]()[key]) for key in globals()["identify_" + wildcards.reads_origin + "_sample"]()]
)
),
file_acc=list(itertools.chain.from_iterable(globals()["identify_" + wildcards.reads_origin + "_sample"]().values()))
)
output:
report(results_dir + "/data/{reads_origin}/table_{reads_origin}_fastq-gz.pdf",
caption=os.path.join(report_dir,"download","table_{reads_origin}_fastq-gz.rst"),
category="Download",
subcategory="{reads_origin}"),
conda:
os.path.join(envs_dir,"plot.yaml")
shell:
"python workflow/scripts/plot_table.py --indir {results_dir}/data/{wildcards.reads_origin} --outdir {results_dir}/data/{wildcards.reads_origin} --ext fastq.gz; "
10 changes: 5 additions & 5 deletions workflow/rules/filter_mask.smk
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ rule detect_snp_density:
Detect regions of high SNP density.
"""
input:
vcf = results_dir + "/snippy_pairwise/{reads_origin}/{sample}/{sample}_snippy.subs.vcf",
vcf = results_dir + "/snippy_pairwise/{reads_origin}/{sample}/{sample}.subs.vcf",
output:
snpden = expand(results_dir + "/detect_snp_density/{{reads_origin}}/{{sample}}_snippy.subs.snpden{density}",
snpden = expand(results_dir + "/detect_snp_density/{{reads_origin}}/{{sample}}.subs.snpden{density}",
density=config["snippy_snp_density"])
wildcard_constraints:
reads_origin = "(assembly|sra|local)",
Expand All @@ -74,13 +74,13 @@ rule merge_snp_density:
Merge filter bed files.
"""
input:
asm = expand(results_dir + "/detect_snp_density/assembly/{sample}_snippy.subs.snpden{density}",
asm = expand(results_dir + "/detect_snp_density/assembly/{sample}.subs.snpden{density}",
sample=identify_assembly_sample(),
density=config["snippy_snp_density"]),
sra = expand(results_dir + "/detect_snp_density/sra/{sample}_snippy.subs.snpden{density}",
sra = expand(results_dir + "/detect_snp_density/sra/{sample}.subs.snpden{density}",
sample=identify_sra_sample(),
density=config["snippy_snp_density"]),
local = expand(results_dir + "/detect_snp_density/local/{sample}_snippy.subs.snpden{density}",
local = expand(results_dir + "/detect_snp_density/local/{sample}.subs.snpden{density}",
sample=identify_local_sample(),
density=config["snippy_snp_density"]),
output:
Expand Down
1 change: 1 addition & 0 deletions workflow/rules/functions.smk
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ def identify_local_sample():
data_dir = os.path.join(results_dir, "data", "local")
for dir in os.listdir(data_dir):
sample_dir = os.path.join(data_dir, dir)
if not os.path.isdir(sample_dir): continue
for file in os.listdir(sample_dir):
if "_1.fastq.gz" in file:
biosample = dir
Expand Down
4 changes: 3 additions & 1 deletion workflow/rules/phylogeny.smk
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ rule iqtree:
caption=os.path.join(report_dir,"iqtree.rst"),
category="Phylogenetics",
subcategory="IQTREE"),
tree = expand(results_dir + "/iqtree/iqtree.core-filter{missing_data}.treefile", missing_data = config["snippy_missing_data"]),
tree = expand(results_dir + "/iqtree/iqtree.core-{locus_name}.filter{missing_data}.treefile",
locus_name=config["reference_locus_name"],
missing_data = config["snippy_missing_data"]),
params:
#seed = random.randint(0, 99999999),
seed = config["iqtree_seed"]
Expand Down
41 changes: 38 additions & 3 deletions workflow/rules/qc.smk
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ rule qualimap:
log:
os.path.join(logs_dir, "qualimap", "{reads_origin}", "{sample}.log")
shell:
"samtools view -b -q {config[snippy_map_qual]} {input.snippy_dir}/{wildcards.sample}_snippy.bam > {output.bamq}; "
"samtools view -b -q {config[snippy_map_qual]} {input.snippy_dir}/{wildcards.sample}.bam > {output.bamq}; "
"qualimap bamqc -bam {output.bamq} --skip-duplicated -c -outformat 'HTML' -outdir {output.dir} -nt {resources.cpus} 1> {log}; "


Expand All @@ -29,11 +29,35 @@ rule multiqc:
qualimap_asm_dir = expand(results_dir + "/qualimap/assembly/{sample}", sample=identify_assembly_sample()),
qualimap_local_dir = expand(results_dir + "/qualimap/local/{sample}", sample=identify_local_sample()),
qualimap_sra_dir = expand(results_dir + "/qualimap/sra/{sample}", sample=identify_sra_sample()),
snippy_multi_txt = results_dir + "/snippy_multi/snippy-core.txt",
snippy_asm_dir = expand(results_dir + "/snippy_pairwise/assembly/{sample}/", sample=identify_assembly_sample()),
snippy_local_dir = expand(results_dir + "/snippy_pairwise/local/{sample}/", sample=identify_local_sample()),
snippy_sra_dir = expand(results_dir + "/snippy_pairwise/sra/{sample}/", sample=identify_sra_sample()),
output:
report(results_dir + "/multiqc/multiqc_report.html",
caption=os.path.join(report_dir,"multiqc.rst"),
caption=os.path.join(report_dir,"multiqc_report.rst"),
category="Quality Control",
subcategory="MultiQC"),
report(results_dir + "/multiqc/multiqc_plots/pdf/mqc_snippy_core_alignment_1.pdf",
caption=os.path.join(report_dir,"snippy_multi_plot.rst"),
category="Alignment",
subcategory="Snippy Multi"),
report(results_dir + "/multiqc/multiqc_plots/pdf/mqc_snippy_variants_1.pdf",
caption=os.path.join(report_dir,"snippy_pairwise_plot.rst"),
category="Alignment",
subcategory="Snippy Pairwise"),
report(results_dir + "/multiqc/multiqc_plots/pdf/mqc_qualimap_gc_content_1.pdf",
caption=os.path.join(report_dir,"qualimap_gc_plot.rst"),
category="Post-Alignment",
subcategory="Qualimap"),
report(results_dir + "/multiqc/multiqc_plots/pdf/mqc_qualimap_genome_fraction_1.pdf",
caption=os.path.join(report_dir,"qualimap_genome_fraction_plot.rst"),
category="Post-Alignment",
subcategory="Qualimap"),
report(results_dir + "/multiqc/multiqc_plots/pdf/mqc_qualimap_coverage_histogram_1.pdf",
caption=os.path.join(report_dir,"qualimap_coverage_hist_plot.rst"),
category="Post-Alignment",
subcategory="Qualimap"),
dir = directory(results_dir + "/multiqc/"),
conda:
os.path.join(envs_dir,"qc.yaml")
Expand All @@ -42,4 +66,15 @@ rule multiqc:
resources:
cpus = 1,
shell:
"multiqc -c {config_dir}/multiqc.yaml --outdir {output.dir} --force {input.qualimap_asm_dir} {input.qualimap_local_dir} {input.qualimap_sra_dir} 2> {log}"
"multiqc \
-c {config_dir}/multiqc.yaml \
--export \
--outdir {output.dir} \
--force \
{input.qualimap_asm_dir} \
{input.qualimap_local_dir} \
{input.qualimap_sra_dir} \
{input.snippy_multi_txt} \
{input.snippy_asm_dir} \
{input.snippy_local_dir} \
{input.snippy_sra_dir} 2> {log}"
28 changes: 24 additions & 4 deletions workflow/rules/targets.smk
Original file line number Diff line number Diff line change
Expand Up @@ -56,17 +56,17 @@ rule test_eager_local:

rule test_snippy_pairwise_assembly:
input:
expand(results_dir + "/snippy_pairwise/assembly/{sample}/{sample}_snippy.aligned.fa",
expand(results_dir + "/snippy_pairwise/assembly/{sample}/{sample}.aligned.fa",
sample=identify_assembly_sample())

rule test_snippy_pairwise_local:
input:
expand(results_dir + "/snippy_pairwise/local/{sample}/{sample}_snippy.aligned.fa",
expand(results_dir + "/snippy_pairwise/local/{sample}/{sample}.aligned.fa",
sample=identify_local_sample())

rule test_snippy_pairwise_sra:
input:
expand(results_dir + "/snippy_pairwise/sra/{sample}/{sample}_snippy.aligned.fa",
expand(results_dir + "/snippy_pairwise/sra/{sample}/{sample}.aligned.fa",
sample=identify_sra_sample())

rule test_snippy_multi:
Expand All @@ -89,7 +89,7 @@ rule test_detect_low_complexity:

rule test_detect_snp_density:
input:
expand(results_dir + "/snippy_pairwise/assembly/{sample}/{sample}_snippy.subs.snpden",
expand(results_dir + "/snippy_pairwise/assembly/{sample}/{sample}.subs.snpden",
sample=identify_assembly_sample())

rule test_snippy_multi_extract:
Expand Down Expand Up @@ -124,3 +124,23 @@ rule test_qualimap:
rule test_multiqc:
input:
results_dir + "/multiqc/multiqc_report.html"

#------------------------------------------------------------------------------#
# Plot
#------------------------------------------------------------------------------#

rule test_table_assembly:
input:
results_dir + "/data/assembly/table_assembly_fna.pdf",

rule test_table_assembly_reference:
input:
results_dir + "/data/reference/table_reference_fna.pdf",

rule test_table_fastq_local:
input:
results_dir + "/data/local/table_local_fastq-gz.pdf",

rule test_table_fastq_sra:
input:
results_dir + "/data/sra/table_sra_fastq-gz.pdf",
Loading

0 comments on commit 3ecb83b

Please sign in to comment.