Skip to content

Commit

Permalink
Add MultiQC to snv-indels module
Browse files Browse the repository at this point in the history
  • Loading branch information
Redmar-van-den-Berg committed Mar 26, 2024
1 parent 7f47c16 commit 854e3d5
Show file tree
Hide file tree
Showing 7 changed files with 89 additions and 8 deletions.
3 changes: 2 additions & 1 deletion Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ rule generate_html_report:
rule multiqc:
input:
qc_stats=qc_seq.module_output.multiqc_files,
snv_indel_stats=align.module_output.multiqc_files,
config=srcdir("cfg/multiqc.yml"),
params:
filelist="multiqc_filelist.txt",
Expand All @@ -224,7 +225,7 @@ rule multiqc:
"""
rm -f {params.filelist}
for fname in {input.qc_stats}; do
for fname in {input.qc_stats} {input.snv_indel_stats}; do
echo $fname >> {params.filelist}
done
Expand Down
3 changes: 3 additions & 0 deletions cfg/multiqc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,6 @@ show_analysis_paths: False
sample_names_replace_regex: True
# Mapping of sample-name patterns to their replacement. Because
# sample_names_replace_regex is True, the keys are regular expressions;
# "\\1" keeps only the first capture group, i.e. the text in front of the
# " | qc-seq | " or " | snv-indels | " separator.
sample_names_replace:
"(.+) \\| qc-seq \\| (.+).cutadapt.json": "\\1"
"(.+) \\| snv-indels \\| (.+).ReadsPerGene.out.tab": "\\1"
"(.+) \\| snv-indels \\| Log.final.out": "\\1"
# NOTE(review): this catch-all appears to match every name the two more
# specific snv-indels patterns above match - confirm whether those are still
# needed.
"(.+) \\| snv-indels \\| .*": "\\1"
2 changes: 1 addition & 1 deletion includes/qc-seq/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ rule multiqc:
output:
html="multiqc_qc_seq.html",
log:
"log/multiqc.txt",
"log/qc_seq.multiqc.txt",
container:
containers["multiqc"]
shell:
Expand Down
39 changes: 34 additions & 5 deletions includes/snv-indels/Snakefile
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
include: "common.smk"


# Put each sample name in a SimpleNamespace to mimic Snakemake wildcard usage
# (e.g {wildcards.sample}). This is only used in the 'all' rule.
samples = [SimpleNamespace(sample=sample) for sample in pep.sample_table["sample_name"]]


localrules:
filter_vep_target,
filter_vep_high,
Expand All @@ -20,6 +15,7 @@ rule all:
vep_target=[module_output.vep_target(sample) for sample in samples],
json=[module_output.json(sample) for sample in samples],
hotspot=[module_output.hotspot(sample) for sample in samples],
multiqc="multiqc_snv_indels.html",


rule tmpdir:
Expand Down Expand Up @@ -464,3 +460,36 @@ rule json_output:
--exon_cov_stats_path {input.exon_cov_stats} \
--vep_stats_path {input.vep_stats} > {output} 2> {log}
"""


# Build the MultiQC report for the snv-indels module (multiqc_snv_indels.html)
# from the statistics files listed in module_output.multiqc_files.
rule multiqc:
input:
stats=module_output.multiqc_files,
config=srcdir("../../cfg/multiqc.yml"),
params:
# Scratch file handed to `multiqc --file-list`; the shell block removes and
# rebuilds it on every run, one input path per line.
# NOTE(review): the top-level workflow's multiqc rule uses the same
# "multiqc_filelist.txt" name - confirm the two rules never run concurrently
# in the same working directory.
filelist="multiqc_filelist.txt",
# Value passed to `--dirs-depth`.
depth=2,
output:
html="multiqc_snv_indels.html",
log:
# Only stderr of the multiqc command is redirected here (2> {log}).
"log/snv_indels.multiqc.txt",
container:
containers["multiqc"]
shell:
"""
rm -f {params.filelist}
for fname in {input.stats}; do
echo $fname >> {params.filelist}
done
multiqc \
--force \
--dirs \
--dirs-depth {params.depth} \
--fullnames \
--fn_as_s_name \
--file-list {params.filelist} \
--config {input.config} \
--filename {output.html} 2> {log}
"""
30 changes: 30 additions & 0 deletions includes/snv-indels/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,15 @@ containers = {
"vep": "docker://quay.io/biocontainers/ensembl-vep:108.2--pl5321h4a94de4_0",
"star": "docker://quay.io/biocontainers/star:2.7.10b--h9ee0642_0",
"crimson": "docker://quay.io/biocontainers/crimson:1.1.0--pyh5e36f6f_0",
"multiqc": "docker://quay.io/biocontainers/multiqc:1.21--pyhdfd78af_0",
}


# Put each sample name in a SimpleNamespace to mimic Snakemake wildcard usage
# (e.g {wildcards.sample}). This is used in the 'all' rule and in multiqc_files().
samples = [SimpleNamespace(sample=sample) for sample in pep.sample_table["sample_name"]]


def get_forward(wildcards):
return pep.sample_table.loc[wildcards.sample, "R1"]

Expand Down Expand Up @@ -48,11 +54,35 @@ def get_hotspot(wildcards):
return f"{wildcards.sample}/snv-indels/{wildcards.sample}.hotspot.vcf"


def multiqc_files():
    """Return the list of per-sample statistics files to pass to MultiQC.

    The paths are grouped by file type, and within each group they follow the
    order of the module-level ``samples`` list:

    1. ``<sample>/snv-indels/<sample>.ReadsPerGene.out.tab``
    2. ``<sample>/snv-indels/Log.final.out``
    3. ``<sample>/snv-indels/<sample>.rna_stats``, then ``.aln_stats``, then
       ``.insert_stats`` (all samples for one suffix before the next suffix)
    4. ``<sample>/snv-indels/<sample>.vep_stats.txt``
    """
    names = [entry.sample for entry in samples]

    # Gene count tables
    collected = [f"{name}/snv-indels/{name}.ReadsPerGene.out.tab" for name in names]

    # Alignment logs; the file name itself carries no sample prefix
    collected.extend(f"{name}/snv-indels/Log.final.out" for name in names)

    # Metrics files, one full pass over the samples per suffix
    for tool in ("rna_stats", "aln_stats", "insert_stats"):
        collected.extend(f"{name}/snv-indels/{name}.{tool}" for name in names)

    # VEP summary statistics
    collected.extend(f"{name}/snv-indels/{name}.vep_stats.txt" for name in names)

    return collected


# Namespace exposing this module's outputs. It is read both by this module's
# own rules (e.g. module_output.json(sample) in rule all) and by the top-level
# workflow (align.module_output.multiqc_files).
module_output = SimpleNamespace(
bam=get_bam_output,
bai=get_bai_output,
vep_high=get_vep_high,
vep_target=get_vep_target,
json=get_json,
hotspot=get_hotspot,
# multiqc_files() is called here, so this attribute holds the returned list of
# paths, whereas the attributes above are passed uncalled.
multiqc_files=multiqc_files(),
)
5 changes: 4 additions & 1 deletion test/test_hamlet.yml
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,10 @@
- "sample\tboundary_type\tfuzziness"
- "SRR8615409\tfuzzy-end\t1"
- path: "multiqc_hamlet.html"

contains:
- "VEP"
- "STAR"
- "Picard"

- name: lint-hamlet
tags:
Expand Down
15 changes: 15 additions & 0 deletions test/test_snv_indels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
- "TestSample3/snv-indels/TestSample3.vep.high.txt.gz"

- "TestSample3/snv-indels/TestSample3.hotspot.vcf"
- "rule multiqc"
contains_regex:
- "STAR .* \"ID:TestSample1\" \"SM:TestSample1\" .* --readFilesIn 'test/data/fastq/SRR8615409 chrM_1.fastq.gz' 'test/data/fastq/SRR8615409 chrM_2.fastq.gz'"
# Test that params.min_intron_size is used
Expand Down Expand Up @@ -118,6 +119,20 @@
contains:
- "allele_string"

# Test that file names are set correctly in MultiQC report
- path: multiqc_snv_indels.html
must_not_contain:
- "ReadsPerGene.out.tab"
- "Log.final.out"
- "aln_stats"
- "insert_stats"
- "rna_stats"
- "vep_stats.txt"
contains:
- "VEP"
- "STAR"
- "Picard"

# VEP should switch to offline mode when "vep_cache" is specified
- name: test-chrM-vep-cache
tags:
Expand Down

0 comments on commit 854e3d5

Please sign in to comment.