From 854e3d5b408004f522b413c72dbbf8c22585feed Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 26 Mar 2024 16:35:20 +0100 Subject: [PATCH] Add MultiQC to snv-indels module --- Snakefile | 3 ++- cfg/multiqc.yml | 3 +++ includes/qc-seq/Snakefile | 2 +- includes/snv-indels/Snakefile | 39 +++++++++++++++++++++++++++++----- includes/snv-indels/common.smk | 30 ++++++++++++++++++++++++++ test/test_hamlet.yml | 5 ++++- test/test_snv_indels.yml | 15 +++++++++++++ 7 files changed, 89 insertions(+), 8 deletions(-) diff --git a/Snakefile b/Snakefile index 57020ff..e152faf 100644 --- a/Snakefile +++ b/Snakefile @@ -210,6 +210,7 @@ rule generate_html_report: rule multiqc: input: qc_stats=qc_seq.module_output.multiqc_files, + snv_indel_stats=align.module_output.multiqc_files, config=srcdir("cfg/multiqc.yml"), params: filelist="multiqc_filelist.txt", @@ -224,7 +225,7 @@ rule multiqc: """ rm -f {params.filelist} - for fname in {input.qc_stats}; do + for fname in {input.qc_stats} {input.snv_indel_stats}; do echo $fname >> {params.filelist} done diff --git a/cfg/multiqc.yml b/cfg/multiqc.yml index 89f266c..1da6f56 100644 --- a/cfg/multiqc.yml +++ b/cfg/multiqc.yml @@ -2,3 +2,6 @@ show_analysis_paths: False sample_names_replace_regex: True sample_names_replace: "(.+) \\| qc-seq \\| (.+).cutadapt.json": "\\1" + "(.+) \\| snv-indels \\| (.+).ReadsPerGene.out.tab": "\\1" + "(.+) \\| snv-indels \\| Log.final.out": "\\1" + "(.+) \\| snv-indels \\| .*": "\\1" diff --git a/includes/qc-seq/Snakefile b/includes/qc-seq/Snakefile index aace0de..81da665 100644 --- a/includes/qc-seq/Snakefile +++ b/includes/qc-seq/Snakefile @@ -100,7 +100,7 @@ rule multiqc: output: html="multiqc_qc_seq.html", log: - "log/multiqc.txt", + "log/qc_seq.multiqc.txt", container: containers["multiqc"] shell: diff --git a/includes/snv-indels/Snakefile b/includes/snv-indels/Snakefile index c78a89a..ce05888 100644 --- a/includes/snv-indels/Snakefile +++ b/includes/snv-indels/Snakefile @@ -1,11 +1,6 @@ include: 
"common.smk" -# Put each sample name in a SimpleNamespace to mimic Snakemake wildcard usage -# (e.g {wildcards.sample}). This is only used in the 'all' rule. -samples = [SimpleNamespace(sample=sample) for sample in pep.sample_table["sample_name"]] - - localrules: filter_vep_target, filter_vep_high, @@ -20,6 +15,7 @@ rule all: vep_target=[module_output.vep_target(sample) for sample in samples], json=[module_output.json(sample) for sample in samples], hotspot=[module_output.hotspot(sample) for sample in samples], + multiqc="multiqc_snv_indels.html", rule tmpdir: @@ -464,3 +460,36 @@ rule json_output: --exon_cov_stats_path {input.exon_cov_stats} \ --vep_stats_path {input.vep_stats} > {output} 2> {log} """ + + +rule multiqc: + input: + stats=module_output.multiqc_files, + config=srcdir("../../cfg/multiqc.yml"), + params: + filelist="multiqc_filelist.txt", + depth=2, + output: + html="multiqc_snv_indels.html", + log: + "log/snv_indels.multiqc.txt", + container: + containers["multiqc"] + shell: + """ + rm -f {params.filelist} + + for fname in {input.stats}; do + echo $fname >> {params.filelist} + done + + multiqc \ + --force \ + --dirs \ + --dirs-depth {params.depth} \ + --fullnames \ + --fn_as_s_name \ + --file-list {params.filelist} \ + --config {input.config} \ + --filename {output.html} 2> {log} + """ diff --git a/includes/snv-indels/common.smk b/includes/snv-indels/common.smk index 6485d43..d3fe302 100644 --- a/includes/snv-indels/common.smk +++ b/includes/snv-indels/common.smk @@ -12,9 +12,15 @@ containers = { "vep": "docker://quay.io/biocontainers/ensembl-vep:108.2--pl5321h4a94de4_0", "star": "docker://quay.io/biocontainers/star:2.7.10b--h9ee0642_0", "crimson": "docker://quay.io/biocontainers/crimson:1.1.0--pyh5e36f6f_0", + "multiqc": "docker://quay.io/biocontainers/multiqc:1.21--pyhdfd78af_0", } +# Put each sample name in a SimpleNamespace to mimic Snakemake wildcard usage +# (e.g. {wildcards.sample}). This is used in the 'all' rule and in multiqc_files(). 
+samples = [SimpleNamespace(sample=sample) for sample in pep.sample_table["sample_name"]] + + def get_forward(wildcards): return pep.sample_table.loc[wildcards.sample, "R1"] @@ -48,6 +54,29 @@ def get_hotspot(wildcards): return f"{wildcards.sample}/snv-indels/{wildcards.sample}.hotspot.vcf" +def multiqc_files(): + star_count = [ + f"{wildcards.sample}/snv-indels/{wildcards.sample}.ReadsPerGene.out.tab" + for wildcards in samples + ] + + star_log = [f"{wildcards.sample}/snv-indels/Log.final.out" for wildcards in samples] + + picard_stats = list() + for tool in ["rna_stats", "aln_stats", "insert_stats"]: + picard_stats += [ + f"{wildcards.sample}/snv-indels/{wildcards.sample}.{tool}" + for wildcards in samples + ] + + vep_stats = [ + f"{wildcards.sample}/snv-indels/{wildcards.sample}.vep_stats.txt" + for wildcards in samples + ] + + return star_count + star_log + picard_stats + vep_stats + + module_output = SimpleNamespace( bam=get_bam_output, bai=get_bai_output, @@ -55,4 +84,5 @@ module_output = SimpleNamespace( vep_target=get_vep_target, json=get_json, hotspot=get_hotspot, + multiqc_files=multiqc_files(), ) diff --git a/test/test_hamlet.yml b/test/test_hamlet.yml index 5b5a453..307a69a 100644 --- a/test/test_hamlet.yml +++ b/test/test_hamlet.yml @@ -156,7 +156,10 @@ - "sample\tboundary_type\tfuzziness" - "SRR8615409\tfuzzy-end\t1" - path: "multiqc_hamlet.html" - + contains: + - "VEP" + - "STAR" + - "Picard" - name: lint-hamlet tags: diff --git a/test/test_snv_indels.yml b/test/test_snv_indels.yml index 063bced..b1ad26a 100644 --- a/test/test_snv_indels.yml +++ b/test/test_snv_indels.yml @@ -21,6 +21,7 @@ - "TestSample3/snv-indels/TestSample3.vep.high.txt.gz" - "TestSample3/snv-indels/TestSample3.hotspot.vcf" + - "rule multiqc" contains_regex: - "STAR .* \"ID:TestSample1\" \"SM:TestSample1\" .* --readFilesIn 'test/data/fastq/SRR8615409 chrM_1.fastq.gz' 'test/data/fastq/SRR8615409 chrM_2.fastq.gz'" # Test that params.min_intron_size is used @@ -118,6 +119,20 @@ 
contains: - "allele_string" + # Test that file names are set correctly in MultiQC report + - path: multiqc_snv_indels.html + must_not_contain: + - "ReadsPerGene.out.tab" + - "Log.final.out" + - "aln_stats" + - "insert_stats" + - "rna_stats" + - "vep_stats.txt" + contains: + - "VEP" + - "STAR" + - "Picard" + # VEP should switch to offline mode when "vep_cache" is specified - name: test-chrM-vep-cache tags: