From 1d26fa9981466869606578140ffe1a0084cf7ad9 Mon Sep 17 00:00:00 2001 From: Fredrik Boulund Date: Fri, 15 Mar 2024 14:22:31 +0100 Subject: [PATCH 1/3] Remove mention of assembly workflow from config --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b6c0be8..db13bde 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,7 +28,7 @@ situations. ### Deprecated ### Removed -- Removed mentions of assembly workflow from docs. +- Removed mentions of assembly workflow from docs and config. ## [0.7.0] 2023-06-13 From 6d7849f6702bf5fd6f1a7b461d61cd773b8d0cc2 Mon Sep 17 00:00:00 2001 From: Fredrik Boulund Date: Fri, 15 Mar 2024 15:31:49 +0100 Subject: [PATCH 2/3] Change to config[containers] everywhere, and remove unneeded str conversions --- config/config.yaml | 27 ++-- workflow/Snakefile | 6 - workflow/envs/humann.yaml | 2 +- workflow/envs/krakenuniq.yaml | 2 +- workflow/envs/metaphlan.yaml | 2 +- workflow/envs/stag-mwc.yaml | 19 ++- .../rules/functional_profiling/humann.smk | 7 +- workflow/rules/mappers/bbmap.smk | 28 ++-- workflow/rules/mappers/bowtie2.smk | 34 ++--- workflow/rules/multiqc/multiqc.smk | 9 +- workflow/rules/naive/bbcountunique.smk | 33 +++-- workflow/rules/naive/sketch_compare.smk | 22 ++-- workflow/rules/preproc/host_removal.smk | 14 +- .../rules/preproc/preprocessing_summary.smk | 2 +- workflow/rules/preproc/read_quality.smk | 2 +- workflow/rules/taxonomic_profiling/kaiju.smk | 13 +- .../rules/taxonomic_profiling/kraken2.smk | 124 +++++++++--------- .../rules/taxonomic_profiling/krakenuniq.smk | 14 +- .../rules/taxonomic_profiling/metaphlan.smk | 10 +- .../rules/taxonomic_profiling/strainphlan.smk | 10 +- 20 files changed, 200 insertions(+), 180 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index ba9a8ce..b77a675 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -17,7 +17,6 @@ input_fn_pattern: "{sample}_{readpair}.fq.gz" samplesheet: "" # Three-column 
samplesheet with sample_id,fastq_1,fastq_2 columns. Used instead of inputdir outdir: "output_dir" logdir: "output_dir/logs" -dbdir: "databases" # Databases will be downloaded to this dir report: "StaG_report-" # Filename prefix for report file ("-{datetime}.html" automatically appended) email: "" # Email to send status message after completed/failed run. @@ -29,6 +28,25 @@ s3_endpoint_url: "https://s3.ki.se" # Use https://s3.amazonaws.com for Amazon S keep_local: False # Keep local copies of remote input files, default False. +######################### +# Container images +######################### +containers: + bbmap: "docker://quay.io/biocontainers/bbmap:39.06--h92535d8_0" + bowtie2: "docker://quay.io/biocontainers/bowtie2:2.5.1--py38he00c5e5_2" + bracken: "docker://quay.io/biocontainers/bracken:2.9--py39h1f90b4d_0" + fastp: "docker://quay.io/biocontainers/fastp:0.23.4--hadf994f_2" + humann: "docker://quay.io/biocontainers/humann:3.8--pyh7cba7a3_0" + kaiju: "docker://quay.io/biocontainers/kaiju:1.10.1--h43eeafb_0" + kraken2: "docker://quay.io/biocontainers/kraken2:2.1.3--pl5321hdcf5f25_0" + krakenuniq: "docker://quay.io/biocontainers/krakenuniq:1.0.4--pl5321h6dccd9a_1" + krona: "docker://quay.io/biocontainers/krona:2.8.1--pl5321hdfd78af_1" + metaphlan: "docker://quay.io/biocontainers/metaphlan:4.1.0--pyhca03a8a_0" + multiqc: "docker://quay.io/biocontainers/multiqc:1.21--pyhdfd78af_0" + samtools: "docker://quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1" + stag: "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc-develop" + + ######################### # Pipeline steps included ######################### @@ -52,8 +70,6 @@ functional_profile: mappers: bbmap: False bowtie2: False -assembly: False -binning: False ######################### @@ -190,8 +206,3 @@ bowtie2: attribute_type: "" # Attribute type to summarize counts for, default is "gene_id" (any attribute in the GTF file's attribute field can be used) extra: "" # Extra featureCount command line parameters -
-######################### -# Assembly -######################### -# Assembly workflow was removed in StaG v0.7.0 diff --git a/workflow/Snakefile b/workflow/Snakefile index e4f0b47..95a8c3d 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -22,7 +22,6 @@ from scripts.common import UserMessages, SampleSheet user_messages = UserMessages() stag_version = "0.7.1" -singularity_branch_tag = "-develop" # Replace with "-master" before publishing new version configfile: "config/config.yaml" report: "report/workflow.rst" @@ -31,7 +30,6 @@ citations = {publications["StaG"], publications["Snakemake"]} INPUTDIR = Path(config["inputdir"]) OUTDIR = Path(config["outdir"]) LOGDIR = Path(config["logdir"]) -DBDIR = Path(config["dbdir"]) all_outputs = [] if config["samplesheet"]: @@ -100,10 +98,6 @@ include: "rules/functional_profiling/humann.smk" include: "rules/mappers/bbmap.smk" include: "rules/mappers/bowtie2.smk" -############################# -# Assembly -############################# - ############################# # MultiQC ############################# diff --git a/workflow/envs/humann.yaml b/workflow/envs/humann.yaml index cb7d6a8..52f2120 100644 --- a/workflow/envs/humann.yaml +++ b/workflow/envs/humann.yaml @@ -5,4 +5,4 @@ channels: - bioconda - defaults dependencies: - - humann=3.7 + - humann=3.8 diff --git a/workflow/envs/krakenuniq.yaml b/workflow/envs/krakenuniq.yaml index 3a1248a..2323b42 100644 --- a/workflow/envs/krakenuniq.yaml +++ b/workflow/envs/krakenuniq.yaml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - krakenuniq =1.0.3 + - krakenuniq =1.0.4 diff --git a/workflow/envs/metaphlan.yaml b/workflow/envs/metaphlan.yaml index 647e3e6..5e459c4 100644 --- a/workflow/envs/metaphlan.yaml +++ b/workflow/envs/metaphlan.yaml @@ -4,5 +4,5 @@ channels: - bioconda - defaults dependencies: - - metaphlan =4.0.6 + - metaphlan =4.1.0 - krona =2.8.1 diff --git a/workflow/envs/stag-mwc.yaml b/workflow/envs/stag-mwc.yaml index c7aed39..3890f2b 100644 
--- a/workflow/envs/stag-mwc.yaml +++ b/workflow/envs/stag-mwc.yaml @@ -5,21 +5,20 @@ channels: - defaults dependencies: - python =3.10.9 - - fastp =0.23.2 - - bbmap =39.01 - - kaiju =1.9.2 - - kraken2 =2.1.2 - - bracken =2.8 + - bbmap =39.06 + - bracken =2.9 + - fastp =0.23.4 + - kaiju =1.10.1 + - kraken2 =2.1.3 - krona =2.8.1 - matplotlib =3.7.1 - - multiqc =1.14 + - multiqc =1.21 - pandas =2.0.0 + - pigz =2.6 + - sambamba =1.0 + - samtools =1.19.2 - seaborn =0.12.2 - subread =2.0.3 - - sambamba =1.0 - - samtools =1.6 - - groot =1.1.2 - - pigz =2.6 - pip - pip: - fastcluster diff --git a/workflow/rules/functional_profiling/humann.smk b/workflow/rules/functional_profiling/humann.smk index 3887659..3d7bdc4 100644 --- a/workflow/rules/functional_profiling/humann.smk +++ b/workflow/rules/functional_profiling/humann.smk @@ -48,7 +48,7 @@ rule humann: conda: "../../envs/humann.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:biobakery"+singularity_branch_tag + config["containers"]["humann"] threads: 20 params: outdir=f"{OUTDIR}/humann/", @@ -96,7 +96,7 @@ rule normalize_humann_tables: conda: "../../envs/humann.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:biobakery"+singularity_branch_tag + config["containers"]["humann"] threads: 1 params: method=h_config["norm_method"], @@ -143,7 +143,7 @@ rule humann_join_tables: conda: "../../envs/humann.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:biobakery"+singularity_branch_tag + config["containers"]["humann"] threads: 1 params: @@ -174,3 +174,4 @@ rule humann_join_tables: >> {log.stdout} \ 2>> {log.stderr} """ + diff --git a/workflow/rules/mappers/bbmap.smk b/workflow/rules/mappers/bbmap.smk index 29361bd..a270b6c 100644 --- a/workflow/rules/mappers/bbmap.smk +++ b/workflow/rules/mappers/bbmap.smk @@ -1,6 +1,5 @@ # vim: syntax=python expandtab # Rules for generic read mapping using BBMap -# TODO: Remove superfluous str conversions when Snakemake is pathlib compatible. 
from pathlib import Path from snakemake.exceptions import WorkflowError @@ -20,14 +19,14 @@ for bbmap_config in config["bbmap"]: # Add final output files from this module to 'all_outputs' from the main # Snakefile scope. SAMPLES is also from the main Snakefile scope. - bbmap_alignments = expand(str(OUTDIR/"bbmap/{db_name}/{sample}.{output_type}"), + bbmap_alignments = expand(OUTDIR/"bbmap/{db_name}/{sample}.{output_type}", db_name=db_name, sample=SAMPLES, output_type=("bam", "covstats.txt", "rpkm.txt")) - counts_table = expand(str(OUTDIR/"bbmap/{db_name}/counts.{column}.tsv"), + counts_table = expand(OUTDIR/"bbmap/{db_name}/counts.{column}.tsv", db_name=db_name, column=map(str.strip, bbmap_config["counts_table"]["columns"].split(","))) - featureCounts = expand(str(OUTDIR/"bbmap/{db_name}/all_samples.featureCounts{output_type}"), + featureCounts = expand(OUTDIR/"bbmap/{db_name}/all_samples.featureCounts{output_type}", db_name=db_name, sample=SAMPLES, output_type=["", ".summary", ".table.txt"]) @@ -64,8 +63,8 @@ for bbmap_config in config["bbmap"]: bamscript=temp(bbmap_output_folder/"{sample}.bamscript.sh"), bamfile=bbmap_output_folder/"{sample}.bam" if bbmap_config["keep_bam"] else temp(bbmap_output_folder/"{sample}.bam"), log: - stdout=str(bbmap_logdir/"{sample}.bbmap.stdout.log"), - stderr=str(bbmap_logdir/"{sample}.bbmap.statsfile.txt"), + stdout=bbmap_logdir/"{sample}.bbmap.stdout.log", + stderr=bbmap_logdir/"{sample}.bbmap.statsfile.txt", message: "Mapping {{wildcards.sample}} to {db_name} using BBMap".format(db_name=db_name) shadow: @@ -73,7 +72,7 @@ for bbmap_config in config["bbmap"]: conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["bbmap"] threads: 8 params: db_path=bbmap_config["db_path"], @@ -108,16 +107,16 @@ for bbmap_config in config["bbmap"]: f"""Summarize read counts for {db_name}""" name: f"bbmap_counts_{db_name}" input: - 
rpkms=expand(str(OUTDIR/"bbmap/{db_name}/{sample}.rpkm.txt"), + rpkms=expand(OUTDIR/"bbmap/{db_name}/{sample}.rpkm.txt", db_name=db_name, sample=SAMPLES) output: - expand(str(OUTDIR/"bbmap/{db_name}/counts.{column}.tsv"), + expand(OUTDIR/"bbmap/{db_name}/counts.{column}.tsv", db_name=db_name, column=map(str.strip, bbmap_config["counts_table"]["columns"].split(",")) ) log: - str(bbmap_logdir/"counts.log") + bbmap_logdir/"counts.log" message: "Summarizing read counts for {db_name}".format(db_name=db_name) shadow: @@ -125,7 +124,7 @@ for bbmap_config in config["bbmap"]: conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["stag"] threads: 1 params: annotations=bbmap_config["counts_table"]["annotations"], @@ -147,7 +146,7 @@ for bbmap_config in config["bbmap"]: f"""Summarize feature counts for {db_name}""" name: f"bbmap_feature_counts_{db_name}" input: - bams=expand(str(OUTDIR/"bbmap/{db_name}/{sample}.bam"), + bams=expand(OUTDIR/"bbmap/{db_name}/{sample}.bam", db_name=db_name, sample=SAMPLES) output: @@ -155,7 +154,7 @@ for bbmap_config in config["bbmap"]: counts_table=OUTDIR/"bbmap/{db_name}/all_samples.featureCounts.table.txt".format(db_name=db_name), summary=OUTDIR/"bbmap/{db_name}/all_samples.featureCounts.summary".format(db_name=db_name), log: - str(bbmap_logdir/"all_samples.featureCounts.log") + bbmap_logdir/"all_samples.featureCounts.log" message: "Summarizing feature counts for {db_name}".format(db_name=db_name) shadow: @@ -163,7 +162,7 @@ for bbmap_config in config["bbmap"]: conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["stag"] threads: 4 params: annotations=fc_config["annotations"], @@ -183,6 +182,7 @@ for bbmap_config in config["bbmap"]: {input.bams} \ > {log} \ 2>> {log} + cut \ -f1,7- \ {output.counts} \ diff --git a/workflow/rules/mappers/bowtie2.smk 
b/workflow/rules/mappers/bowtie2.smk index 3b19c62..2d5b0d3 100644 --- a/workflow/rules/mappers/bowtie2.smk +++ b/workflow/rules/mappers/bowtie2.smk @@ -1,6 +1,5 @@ # vim: syntax=python expandtab # Generic rules for alignment of reads to a reference database using Bowtie2 -# TODO: Remove superfluous str conversions when Snakemake is pathlib compatible. from pathlib import Path from snakemake.exceptions import WorkflowError @@ -17,17 +16,17 @@ for bt2_config in config["bowtie2"]: # Add final output files from this module to 'all_outputs' from the main # Snakefile scope. SAMPLES is also from the main Snakefile scope. - bowtie2_alignments = expand(str(OUTDIR/"bowtie2/{db_name}/{sample}.bam"), + bowtie2_alignments = expand(OUTDIR/"bowtie2/{db_name}/{sample}.bam", sample=SAMPLES, db_name=bt2_db_name) - bowtie2_stats = expand(str(OUTDIR/"bowtie2/{db_name}/{sample}.{stats}.txt"), + bowtie2_stats = expand(OUTDIR/"bowtie2/{db_name}/{sample}.{stats}.txt", sample=SAMPLES, stats=["covstats", "rpkm"], db_name=bt2_db_name) - counts_table = expand(str(OUTDIR/"bowtie2/{db_name}/counts.{column}.tsv"), + counts_table = expand(OUTDIR/"bowtie2/{db_name}/counts.{column}.tsv", db_name=bt2_db_name, column=map(str.strip, bt2_config["counts_table"]["columns"].split(","))) - featureCounts = expand(str(OUTDIR/"bowtie2/{db_name}/all_samples.featureCounts{output_type}"), + featureCounts = expand(OUTDIR/"bowtie2/{db_name}/all_samples.featureCounts{output_type}", db_name=bt2_db_name, sample=SAMPLES, output_type=["", ".summary", ".table.txt"]) @@ -61,7 +60,7 @@ for bt2_config in config["bowtie2"]: output: OUTDIR/"bowtie2/{db_name}/{{sample}}.bam".format(db_name=bt2_db_name) if bt2_config["keep_bam"] else temp(OUTDIR/"bowtie2/{db_name}/{{sample}}.bam".format(db_name=bt2_db_name)), log: - str(LOGDIR/"bowtie2/{db_name}/{{sample}}.log".format(db_name=bt2_db_name)) + LOGDIR/"bowtie2/{db_name}/{{sample}}.log".format(db_name=bt2_db_name) message: "Mapping {{wildcards.sample}} to {db_name} using 
Bowtie2".format(db_name=bt2_db_name) params: @@ -71,7 +70,7 @@ for bt2_config in config["bowtie2"]: conda: "../../envs/metaphlan.yaml" container: - "docker://quay.io/biocontainers/metaphlan:4.0.3--pyhca03a8a_0" + config["containers"]["bowtie2"] wrapper: "0.23.1/bio/bowtie2/align" @@ -85,7 +84,7 @@ for bt2_config in config["bowtie2"]: covstats=OUTDIR/"bowtie2/{db_name}/{{sample}}.covstats.txt".format(db_name=bt2_db_name), rpkm=OUTDIR/"bowtie2/{db_name}/{{sample}}.rpkm.txt".format(db_name=bt2_db_name) log: - str(LOGDIR/"bowtie2/{db_name}/{{sample}}.pileup.log".format(db_name=bt2_db_name)) + LOGDIR/"bowtie2/{db_name}/{{sample}}.pileup.log".format(db_name=bt2_db_name) message: "Summarizing bowtie2 mapping statistics against {db_name}.".format(db_name=bt2_db_name) shadow: @@ -93,7 +92,7 @@ for bt2_config in config["bowtie2"]: conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["bbmap"] shell: """ pileup.sh \ @@ -112,16 +111,16 @@ for bt2_config in config["bowtie2"]: """Create count table for Bowtie2 mappings.""" name: f"bowtie2_count_table_{bt2_db_name}" input: - rpkms=expand(str(OUTDIR/"bowtie2/{db_name}/{sample}.rpkm.txt"), + rpkms=expand(OUTDIR/"bowtie2/{db_name}/{sample}.rpkm.txt", db_name=bt2_db_name, sample=SAMPLES) output: - expand(str(OUTDIR/"bowtie2/{db_name}/counts.{column}.tsv"), + expand(OUTDIR/"bowtie2/{db_name}/counts.{column}.tsv", db_name=bt2_db_name, column=map(str.strip, bt2_config["counts_table"]["columns"].split(",")) ) log: - str(LOGDIR/"bowtie2/{db_name}/counts.log".format(db_name=bt2_db_name)) + LOGDIR/"bowtie2/{db_name}/counts.log".format(db_name=bt2_db_name) message: "Creating count table for mappings to {db_name}".format(db_name=bt2_db_name) shadow: @@ -129,7 +128,7 @@ for bt2_config in config["bowtie2"]: conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["stag"] threads: 1 
params: annotations=bt2_config["counts_table"]["annotations"], @@ -151,7 +150,7 @@ for bt2_config in config["bowtie2"]: """Summarize featureCounts for Bowtie2 mappings.""" name: f"bowtie2_feature_counts_{bt2_db_name}" input: - bams=expand(str(OUTDIR/"bowtie2/{db_name}/{sample}.bam"), + bams=expand(OUTDIR/"bowtie2/{db_name}/{sample}.bam", db_name=bt2_db_name, sample=SAMPLES) output: @@ -159,7 +158,7 @@ for bt2_config in config["bowtie2"]: counts_table=OUTDIR/"bowtie2/{db_name}/all_samples.featureCounts.table.txt".format(db_name=bt2_db_name), summary=OUTDIR/"bowtie2/{db_name}/all_samples.featureCounts.summary".format(db_name=bt2_db_name), log: - str(LOGDIR/"bowtie2/{db_name}/all_samples.featureCounts.log".format(db_name=bt2_db_name)) + LOGDIR/"bowtie2/{db_name}/all_samples.featureCounts.log".format(db_name=bt2_db_name) message: "Summarizing feature counts for {db_name} mappings.".format(db_name=bt2_db_name) shadow: @@ -167,7 +166,7 @@ for bt2_config in config["bowtie2"]: conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["stag"] threads: 4 params: annotations=fc_config["annotations"], @@ -187,7 +186,7 @@ for bt2_config in config["bowtie2"]: {input.bams} \ > {log} \ 2>> {log} \ - && \ + cut \ -f1,7- \ {output.counts} \ @@ -195,3 +194,4 @@ for bt2_config in config["bowtie2"]: | sed 's|\t\w\+/bowtie2/{params.dbname}/|\t|g' \ > {output.counts_table} """ + diff --git a/workflow/rules/multiqc/multiqc.smk b/workflow/rules/multiqc/multiqc.smk index 6517092..02cdce6 100644 --- a/workflow/rules/multiqc/multiqc.smk +++ b/workflow/rules/multiqc/multiqc.smk @@ -9,17 +9,17 @@ if config["multiqc_report"]: input: all_outputs output: - report=report(f"{OUTDIR}/multiqc/multiqc_report.html", + report=report(OUTDIR/"multiqc/multiqc_report.html", category="Sequencing data quality", caption="../../report/multiqc.rst"), log: - f"{LOGDIR}/multiqc/multiqc.log" + LOGDIR/"multiqc/multiqc.log" shadow: "shallow" 
conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["multiqc"] threads: 1 params: extra=mqc_config["extra"], @@ -32,4 +32,5 @@ if config["multiqc_report"]: """ # Appended after the rule definition to avoid circular dependency - all_outputs.append(f"{OUTDIR}/multiqc/multiqc_report.html") + all_outputs.append(OUTDIR/"multiqc/multiqc_report.html") + diff --git a/workflow/rules/naive/bbcountunique.smk b/workflow/rules/naive/bbcountunique.smk index 963c41b..023ee95 100644 --- a/workflow/rules/naive/bbcountunique.smk +++ b/workflow/rules/naive/bbcountunique.smk @@ -1,6 +1,5 @@ # vim: syntax=python expandtab # Assess sequencing depth of sample using BBCountUnique from the BBMap suite. -# TODO: Remove superfluous str conversions when Snakemake is pathlib compatible. if config["naive"]["assess_depth"]: # Add final output files from this module to 'all_outputs' from @@ -18,19 +17,16 @@ if config["naive"]["assess_depth"]: INPUT_read1, output: txt=OUTDIR/"bbcountunique/{sample}.bbcountunique.txt", - pdf=report(OUTDIR/"bbcountunique/{sample}.bbcountunique.pdf", - caption="../../report/bbcountunique.rst", - category="Sequencing depth") log: - stdout=str(LOGDIR/"bbcountunique/{sample}.bbcountunique.stdout.log"), - stderr=str(LOGDIR/"bbcountunique/{sample}.bbcountunique.stderr.log"), + stdout=LOGDIR/"bbcountunique/{sample}.bbcountunique.stdout.log", + stderr=LOGDIR/"bbcountunique/{sample}.bbcountunique.stderr.log", shadow: "shallow" threads: 2 conda: "../../envs/stag-mwc.yaml", container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["bbmap"] params: interval=config["bbcountunique"]["interval"] shell: @@ -41,9 +37,30 @@ if config["naive"]["assess_depth"]: interval={params.interval} \ > {log.stdout} \ 2> {log.stderr} + """ + rule plot_bbcountunique: + """Assess sequencing depth using BBCountUnique.""" + input: + 
txt=OUTDIR/"bbcountunique/{sample}.bbcountunique.txt", + output: + pdf=report(OUTDIR/"bbcountunique/{sample}.bbcountunique.pdf", + caption="../../report/bbcountunique.rst", + category="Sequencing depth") + log: + stdout=LOGDIR/"bbcountunique/{sample}.plot_bbcountunique.stdout.log", + stderr=LOGDIR/"bbcountunique/{sample}.plot_bbcountunique.stderr.log", + shadow: + "shallow" + threads: 1 + conda: + "../../envs/stag-mwc.yaml", + container: + config["containers"]["stag"] + shell: + """ workflow/scripts/plot_bbcountunique.py \ - {output.txt} \ + {input.txt} \ {output.pdf} \ >> {log.stdout} \ 2>> {log.stderr} diff --git a/workflow/rules/naive/sketch_compare.smk b/workflow/rules/naive/sketch_compare.smk index 3b6c831..987d9c5 100644 --- a/workflow/rules/naive/sketch_compare.smk +++ b/workflow/rules/naive/sketch_compare.smk @@ -1,6 +1,5 @@ # vim: syntax=python expandtab # Compare all samples against all samples using MinHash sketches -# TODO: Remove superfluous str conversions when Snakemake is pathlib compatible. localrules: compare_sketches, @@ -10,7 +9,7 @@ localrules: if config["naive"]["sketch_compare"]: # Add final output files from this module to 'all_outputs' from the # main Snakefile scope. 
- sample_similarity_plot = str(OUTDIR/"sketch_compare/sample_similarity.pdf") + sample_similarity_plot = OUTDIR/"sketch_compare/sample_similarity.pdf" all_outputs.append(sample_similarity_plot) citations.add(publications["BBMap"]) @@ -24,13 +23,13 @@ rule sketch: output: sketch=OUTDIR/"sketch_compare/{sample}.sketch.gz", log: - str(LOGDIR/"sketch_compare/{sample}.sketch.log") + LOGDIR/"sketch_compare/{sample}.sketch.log", shadow: "shallow" conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["bbmap"] threads: 4 shell: """ @@ -45,18 +44,17 @@ rule sketch: rule compare_sketches: """Compare all samples using BBMap's comparesketch.sh""" input: - samples=expand(str(OUTDIR/"sketch_compare/{sample}.sketch.gz"), - sample=SAMPLES) + samples=expand(OUTDIR/"sketch_compare/{sample}.sketch.gz", sample=SAMPLES), output: alltoall=OUTDIR/"sketch_compare/alltoall.txt", log: - str(LOGDIR/"sketch_compare/comparesketch.log") + LOGDIR/"sketch_compare/comparesketch.log", shadow: "shallow" conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["bbmap"] shell: """ comparesketch.sh \ @@ -76,14 +74,14 @@ rule plot_sample_similarity: heatmap=OUTDIR/"sketch_compare/sample_similarity.pdf", clustered=report(OUTDIR/"sketch_compare/sample_similarity.clustered.pdf", category="Sketch comparison", - caption="../../report/sketch_compare.rst") + caption="../../report/sketch_compare.rst"), log: - stdout=str(LOGDIR/"sketch_compare/sample_similarity_plot.stdout.log"), - stderr=str(LOGDIR/"sketch_compare/sample_similarity_plot.stderr.log"), + stdout=LOGDIR/"sketch_compare/sample_similarity_plot.stdout.log", + stderr=LOGDIR/"sketch_compare/sample_similarity_plot.stderr.log", conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["stag"] shell: """ 
workflow/scripts/plot_sketch_comparison_heatmap.py \ diff --git a/workflow/rules/preproc/host_removal.smk b/workflow/rules/preproc/host_removal.smk index 20c9ef2..7d20740 100644 --- a/workflow/rules/preproc/host_removal.smk +++ b/workflow/rules/preproc/host_removal.smk @@ -56,7 +56,7 @@ if config["host_removal"]["kraken2"]: conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["kraken2"] threads: 8 params: db=rh_kraken2["db_path"], @@ -109,7 +109,7 @@ if config["host_removal"]["kraken2"]: conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["stag"] threads: 1 shell: """ @@ -155,7 +155,7 @@ if config["host_removal"]["bowtie2"]: conda: "../../envs/metaphlan.yaml" container: - "docker://quay.io/biocontainers/bowtie2:2.5.1--py38he00c5e5_2" + config["containers"]["bowtie2"] params: db_path=rh_bowtie2["db_path"], extra=rh_bowtie2["extra"], @@ -183,7 +183,7 @@ if config["host_removal"]["bowtie2"]: conda: "../../envs/metaphlan.yaml" container: - "docker://quay.io/biocontainers/samtools:1.17--hd87286a_1" + config["containers"]["samtools"] shell: """ samtools view \ @@ -207,7 +207,7 @@ if config["host_removal"]["bowtie2"]: conda: "../../envs/metaphlan.yaml" container: - "docker://quay.io/biocontainers/samtools:1.17--hd87286a_1" + config["containers"]["samtools"] shell: """ samtools view \ @@ -233,7 +233,7 @@ if config["host_removal"]["bowtie2"]: conda: "../../envs/metaphlan.yaml" container: - "docker://quay.io/biocontainers/samtools:1.17--hd87286a_1" + config["containers"]["samtools"] shell: """ samtools sort \ @@ -259,7 +259,7 @@ if config["host_removal"]["bowtie2"]: conda: "../../envs/metaphlan.yaml" container: - "docker://quay.io/biocontainers/samtools:1.17--hd87286a_1" + config["containers"]["samtools"] shell: """ samtools fastq \ diff --git a/workflow/rules/preproc/preprocessing_summary.smk 
b/workflow/rules/preproc/preprocessing_summary.smk index efa4fcb..c3c1349 100644 --- a/workflow/rules/preproc/preprocessing_summary.smk +++ b/workflow/rules/preproc/preprocessing_summary.smk @@ -29,7 +29,7 @@ rule preprocessing_summary: conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["stag"] threads: 1 params: fastp_arg=lambda w: f"--fastp {LOGDIR}/fastp/*.fastp.json" if config["qc_reads"] else "", diff --git a/workflow/rules/preproc/read_quality.smk b/workflow/rules/preproc/read_quality.smk index b93cd28..0584094 100644 --- a/workflow/rules/preproc/read_quality.smk +++ b/workflow/rules/preproc/read_quality.smk @@ -33,7 +33,7 @@ if config["qc_reads"]: conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["fastp"] threads: 4 params: extra=fastp_config["extra"], diff --git a/workflow/rules/taxonomic_profiling/kaiju.smk b/workflow/rules/taxonomic_profiling/kaiju.smk index 37bb6ba..ddb5288 100644 --- a/workflow/rules/taxonomic_profiling/kaiju.smk +++ b/workflow/rules/taxonomic_profiling/kaiju.smk @@ -18,7 +18,6 @@ if config["taxonomic_profile"]["kaiju"]: Path(kaiju_config["names"]).exists()]): err_message = "No Kaiju database files at: '{}', '{}', '{}'!\n".format(kaiju_config["db"], kaiju_config["nodes"], kaiju_config["names"]) err_message += "Specify relevant paths in the kaiju section of config.yaml.\n" - err_message += "Run 'snakemake download_kaiju_database' to download a copy into '{dbdir}'\n".format(dbdir=DBDIR/"kaiju") err_message += "If you do not want to run Kaiju for taxonomic profiling, set 'kaiju: False' in config.yaml" raise WorkflowError(err_message) @@ -56,7 +55,7 @@ rule kaiju: conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["kaiju"] params: db=kaiju_config["db"], nodes=kaiju_config["nodes"], @@ 
-86,7 +85,7 @@ rule kaiju2krona: conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["kaiju"] shell: """ kaiju2krona \ @@ -109,7 +108,7 @@ rule create_kaiju_krona_plot: conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["krona"] shell: """ ktImportText \ @@ -132,7 +131,7 @@ rule kaiju_report: conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["kaiju"] shell: """ kaiju2table \ @@ -163,7 +162,7 @@ rule join_kaiju_reports: conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["stag"] shell: """ workflow/scripts/join_tables.py \ @@ -187,7 +186,7 @@ rule kaiju_area_plot: conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["stag"] shell: """ workflow/scripts/area_plot.py \ diff --git a/workflow/rules/taxonomic_profiling/kraken2.smk b/workflow/rules/taxonomic_profiling/kraken2.smk index 260e91f..d657769 100644 --- a/workflow/rules/taxonomic_profiling/kraken2.smk +++ b/workflow/rules/taxonomic_profiling/kraken2.smk @@ -1,7 +1,6 @@ # vim: syntax=python expandtab # Taxonomic classification of metagenomic reads using Kraken2 with abundance # estimation using Bracken -# TODO: Remove superfluous str conversions when Snakemake is pathlib compatible. 
from pathlib import Path from snakemake.exceptions import WorkflowError @@ -27,19 +26,18 @@ if config["taxonomic_profile"]["kraken2"]: if not (kraken2_config["db"] and Path(kraken2_config["db"]).exists()): err_message = "No Kraken2 database folder at: '{}'!\n".format(kraken2_config["db"]) err_message += "Specify the path in the kraken2 section of config.yaml.\n" - err_message += "Run 'snakemake download_minikraken2' to download a copy into '{dbdir}'\n".format(dbdir=DBDIR/"kraken2") err_message += "If you do not want to run kraken2 for taxonomic profiling, set 'kraken2: False' in config.yaml" raise WorkflowError(err_message) # Add Kraken2 output files to 'all_outputs' from the main Snakefile scope. # SAMPLES is also from the main Snakefile scope. - krakens = expand(str(OUTDIR/"kraken2/{sample}.kraken"), sample=SAMPLES) - kreports = expand(str(OUTDIR/"kraken2/{sample}.kreport"), sample=SAMPLES) - kreports_mpa_style = expand(str(OUTDIR/"kraken2/{sample}.mpa_style.txt"), sample=SAMPLES) - joined_kreport_mpa_style = str(OUTDIR/"kraken2/all_samples.kraken2.mpa_style.txt") - combined_kreport = str(OUTDIR/"kraken2/all_samples.kraken2.txt") - kraken_krona = str(OUTDIR/"kraken2/all_samples.kraken2.krona.html") - kraken_area_plot = str(OUTDIR/"kraken2/area_plot.kraken2.pdf") + krakens = expand(OUTDIR/"kraken2/{sample}.kraken", sample=SAMPLES) + kreports = expand(OUTDIR/"kraken2/{sample}.kreport", sample=SAMPLES) + kreports_mpa_style = expand(OUTDIR/"kraken2/{sample}.mpa_style.txt", sample=SAMPLES) + joined_kreport_mpa_style = OUTDIR/"kraken2/all_samples.kraken2.mpa_style.txt" + combined_kreport = OUTDIR/"kraken2/all_samples.kraken2.txt" + kraken_krona = OUTDIR/"kraken2/all_samples.kraken2.krona.html" + kraken_area_plot = OUTDIR/"kraken2/area_plot.kraken2.pdf" if kraken2_config["keep_kraken"]: all_outputs.extend(krakens) @@ -64,14 +62,14 @@ rule kraken2: kraken=OUTDIR/"kraken2/{sample}.kraken" if kraken2_config["keep_kraken"] else temp(OUTDIR/"kraken2/{sample}.kraken"), 
kreport=OUTDIR/"kraken2/{sample}.kreport" if kraken2_config["keep_kreport"] else temp(OUTDIR/"kraken2/{sample}.kreport"), log: - str(LOGDIR/"kraken2/{sample}.kraken2.log") + LOGDIR/"kraken2/{sample}.kraken2.log" shadow: "shallow" threads: 8 conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["kraken2"] params: db=kraken2_config["db"], confidence=kraken2_config["confidence"], @@ -96,16 +94,16 @@ rule kraken2: rule kraken_mpa_style: input: - kreport=OUTDIR/"kraken2/{sample}.kreport" + kreport=OUTDIR/"kraken2/{sample}.kreport", output: txt=OUTDIR/"kraken2/{sample}.mpa_style.txt", log: - str(LOGDIR/"kraken2/{sample}.mpa_style.log") + LOGDIR/"kraken2/{sample}.mpa_style.log", threads: 1 conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["stag"] shell: """ workflow/scripts/KrakenTools/kreport2mpa.py \ @@ -113,24 +111,25 @@ rule kraken_mpa_style: --output {output.txt} \ --display-header \ 2>&1 > {log} + sed --in-place 's|{input.kreport}|taxon_name\treads|g' {output.txt} """ rule join_kraken2_mpa: input: - txt=expand(str(OUTDIR/"kraken2/{sample}.mpa_style.txt"), sample=SAMPLES), + txt=expand(OUTDIR/"kraken2/{sample}.mpa_style.txt", sample=SAMPLES), output: table=report(OUTDIR/"kraken2/all_samples.kraken2.mpa_style.txt", category="Taxonomic profiling", caption="../../report/kraken2_table_mpa.rst"), log: - str(LOGDIR/"kraken2/join_kraken2_mpa_tables.log") + LOGDIR/"kraken2/join_kraken2_mpa_tables.log", threads: 1 conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["stag"] params: value_column="reads", feature_column="taxon_name", @@ -147,17 +146,17 @@ rule join_kraken2_mpa: rule kraken2_area_plot: input: - OUTDIR/"kraken2/all_samples.kraken2.mpa_style.txt" + OUTDIR/"kraken2/all_samples.kraken2.mpa_style.txt", output: 
report(OUTDIR/"kraken2/area_plot.kraken2.pdf", category="Taxonomic profiling", caption="../../report/area_plot.rst") log: - str(LOGDIR/"kraken2/area_plot.kraken2.log") + LOGDIR/"kraken2/area_plot.kraken2.log", conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["stag"] shell: """ workflow/scripts/area_plot.py \ @@ -170,19 +169,19 @@ rule kraken2_area_plot: rule combine_kreports: input: - kreports=expand(str(OUTDIR/"kraken2/{sample}.kreport"), sample=SAMPLES), + kreports=expand(OUTDIR/"kraken2/{sample}.kreport", sample=SAMPLES), output: report(OUTDIR/"kraken2/all_samples.kraken2.txt", category="Taxonomic profiling", caption="../../report/kraken2_table.rst"), log: - str(LOGDIR/"kraken2/combined_kreport.log") + LOGDIR/"kraken2/combined_kreport.log" shadow: "shallow" conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["stag"] shell: """ workflow/scripts/KrakenTools/combine_kreports.py \ @@ -199,14 +198,14 @@ rule kreport2krona: output: OUTDIR/"kraken2/{sample}.krona" log: - str(LOGDIR/"kraken2/{sample}.kreport2krona.log") + LOGDIR/"kraken2/{sample}.kreport2krona.log", shadow: "shallow" threads: 1 conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["stag"] shell: """ workflow/scripts/KrakenTools/kreport2krona.py \ @@ -218,7 +217,7 @@ rule kreport2krona: rule create_kraken2_krona_plot: input: - expand(str(OUTDIR/"kraken2/{sample}.krona"), sample=SAMPLES), + expand(OUTDIR/"kraken2/{sample}.krona", sample=SAMPLES), output: krona_html=report(OUTDIR/"kraken2/all_samples.kraken2.krona.html", category="Taxonomic profiling", @@ -228,7 +227,7 @@ rule create_kraken2_krona_plot: conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["krona"] shell: """ 
ktImportText \ @@ -241,13 +240,12 @@ if config["taxonomic_profile"]["kraken2"] and kraken2_config["bracken"]["kmer_di if not Path(kraken2_config["bracken"]["kmer_distrib"]).exists(): err_message = "No Bracken kmer_distrib database file at: '{}'!\n".format(kraken2_config["bracken"]["kmer_distrib"]) err_message += "Specify the path in the kraken2 section of config.yaml.\n" - err_message += "Run 'snakemake download_minikraken2' to download a copy of the required files into '{dbdir}'\n".format(dbdir=DBDIR/"kraken2") err_message += "If you do not want to run Bracken for abundance profiling, set 'kmer_distrib: ""' in the bracken section of config.yaml" raise WorkflowError(err_message) if kraken2_config["filter_bracken"]["include"] or kraken2_config["filter_bracken"]["exclude"]: - filtered_brackens = expand(str(OUTDIR/"kraken2/{sample}.{level}.filtered.bracken"), sample=SAMPLES, level=kraken2_config["bracken"]["levels"].split()) - all_table = expand(str(OUTDIR/"kraken2/all_samples.{level}.bracken.txt"), level=kraken2_config["bracken"]["levels"].split()) - all_table_filtered = expand(str(OUTDIR/"kraken2/all_samples.{level}.filtered.bracken.txt"), level=kraken2_config["bracken"]["levels"].split()) + filtered_brackens = expand(OUTDIR/"kraken2/{sample}.{level}.filtered.bracken", sample=SAMPLES, level=kraken2_config["bracken"]["levels"].split()) + all_table = expand(OUTDIR/"kraken2/all_samples.{level}.bracken.txt", level=kraken2_config["bracken"]["levels"].split()) + all_table_filtered = expand(OUTDIR/"kraken2/all_samples.{level}.filtered.bracken.txt", level=kraken2_config["bracken"]["levels"].split()) all_outputs.extend(filtered_brackens) all_outputs.append(all_table) @@ -255,11 +253,11 @@ if config["taxonomic_profile"]["kraken2"] and kraken2_config["bracken"]["kmer_di citations.add(publications["Bracken"]) - brackens = expand(str(OUTDIR/"kraken2/{sample}.{level}.bracken"), sample=SAMPLES, level=kraken2_config["bracken"]["levels"].split()) - brackens_mpa_style = 
expand(str(OUTDIR/"kraken2/{sample}.bracken.mpa_style.txt"), sample=SAMPLES) - bracken_area_plot = str(OUTDIR/"kraken2/area_plot.bracken.pdf") - bracken_krona = str(OUTDIR/"kraken2/all_samples.bracken.krona.html") - all_table_mpa = str(OUTDIR/"kraken2/all_samples.bracken.mpa_style.txt") + brackens = expand(OUTDIR/"kraken2/{sample}.{level}.bracken", sample=SAMPLES, level=kraken2_config["bracken"]["levels"].split()) + brackens_mpa_style = expand(OUTDIR/"kraken2/{sample}.bracken.mpa_style.txt", sample=SAMPLES) + bracken_area_plot = OUTDIR/"kraken2/area_plot.bracken.pdf" + bracken_krona = OUTDIR/"kraken2/all_samples.bracken.krona.html" + all_table_mpa = OUTDIR/"kraken2/all_samples.bracken.mpa_style.txt" all_outputs.extend(brackens) all_outputs.extend(brackens_mpa_style) @@ -272,19 +270,19 @@ rule bracken_kreport: """Run Bracken summarization for Species level to get total sample bracken.kreport (required for mpa-conversion later).""" input: - kreport=OUTDIR/"kraken2/{sample}.kreport" + kreport=OUTDIR/"kraken2/{sample}.kreport", output: bracken=OUTDIR/"kraken2/{sample}.bracken", bracken_kreport=OUTDIR/"kraken2/{sample}_bracken.kreport", log: - str(LOGDIR/"kraken2/{sample}.bracken.log") + LOGDIR/"kraken2/{sample}.bracken.log", threads: 2 shadow: "shallow" conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["bracken"] params: kmer_distrib=kraken2_config["bracken"]["kmer_distrib"], thresh=kraken2_config["bracken"]["thresh"], @@ -304,18 +302,18 @@ rule bracken_kreport: rule bracken_all_levels: """Run Bracken summarization for all levels.""" input: - kreport=OUTDIR/"kraken2/{sample}.kreport" + kreport=OUTDIR/"kraken2/{sample}.kreport", output: bracken=OUTDIR/"kraken2/{sample}.{level,[DPOCFGS]}.bracken", log: - str(LOGDIR/"kraken2/{sample}.{level}.bracken.log") + LOGDIR/"kraken2/{sample}.{level}.bracken.log", shadow: # shadow required because est_abundance.py always creates the "shallow" # 
sample-level output file with fixed filename: {sample}_bracken.kreport threads: 2 conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["bracken"] params: kmer_distrib=kraken2_config["bracken"]["kmer_distrib"], thresh=kraken2_config["bracken"]["thresh"], @@ -333,16 +331,16 @@ rule bracken_all_levels: rule bracken_mpa_style: input: - kreport=OUTDIR/"kraken2/{sample}_bracken.kreport" + kreport=OUTDIR/"kraken2/{sample}_bracken.kreport", output: txt=OUTDIR/"kraken2/{sample}.bracken.mpa_style.txt", log: - str(LOGDIR/"kraken2/{sample}.bracken.mpa_style.log") + LOGDIR/"kraken2/{sample}.bracken.mpa_style.log", threads: 1 conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["stag"] shell: """ workflow/scripts/KrakenTools/kreport2mpa.py \ @@ -350,24 +348,25 @@ rule bracken_mpa_style: --output {output.txt} \ --display-header \ 2>&1 > {log} + sed --in-place 's|{input.kreport}|taxon_name\treads|g' {output.txt} """ rule join_bracken_mpa: input: - txt=expand(str(OUTDIR/"kraken2/{sample}.bracken.mpa_style.txt"), sample=SAMPLES), + txt=expand(OUTDIR/"kraken2/{sample}.bracken.mpa_style.txt", sample=SAMPLES), output: table=report(OUTDIR/"kraken2/all_samples.bracken.mpa_style.txt", category="Taxonomic profiling", caption="../../report/bracken_table_mpa.rst"), log: - str(LOGDIR/"kraken2/join_bracken_mpa_tables.log") + LOGDIR/"kraken2/join_bracken_mpa_tables.log", threads: 1 conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["stag"] params: value_column="reads", feature_column="taxon_name", @@ -390,11 +389,11 @@ rule bracken_area_plot: category="Taxonomic profiling", caption="../../report/area_plot.rst") log: - str(LOGDIR/"kraken2/area_plot.bracken.log") + LOGDIR/"kraken2/area_plot.bracken.log", conda: "../../envs/stag-mwc.yaml" container: - 
"oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["stag"] shell: """ workflow/scripts/area_plot.py \ @@ -407,18 +406,18 @@ rule bracken_area_plot: rule join_bracken: input: - bracken=expand(str(OUTDIR/"kraken2/{sample}.{{level}}.bracken"), sample=SAMPLES), + bracken=expand(OUTDIR/"kraken2/{sample}.{{level}}.bracken", sample=SAMPLES), output: table=report(OUTDIR/"kraken2/all_samples.{level,[DPOCFGS]}.bracken.txt", category="Taxonomic profiling", caption="../../report/bracken_table.rst"), log: - str(LOGDIR/"kraken2/join_bracken_tables.{level}.log") + LOGDIR/"kraken2/join_bracken_tables.{level}.log", threads: 1 conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["stag"] params: value_column="fraction_total_reads", feature_column="name", @@ -439,14 +438,14 @@ rule bracken2krona: output: bracken_krona=OUTDIR/"kraken2/{sample}.bracken.krona", log: - str(LOGDIR/"kraken2/{sample}.bracken2krona.log") + LOGDIR/"kraken2/{sample}.bracken2krona.log", threads: 1 shadow: "shallow" conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["stag"] shell: """ workflow/scripts/KrakenTools/kreport2krona.py \ @@ -458,7 +457,7 @@ rule bracken2krona: rule create_bracken_krona_plot: input: - expand(str(OUTDIR/"kraken2/{sample}.bracken.krona"), sample=SAMPLES), + expand(OUTDIR/"kraken2/{sample}.bracken.krona", sample=SAMPLES), output: krona_html=report(OUTDIR/"kraken2/all_samples.bracken.krona.html", category="Taxonomic profiling", @@ -468,7 +467,7 @@ rule create_bracken_krona_plot: conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["krona"] shell: """ ktImportText \ @@ -483,12 +482,12 @@ rule filter_bracken: output: filtered=OUTDIR/"kraken2/{sample}.{level,[DPOCFGS]}.filtered.bracken", log: - 
str(LOGDIR/"kraken2/{sample}.{level}.filter_bracken.log") + LOGDIR/"kraken2/{sample}.{level}.filter_bracken.log", threads: 1 conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["stag"] params: filter_bracken="workflow/scripts/KrakenTools/filter_bracken.out.py", include=kraken2_config["filter_bracken"]["include"], @@ -506,18 +505,18 @@ rule filter_bracken: rule join_bracken_filtered: input: - bracken=expand(str(OUTDIR/"kraken2/{sample}.{{level}}.filtered.bracken"), sample=SAMPLES), + bracken=expand(OUTDIR/"kraken2/{sample}.{{level}}.filtered.bracken", sample=SAMPLES), output: table=report(OUTDIR/"kraken2/all_samples.{level,[DPCOFGS]}.filtered.bracken.txt", category="Taxonomic profiling", caption="../../report/bracken_table_filtered.rst"), log: - str(LOGDIR/"kraken2/join_bracken_tables.{level}.log") + LOGDIR/"kraken2/join_bracken_tables.{level}.log", threads: 1 conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["stag"] params: value_column="fraction_total_reads", feature_column="name", @@ -530,3 +529,4 @@ rule join_bracken_filtered: {input.bracken} \ 2>&1 > {log} """ + diff --git a/workflow/rules/taxonomic_profiling/krakenuniq.smk b/workflow/rules/taxonomic_profiling/krakenuniq.smk index bc5a0a1..d64dc5a 100644 --- a/workflow/rules/taxonomic_profiling/krakenuniq.smk +++ b/workflow/rules/taxonomic_profiling/krakenuniq.smk @@ -54,7 +54,7 @@ rule krakenuniq_merge_reads: conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["bbmap"] shell: """ fuse.sh \ @@ -80,7 +80,7 @@ rule krakenuniq: conda: "../../envs/krakenuniq.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:krakenuniq"+singularity_branch_tag + config["containers"]["krakenuniq"] params: db=krakenuniq_config["db"], preload_size=krakenuniq_config["preload_size"], @@ 
-114,7 +114,7 @@ rule krakenuniq_combine_reports: conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["stag"] shell: """ workflow/scripts/join_tables.py \ @@ -139,7 +139,7 @@ rule krakenuniq_mpa_style: conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["stag"] shell: """ workflow/scripts/KrakenTools/kreport2mpa.py \ @@ -165,7 +165,7 @@ rule krakenuniq_join_mpa: conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["stag"] params: value_column="reads", feature_column="taxon_name", @@ -195,7 +195,7 @@ rule krakenuniq_kreport2krona: conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["stag"] shell: """ awk -v OFS='\\t' '{{ @@ -220,7 +220,7 @@ rule krakenuniq_krona_plot: conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["krona"] shell: """ ktImportText \ diff --git a/workflow/rules/taxonomic_profiling/metaphlan.smk b/workflow/rules/taxonomic_profiling/metaphlan.smk index 83346d6..94b858e 100644 --- a/workflow/rules/taxonomic_profiling/metaphlan.smk +++ b/workflow/rules/taxonomic_profiling/metaphlan.smk @@ -56,7 +56,7 @@ rule metaphlan: conda: "../../envs/metaphlan.yaml" container: - "docker://quay.io/biocontainers/metaphlan:4.0.6--pyhca03a8a_0" + config["containers"]["metaphlan"] threads: 8 params: bt2_db_dir=mpa_config["bt2_db_dir"], @@ -118,7 +118,7 @@ rule combine_metaphlan_tables: conda: "../../envs/metaphlan.yaml" container: - "docker://quay.io/biocontainers/metaphlan:4.0.6--pyhca03a8a_0" + config["containers"]["metaphlan"] threads: 1 shell: """ @@ -139,7 +139,7 @@ rule metaphlan_area_plot: conda: "../../envs/stag-mwc.yaml" container: - 
"oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["stag"] shell: """ workflow/scripts/area_plot.py \ @@ -165,7 +165,7 @@ rule plot_metaphlan_heatmap: conda: "../../envs/stag-mwc.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["stag"] threads: 1 params: outfile_prefix=lambda w: f"{OUTDIR}/metaphlan/all_samples", @@ -206,7 +206,7 @@ rule create_metaphlan_krona_plots: conda: "../../envs/metaphlan.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag + config["containers"]["krona"] threads: 1 shell: """ diff --git a/workflow/rules/taxonomic_profiling/strainphlan.smk b/workflow/rules/taxonomic_profiling/strainphlan.smk index 13bc24a..af64cff 100644 --- a/workflow/rules/taxonomic_profiling/strainphlan.smk +++ b/workflow/rules/taxonomic_profiling/strainphlan.smk @@ -45,7 +45,7 @@ rule consensus_markers: conda: "../../envs/metaphlan.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:biobakery"+singularity_branch_tag + config["containers"]["metaphlan"] threads: 8 params: output_dir=f"{OUTDIR}/strainphlan/consensus_markers/{{sample}}" @@ -73,7 +73,7 @@ rule print_clades: conda: "../../envs/metaphlan.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:biobakery"+singularity_branch_tag + config["containers"]["metaphlan"] threads: 8 params: out_dir=f"{OUTDIR}/strainphlan", @@ -107,7 +107,7 @@ rule extract_markers: conda: "../../envs/metaphlan.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:biobakery"+singularity_branch_tag + config["containers"]["metaphlan"] threads: 8 params: clade=spa_config["clade_of_interest"], @@ -139,7 +139,7 @@ rule strainphlan: conda: "../../envs/metaphlan.yaml" container: - "oras://ghcr.io/ctmrbio/stag-mwc:biobakery"+singularity_branch_tag + config["containers"]["metaphlan"] threads: 8 params: clade=spa_config["clade_of_interest"], @@ -149,7 +149,7 @@ rule strainphlan: extra=spa_config["extra"], # This is extremely 
useful if you want to include a reference genome shell: """ - echo "please compare your clade_of_interest to list of available clades in available_clades.txt" > {log.stderr} + echo "Please compare your clade_of_interest to list of available clades in available_clades.txt" > {log.stderr} strainphlan \ -s {input.consensus_markers} \ From 70b434371059b4af7a6d8cc9ce870e14a6aa5138 Mon Sep 17 00:00:00 2001 From: Fredrik Boulund Date: Fri, 15 Mar 2024 15:39:45 +0100 Subject: [PATCH 3/3] Update CHANGELOG --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index db13bde..73bba7e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,10 @@ situations. removal steps. ### Changed +- Moved container specifications into config file. Now using more detailed + container specifications for most rules. This also enables easier use of + local copies of containers (e.g. in HPC environments without external network + access). ### Deprecated