Skip to content

Commit

Permalink
Merge pull request #223 from ctmrbio/parameterize-singularity-images
Browse files Browse the repository at this point in the history
Parameterize container images
  • Loading branch information
boulund authored Mar 26, 2024
2 parents 15d33ea + 70b4343 commit ef444a8
Show file tree
Hide file tree
Showing 21 changed files with 205 additions and 181 deletions.
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,15 @@ situations.
removal steps.

### Changed
- Moved container specifications into config file. Now using more detailed
container specifications for most rules. This also enables easier use of
local copies of containers (e.g. in HPC environments without external network
access).

### Deprecated

### Removed
- Removed mentions of assembly workflow from docs.
- Removed mentions of assembly workflow from docs and config.


## [0.7.0] 2023-06-13
Expand Down
27 changes: 19 additions & 8 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ input_fn_pattern: "{sample}_{readpair}.fq.gz"
samplesheet: "" # Three-column samplesheet with sample_id,fastq_1,fastq_2 columns. Used instead of inputdir
outdir: "output_dir"
logdir: "output_dir/logs"
dbdir: "databases" # Databases will be downloaded to this dir
report: "StaG_report-" # Filename prefix for report file ("-{datetime}.html" automatically appended)
email: "" # Email to send status message after completed/failed run.

Expand All @@ -29,6 +28,25 @@ s3_endpoint_url: "https://s3.ki.se" # Use https://s3.amazonaws.com for Amazon S
keep_local: False # Keep local copies of remote input files, default False.


#########################
# Container images
#########################
containers:
bbmap: "docker://quay.io/biocontainers/bbmap:39.06--h92535d8_0"
bowtie2: "docker://quay.io/biocontainers/bowtie2:2.5.1--py38he00c5e5_2"
bracken: "docker://quay.io/biocontainers/bracken:2.9--py39h1f90b4d_0"
fastp: "docker://quay.io/biocontainers/fastp:0.23.4--hadf994f_2"
humann: "docker://quay.io/biocontainers/humann:3.8--pyh7cba7a3_0"
kaiju: "docker://quay.io/biocontainers/kaiju:1.10.1--h43eeafb_0"
kraken2: "docker://quay.io/biocontainers/kraken2:2.1.3--pl5321hdcf5f25_0"
krakenuniq: "docker://quay.io/biocontainers/krakenuniq:1.0.4--pl5321h6dccd9a_1"
krona: "docker://quay.io/biocontainers/krona:2.8.1--pl5321hdfd78af_1"
metaphlan: "docker://quay.io/biocontainers/metaphlan:4.1.0--pyhca03a8a_0"
multiqc: "docker://quay.io/biocontainers/multiqc:1.21--pyhdfd78af_0"
samtools: "docker://quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1"
stag: "oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc-develop"


#########################
# Pipeline steps included
#########################
Expand All @@ -52,8 +70,6 @@ functional_profile:
mappers:
bbmap: False
bowtie2: False
assembly: False
binning: False


#########################
Expand Down Expand Up @@ -190,8 +206,3 @@ bowtie2:
attribute_type: "" # Attribute type to summarize counts for, default is "gene_id" (any attribute in the GTF file's attribute field can be used)
extra: "" # Extra featureCount command line parameters


#########################
# Assembly
#########################
# Assembly workflow was removed in StaG v0.7.0
6 changes: 0 additions & 6 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ from scripts.common import UserMessages, SampleSheet
user_messages = UserMessages()

stag_version = "0.7.1"
singularity_branch_tag = "-develop" # Replace with "-master" before publishing new version

configfile: "config/config.yaml"
report: "report/workflow.rst"
Expand All @@ -31,7 +30,6 @@ citations = {publications["StaG"], publications["Snakemake"]}
INPUTDIR = Path(config["inputdir"])
OUTDIR = Path(config["outdir"])
LOGDIR = Path(config["logdir"])
DBDIR = Path(config["dbdir"])
all_outputs = []

if config["samplesheet"]:
Expand Down Expand Up @@ -100,10 +98,6 @@ include: "rules/functional_profiling/humann.smk"
include: "rules/mappers/bbmap.smk"
include: "rules/mappers/bowtie2.smk"

#############################
# Assembly
#############################

#############################
# MultiQC
#############################
Expand Down
2 changes: 1 addition & 1 deletion workflow/envs/humann.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ channels:
- bioconda
- defaults
dependencies:
- humann=3.7
- humann=3.8
2 changes: 1 addition & 1 deletion workflow/envs/krakenuniq.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- krakenuniq =1.0.3
- krakenuniq =1.0.4
2 changes: 1 addition & 1 deletion workflow/envs/metaphlan.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ channels:
- bioconda
- defaults
dependencies:
- metaphlan =4.0.6
- metaphlan =4.1.0
- krona =2.8.1
19 changes: 9 additions & 10 deletions workflow/envs/stag-mwc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,20 @@ channels:
- defaults
dependencies:
- python =3.10.9
- fastp =0.23.2
- bbmap =39.01
- kaiju =1.9.2
- kraken2 =2.1.2
- bracken =2.8
- bbmap =39.06
- bracken =2.9
- fastp =0.23.4
- kaiju =1.10.1
- kraken2 =2.1.3
- krona =2.8.1
- matplotlib =3.7.1
- multiqc =1.14
- multiqc =1.21
- pandas =2.0.0
- pigz =2.6
- sambamba =1.0
- samtools =1.19.2
- seaborn =0.12.2
- subread =2.0.3
- sambamba =1.0
- samtools =1.6
- groot =1.1.2
- pigz =2.6
- pip
- pip:
- fastcluster
7 changes: 4 additions & 3 deletions workflow/rules/functional_profiling/humann.smk
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ rule humann:
conda:
"../../envs/humann.yaml"
container:
"oras://ghcr.io/ctmrbio/stag-mwc:biobakery"+singularity_branch_tag
config["containers"]["humann"]
threads: 20
params:
outdir=f"{OUTDIR}/humann/",
Expand Down Expand Up @@ -96,7 +96,7 @@ rule normalize_humann_tables:
conda:
"../../envs/humann.yaml"
container:
"oras://ghcr.io/ctmrbio/stag-mwc:biobakery"+singularity_branch_tag
config["containers"]["humann"]
threads: 1
params:
method=h_config["norm_method"],
Expand Down Expand Up @@ -143,7 +143,7 @@ rule humann_join_tables:
conda:
"../../envs/humann.yaml"
container:
"oras://ghcr.io/ctmrbio/stag-mwc:biobakery"+singularity_branch_tag
config["containers"]["humann"]
threads:
1
params:
Expand Down Expand Up @@ -174,3 +174,4 @@ rule humann_join_tables:
>> {log.stdout} \
2>> {log.stderr}
"""

28 changes: 14 additions & 14 deletions workflow/rules/mappers/bbmap.smk
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# vim: syntax=python expandtab
# Rules for generic read mapping using BBMap
# TODO: Remove superfluous str conversions when Snakemake is pathlib compatible.
from pathlib import Path

from snakemake.exceptions import WorkflowError
Expand All @@ -20,14 +19,14 @@ for bbmap_config in config["bbmap"]:

# Add final output files from this module to 'all_outputs' from the main
# Snakefile scope. SAMPLES is also from the main Snakefile scope.
bbmap_alignments = expand(str(OUTDIR/"bbmap/{db_name}/{sample}.{output_type}"),
bbmap_alignments = expand(OUTDIR/"bbmap/{db_name}/{sample}.{output_type}",
db_name=db_name,
sample=SAMPLES,
output_type=("bam", "covstats.txt", "rpkm.txt"))
counts_table = expand(str(OUTDIR/"bbmap/{db_name}/counts.{column}.tsv"),
counts_table = expand(OUTDIR/"bbmap/{db_name}/counts.{column}.tsv",
db_name=db_name,
column=map(str.strip, bbmap_config["counts_table"]["columns"].split(",")))
featureCounts = expand(str(OUTDIR/"bbmap/{db_name}/all_samples.featureCounts{output_type}"),
featureCounts = expand(OUTDIR/"bbmap/{db_name}/all_samples.featureCounts{output_type}",
db_name=db_name,
sample=SAMPLES,
output_type=["", ".summary", ".table.txt"])
Expand Down Expand Up @@ -64,16 +63,16 @@ for bbmap_config in config["bbmap"]:
bamscript=temp(bbmap_output_folder/"{sample}.bamscript.sh"),
bamfile=bbmap_output_folder/"{sample}.bam" if bbmap_config["keep_bam"] else temp(bbmap_output_folder/"{sample}.bam"),
log:
stdout=str(bbmap_logdir/"{sample}.bbmap.stdout.log"),
stderr=str(bbmap_logdir/"{sample}.bbmap.statsfile.txt"),
stdout=bbmap_logdir/"{sample}.bbmap.stdout.log",
stderr=bbmap_logdir/"{sample}.bbmap.statsfile.txt",
message:
"Mapping {{wildcards.sample}} to {db_name} using BBMap".format(db_name=db_name)
shadow:
"shallow"
conda:
"../../envs/stag-mwc.yaml"
container:
"oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag
config["containers"]["bbmap"]
threads: 8
params:
db_path=bbmap_config["db_path"],
Expand Down Expand Up @@ -108,24 +107,24 @@ for bbmap_config in config["bbmap"]:
f"""Summarize read counts for {db_name}"""
name: f"bbmap_counts_{db_name}"
input:
rpkms=expand(str(OUTDIR/"bbmap/{db_name}/{sample}.rpkm.txt"),
rpkms=expand(OUTDIR/"bbmap/{db_name}/{sample}.rpkm.txt",
db_name=db_name,
sample=SAMPLES)
output:
expand(str(OUTDIR/"bbmap/{db_name}/counts.{column}.tsv"),
expand(OUTDIR/"bbmap/{db_name}/counts.{column}.tsv",
db_name=db_name,
column=map(str.strip, bbmap_config["counts_table"]["columns"].split(","))
)
log:
str(bbmap_logdir/"counts.log")
bbmap_logdir/"counts.log"
message:
"Summarizing read counts for {db_name}".format(db_name=db_name)
shadow:
"shallow"
conda:
"../../envs/stag-mwc.yaml"
container:
"oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag
config["containers"]["stag"]
threads: 1
params:
annotations=bbmap_config["counts_table"]["annotations"],
Expand All @@ -147,23 +146,23 @@ for bbmap_config in config["bbmap"]:
f"""Summarize feature counts for {db_name}"""
name: f"bbmap_feature_counts_{db_name}"
input:
bams=expand(str(OUTDIR/"bbmap/{db_name}/{sample}.bam"),
bams=expand(OUTDIR/"bbmap/{db_name}/{sample}.bam",
db_name=db_name,
sample=SAMPLES)
output:
counts=OUTDIR/"bbmap/{db_name}/all_samples.featureCounts".format(db_name=db_name),
counts_table=OUTDIR/"bbmap/{db_name}/all_samples.featureCounts.table.txt".format(db_name=db_name),
summary=OUTDIR/"bbmap/{db_name}/all_samples.featureCounts.summary".format(db_name=db_name),
log:
str(bbmap_logdir/"all_samples.featureCounts.log")
bbmap_logdir/"all_samples.featureCounts.log"
message:
"Summarizing feature counts for {db_name}".format(db_name=db_name)
shadow:
"shallow"
conda:
"../../envs/stag-mwc.yaml"
container:
"oras://ghcr.io/ctmrbio/stag-mwc:stag-mwc"+singularity_branch_tag
config["containers"]["stag"]
threads: 4
params:
annotations=fc_config["annotations"],
Expand All @@ -183,6 +182,7 @@ for bbmap_config in config["bbmap"]:
{input.bams} \
> {log} \
2>> {log}
cut \
-f1,7- \
{output.counts} \
Expand Down
Loading

0 comments on commit ef444a8

Please sign in to comment.