Skip to content

Commit

Permalink
feat: added wrappers (#46)
Browse files Browse the repository at this point in the history
* fix: file paths after cluster migration

* fix: minimap2 indexing rule now uses wrapper

* fix: minimap2 mapping rule now uses wrapper

* fix: rule sam_view SAM to BAM now uses wrapper

* fix: sam_stats qc rule now uses wrapper

* fix: sam_sorts rule now uses wrapper

* fix: updated wrappers

* fix: updated samsort mem allocation

* fix: linting
  • Loading branch information
yeising authored Jul 5, 2024
1 parent 030e041 commit 9c3bfe4
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 31 deletions.
18 changes: 15 additions & 3 deletions config/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,16 @@ samples: samples.csv
workflow: "workflow-transcriptome-de_phe"

# Input directory: all samples are looked up in this directory.
inputdir: "/lustre/project/m2_zdvhpc/transcriptome_data"
inputdir: "/lustre/project/nhr-zdvhpc/transcriptome_data"
# Repository URL:
repo: "https://github.com/snakemake-workflows/transriptome-differential-expression"

## Workflow-specific Parameters:

# Genome fasta (absolute path)
genome: "/lustre/miifs01/project/m2_zdvhpc/transcriptome_data/GCA_917627325.4_PGI_CHIRRI_v4_genomic.fa"
genome: "/lustre/miifs01/project/nhr-zdvhpc/transcriptome_data/GCA_917627325.4_PGI_CHIRRI_v4_genomic.fa"
# Annotation GFF/GTF (absolute path)
annotation: "/lustre/miifs01/project/m2_zdvhpc/transcriptome_data/GCA_917627325.4_PGI_CHIRRI_v4_genomic.gff"
annotation: "/lustre/miifs01/project/nhr-zdvhpc/transcriptome_data/GCA_917627325.4_PGI_CHIRRI_v4_genomic.gff"
# these samples ought to contain all samples comprising of the

# Minimum read length, put 0 if you want to proceed with all reads.
Expand All @@ -40,9 +40,21 @@ maximum_secondary: 100
# Secondary score ratio (-p for minimap2)
secondary_score_ratio: 1.0

# Samtools view opts, "-b" creates BAM from SAM.
sview_opts: "-b"

# Samtools sort opts: extra command-line options passed to samtools sort.
ssort_opts: ""

# Salmon library type
salmon_libtype: "U"


# QC options

# Samtools stats opts: extra command-line options passed to samtools stats.
sstats_opts: ""

# Count filtering options - customize these according to your experimental design:

# Minimum number of samples in which a gene must be expressed
Expand Down
32 changes: 14 additions & 18 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ rule filter_reads:
),
output:
temp("filter/{sample}_filtered.fq"),
message:
message:
f"Filtering with read length >= {config['min_length']}."
log:
"logs/filter_reads/{sample}.log",
Expand All @@ -80,40 +80,34 @@ rule filter_reads:

rule build_minimap_index: ## build minimap2 index
input:
transcriptome=rules.genome_to_transcriptome.output,
target=rules.genome_to_transcriptome.output,
output:
index="index/transcriptome_index.mmi",
params:
opts=config["minimap_index_opts"],
extra=config["minimap_index_opts"],
log:
"logs/minimap2/index.log",
conda:
"envs/env.yml"
shell:
"""
minimap2 -t {resources.cpus_per_task} {params.opts} -ax map-ont -d {output.index} {input.transcriptome} 2> {log}
"""
wrapper:
"v3.13.4/bio/minimap2/index"


# mapping reads with minimap2
rule map_reads:
input:
index=rules.build_minimap_index.output.index,
fastq_filtered=rules.filter_reads.output,
target=rules.build_minimap_index.output.index,
query=rules.filter_reads.output,
output:
"alignments/{sample}.sam",
log:
"logs/minimap2/mapping_{sample}.log",
params:
opts=config["minimap2_opts"],
msec=config["maximum_secondary"],
psec=config["secondary_score_ratio"],
extra=f"-p {config['secondary_score_ratio']} -N {config['maximum_secondary']} {config['minimap2_opts']}",
conda:
"envs/env.yml"
shell:
"""
minimap2 -t {resources.cpus_per_task} -ax map-ont -p {params.psec} -N {params.msec} {params.opts} {input.index} {input.fastq} > {output} 2> {log}
"""
wrapper:
"v3.13.4/bio/minimap2/aligner"


rule sam_view:
Expand All @@ -123,10 +117,12 @@ rule sam_view:
"sorted_alignments/{sample}.bam",
log:
"logs/samtools/samview_{sample}.log",
params:
extra=f'{config["sview_opts"]}',
conda:
"envs/env.yml"
shell:
"samtools view -@ {resources.cpus_per_task} -bS {input.sam} > {output} 2> {log}"
wrapper:
"v3.13.4/bio/samtools/view"


rule sam_index:
Expand Down
11 changes: 8 additions & 3 deletions workflow/profile/Mainz-MogonNHR/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,22 @@ default-resources:

set-resources:
genome_to_transcriptome:
cpus_per_task: 1
mem_mb_per_cpu: 1800
runtime: "30m"

filter_reads:
cpus_per_task: 1
mem_mb_per_cpu: 1800
runtime: "2h"

build_minimap_index:
cpus_per_task: 4
mem_mb_per_cpu: 3600
runtime: "1h"
runtime: "30m"

map_reads:
cpus_per_task: 40
cpus_per_task: 32
mem_mb_per_cpu: 1800
runtime: "3h"
slurm_partition: "smallcpu" # needs benchmarking
Expand All @@ -36,7 +41,7 @@ set-resources:

sam_sort:
cpus_per_task: 4
mem_mb_per_cpu: 1800
mem_mb_per_cpu: 3600
runtime: "2h"

sam_view:
Expand Down
16 changes: 9 additions & 7 deletions workflow/rules/qc.smk
Original file line number Diff line number Diff line change
Expand Up @@ -118,10 +118,12 @@ rule sam_sort:
"sorted_alignments/{sample}_sorted.bam",
log:
"logs/samtools/samsort_{sample}.log",
params:
extra=f'{config["ssort_opts"]}',
conda:
"../envs/env.yml"
shell:
"samtools sort -@ {resources.cpus_per_task} {input.sam} -o {output} -O bam &> {log}"
wrapper:
"v3.13.4/bio/samtools/sort"


rule map_qc:
Expand All @@ -134,7 +136,7 @@ rule map_qc:
conda:
"../envs/env.yml"
wrapper:
"v3.12.1/bio/qualimap/bamqc"
"v3.13.4/bio/qualimap/bamqc"


rule compress_map_qc:
Expand All @@ -157,9 +159,9 @@ rule sam_stats:
"QC/samstats/{sample}.txt",
log:
"logs/samtools/samstats_{sample}.log",
params:
extra=f'{config["sstats_opts"]}',
conda:
"../envs/env.yml"
shell:
"""
samtools stats -@ {resources.cpus_per_task} {input.bam} > {output} 2> {log}
"""
wrapper:
"v3.13.4/bio/samtools/stats"

0 comments on commit 9c3bfe4

Please sign in to comment.