Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

feat: added STAR temp dir #453

Merged
merged 14 commits into from
Feb 21, 2022
2 changes: 1 addition & 1 deletion bio/star/align/environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ channels:
- bioconda
- conda-forge
dependencies:
- star ==2.7.9a
- star =2.7
1 change: 1 addition & 0 deletions bio/star/align/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ description: Map reads with STAR.
authors:
- Johannes Köster
- Tomás Di Domenico
- Filipe G. Vieira
notes: |
* The `extra` param allows for additional program arguments.
* It is advisable to consider updating the limits setting before running STAR,
Expand Down
12 changes: 7 additions & 5 deletions bio/star/align/test/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@ rule star_pe_multi:
fq2=["reads/{sample}_R2.1.fastq", "reads/{sample}_R2.2.fastq"], #optional
output:
# see STAR manual for additional output files
"star/pe/{sample}/Aligned.out.sam",
sam="star/pe/{sample}/Aligned.out.sam",
log="star/pe/{sample}/Log.out",
log:
"logs/star/pe/{sample}.log",
params:
# path to STAR reference genome index
index="index",
idx="index",
# optional parameters
extra="",
threads: 8
Expand All @@ -25,12 +26,13 @@ rule star_se:
fq1="reads/{sample}_R1.1.fastq",
output:
# see STAR manual for additional output files
"star/{sample}/Aligned.out.sam",
sam="star/se/{sample}/Aligned.out.sam",
log="star/se/{sample}/Log.out",
log:
"logs/star/{sample}.log",
"logs/star/se/{sample}.log",
params:
# path to STAR reference genome index
index="index",
idx="index",
# optional parameters
extra="",
threads: 8
Expand Down
52 changes: 34 additions & 18 deletions bio/star/align/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,26 +37,42 @@
else:
readcmd = ""

if "SortedByCoordinate" in extra:
bamprefix = "Aligned.sortedByCoord.out."
else:
bamprefix = "Aligned.out."

outprefix = snakemake.output[0].split(bamprefix)[0]

if outprefix == os.path.dirname(snakemake.output[0]):
outprefix += "/"
index = snakemake.input.get("idx")
if not index:
index = snakemake.params.get("idx", "")

with tempfile.TemporaryDirectory() as tmpdir:
shell(
"STAR "
"{extra} "
"--runThreadN {snakemake.threads} "
"--genomeDir {snakemake.params.index} "
"--readFilesIn {input_str} "
"{readcmd} "
"--outFileNamePrefix {outprefix} "
"--outStd Log "
"--outTmpDir {tmpdir}/STARtmp "
"{log}"
" --runThreadN {snakemake.threads}"
" --genomeDir {index}"
" --readFilesIn {input_str}"
" {readcmd}"
" {extra}"
" --outTmpDir {tmpdir}/temp"
" --outFileNamePrefix {tmpdir}/"
" --outStd Log "
" {log}"
)

if "SortedByCoordinate" in extra:
bamprefix = "Aligned.sortedByCoord.out"
else:
bamprefix = "Aligned.out"

if snakemake.output.get("bam"):
shell("cat {tmpdir}/{bamprefix}.bam > {snakemake.output.bam:q}")
if snakemake.output.get("sam"):
shell("cat {tmpdir}/{bamprefix}.sam > {snakemake.output.sam:q}")
if snakemake.output.get("reads_per_gene"):
shell("cat {tmpdir}/ReadsPerGene.out.tab > {snakemake.output.reads_per_gene:q}")
if snakemake.output.get("chim_junc"):
shell("cat {tmpdir}/Chimeric.out.junction > {snakemake.output.chim_junc:q}")
if snakemake.output.get("sj"):
shell("cat {tmpdir}/SJ.out.tab > {snakemake.output.sj:q}")
if snakemake.output.get("log"):
shell("cat {tmpdir}/Log.out > {snakemake.output.log:q}")
if snakemake.output.get("log_progress"):
shell("cat {tmpdir}/Log.progress.out > {snakemake.output.log_progress:q}")
if snakemake.output.get("log_final"):
shell("cat {tmpdir}/Log.final.out > {snakemake.output.log_final:q}")
2 changes: 1 addition & 1 deletion bio/star/index/environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ channels:
- bioconda
- conda-forge
dependencies:
- star ==2.7.8a
- star =2.7
1 change: 1 addition & 0 deletions bio/star/index/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ description: Index fasta sequences with STAR
authors:
- Thibault Dayris
- Tomás Di Domenico
- Filipe G. Vieira
input:
- A (multi)fasta formatted file
output:
Expand Down
11 changes: 5 additions & 6 deletions bio/star/index/test/Snakefile
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
rule star_index:
input:
fasta = "{genome}.fasta"
fasta="{genome}.fasta",
output:
directory("{genome}")
directory("{genome}"),
message:
"Testing STAR index"
threads:
1
threads: 1
params:
extra = ""
extra="",
log:
"logs/star_index_{genome}.log"
"logs/star_index_{genome}.log",
wrapper:
"master/bio/star/index"
29 changes: 16 additions & 13 deletions bio/star/index/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
__email__ = "thibault.dayris@gustaveroussy.fr"
__license__ = "MIT"

import tempfile
from snakemake.shell import shell
from snakemake.utils import makedirs

Expand All @@ -15,21 +16,23 @@

gtf = snakemake.input.get("gtf")
if gtf is not None:
gtf = "--sjdbGTFfile " + gtf
sjdb_overhang = "--sjdbOverhang " + sjdb_overhang
gtf = f"--sjdbGTFfile {gtf}"
sjdb_overhang = f"--sjdbOverhang {sjdb_overhang}"
else:
gtf = sjdb_overhang = ""

makedirs(snakemake.output)

shell(
"STAR " # Tool
"--runMode genomeGenerate " # Indexation mode
"{extra} " # Optional parameters
"--runThreadN {snakemake.threads} " # Number of threads
"--genomeDir {snakemake.output} " # Path to output
"--genomeFastaFiles {snakemake.input.fasta} " # Path to fasta files
"{sjdb_overhang} " # Read-len - 1
"{gtf} " # Highly recommended GTF
"{log}" # Logging
)
with tempfile.TemporaryDirectory() as tmpdir:
shell(
"STAR"
" --runThreadN {snakemake.threads}" # Number of threads
" --runMode genomeGenerate" # Indexation mode
" --genomeFastaFiles {snakemake.input.fasta}" # Path to fasta files
" {sjdb_overhang}" # Read-len - 1
" {gtf}" # Highly recommended GTF
" {extra}" # Optional parameters
" --outTmpDir {tmpdir}/STARtmp" # Temp dir
" --genomeDir {snakemake.output}" # Path to output
" {log}" # Logging
)
36 changes: 18 additions & 18 deletions meta/bio/star_arriba/test/Snakefile
Original file line number Diff line number Diff line change
@@ -1,57 +1,57 @@
rule star_index:
input:
fasta="resources/genome.fasta",
annotation="resources/genome.gtf"
annotation="resources/genome.gtf",
output:
directory("resources/star_genome")
directory("resources/star_genome"),
threads: 4
params:
extra="--sjdbGTFfile resources/genome.gtf --sjdbOverhang 100"
extra="--sjdbGTFfile resources/genome.gtf --sjdbOverhang 100",
log:
"logs/star_index_genome.log"
"logs/star_index_genome.log",
cache: True
wrapper:
"master/bio/star/index"


rule star_align:
input:
# use a list for multiple fastq files for one sample
# usually technical replicates across lanes/flowcells
fq1="reads/{sample}_R1.1.fastq",
fq2="reads/{sample}_R2.1.fastq", #optional
index="resources/star_genome"
fq2="reads/{sample}_R2.1.fastq", #optional
idx="resources/star_genome",
output:
# see STAR manual for additional output files
"star/{sample}/Aligned.out.bam",
"star/{sample}/ReadsPerGene.out.tab"
bam="star/{sample}/Aligned.out.bam",
reads_per_gene="star/{sample}/ReadsPerGene.out.tab",
log:
"logs/star/{sample}.log"
"logs/star/{sample}.log",
params:
# path to STAR reference genome index
index="resources/star_genome",
# specific parameters to work well with arriba
extra="--quantMode GeneCounts --sjdbGTFfile resources/genome.gtf"
" --outSAMtype BAM Unsorted --chimSegmentMin 10 --chimOutType WithinBAM SoftClip"
" --chimJunctionOverhangMin 10 --chimScoreMin 1 --chimScoreDropMax 30 --chimScoreJunctionNonGTAG 0"
" --chimScoreSeparation 1 --alignSJstitchMismatchNmax 5 -1 5 5 --chimSegmentReadGapMax 3"
" --outSAMtype BAM Unsorted --chimSegmentMin 10 --chimOutType WithinBAM SoftClip"
" --chimJunctionOverhangMin 10 --chimScoreMin 1 --chimScoreDropMax 30 --chimScoreJunctionNonGTAG 0"
" --chimScoreSeparation 1 --alignSJstitchMismatchNmax 5 -1 5 5 --chimSegmentReadGapMax 3",
threads: 12
wrapper:
"master/bio/star/align"


rule arriba:
input:
bam="star/{sample}/Aligned.out.bam",
genome="resources/genome.fasta",
annotation="resources/genome.gtf"
annotation="resources/genome.gtf",
output:
fusions="results/arriba/{sample}.fusions.tsv",
discarded="results/arriba/{sample}.fusions.discarded.tsv"
discarded="results/arriba/{sample}.fusions.discarded.tsv",
params:
# A tsv containing identified artifacts, such as read-through fusions of neighbouring genes, see https://arriba.readthedocs.io/en/latest/input-files/#blacklist
blacklist="arriba_blacklist.tsv",
extra="-T -P -i 1,2" # -i describes the wanted contigs, remove if you want to use all hg38 chromosomes
extra="-T -P -i 1,2", # -i describes the wanted contigs, remove if you want to use all hg38 chromosomes
log:
"logs/arriba/{sample}.log"
"logs/arriba/{sample}.log",
threads: 1
wrapper:
"master/bio/arriba"
2 changes: 1 addition & 1 deletion test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2599,7 +2599,7 @@ def test_star_align():

run(
"bio/star/align",
["snakemake", "--cores", "1", "star/a/Aligned.out.sam", "--use-conda", "-F"],
["snakemake", "--cores", "1", "star/se/a/Aligned.out.sam", "--use-conda", "-F"],
)
run(
"bio/star/align",
Expand Down