snakemake · johanneskoester · Feb 21, 2022 · Feb 9, 2022 · Feb 9, 2022 · Feb 9, 2022
diff --git a/bio/star/align/environment.yaml b/bio/star/align/environment.yaml
@@ -2,4 +2,4 @@ channels:
   - bioconda
   - conda-forge
 dependencies:
-  - star ==2.7.9a
+  - star =2.7
diff --git a/bio/star/align/meta.yaml b/bio/star/align/meta.yaml
@@ -3,6 +3,7 @@ description: Map reads with STAR.
 authors:
   - Johannes Köster
   - Tomás Di Domenico
+  - Filipe G. Vieira
 notes: |
   * The `extra` param allows for additional program arguments.
   * It is advisable to consider updating the limits setting before running STAR,

diff --git a/bio/star/align/test/Snakefile b/bio/star/align/test/Snakefile
@@ -7,12 +7,13 @@ rule star_pe_multi:
         fq2=["reads/{sample}_R2.1.fastq", "reads/{sample}_R2.2.fastq"],  #optional
     output:
         # see STAR manual for additional output files
-        "star/pe/{sample}/Aligned.out.sam",
+        sam="star/pe/{sample}/Aligned.out.sam",
+        log="star/pe/{sample}/Log.out",
     log:
         "logs/star/pe/{sample}.log",
     params:
         # path to STAR reference genome index
-        index="index",
+        idx="index",
         # optional parameters
         extra="",
     threads: 8
@@ -25,12 +26,13 @@ rule star_se:
         fq1="reads/{sample}_R1.1.fastq",
     output:
         # see STAR manual for additional output files
-        "star/{sample}/Aligned.out.sam",
+        sam="star/se/{sample}/Aligned.out.sam",
+        log="star/se/{sample}/Log.out",
     log:
-        "logs/star/{sample}.log",
+        "logs/star/se/{sample}.log",
     params:
         # path to STAR reference genome index
-        index="index",
+        idx="index",
         # optional parameters
         extra="",
     threads: 8

diff --git a/bio/star/align/wrapper.py b/bio/star/align/wrapper.py
@@ -37,26 +37,42 @@
 else:
     readcmd = ""
 
-if "SortedByCoordinate" in extra:
-    bamprefix = "Aligned.sortedByCoord.out."
-else:
-    bamprefix = "Aligned.out."
-
-outprefix = snakemake.output[0].split(bamprefix)[0]
-
-if outprefix == os.path.dirname(snakemake.output[0]):
-    outprefix += "/"
+index = snakemake.input.get("idx")  # Prefer an index declared as rule input
+if not index:
+    index = snakemake.params.get("idx", "")  # Otherwise fall back to params.idx ("" if unset)
 
 with tempfile.TemporaryDirectory() as tmpdir:
     shell(
         "STAR "
-        "{extra} "
-        "--runThreadN {snakemake.threads} "
-        "--genomeDir {snakemake.params.index} "
-        "--readFilesIn {input_str} "
-        "{readcmd} "
-        "--outFileNamePrefix {outprefix} "
-        "--outStd Log "
-        "--outTmpDir {tmpdir}/STARtmp "
-        "{log}"
+        " --runThreadN {snakemake.threads}"  # Number of threads
+        " --genomeDir {index}"  # Index path resolved from input.idx or params.idx
+        " --readFilesIn {input_str}"
+        " {readcmd}"
+        " {extra}"
+        " --outTmpDir {tmpdir}/temp"  # STAR scratch dir inside the temporary directory
+        " --outFileNamePrefix {tmpdir}/"  # Output prefix points into the temporary directory
+        " --outStd Log "
+        " {log}"
     )
+
+    if "SortedByCoordinate" in extra:  # Alignment file name depends on the requested sorting
+        bamprefix = "Aligned.sortedByCoord.out"
+    else:
+        bamprefix = "Aligned.out"
+
+    if snakemake.output.get("bam"):  # Copy out only the files declared as named outputs
+        shell("cat {tmpdir}/{bamprefix}.bam > {snakemake.output.bam:q}")
+    if snakemake.output.get("sam"):
+        shell("cat {tmpdir}/{bamprefix}.sam > {snakemake.output.sam:q}")
+    if snakemake.output.get("reads_per_gene"):
+        shell("cat {tmpdir}/ReadsPerGene.out.tab > {snakemake.output.reads_per_gene:q}")
+    if snakemake.output.get("chim_junc"):
+        shell("cat {tmpdir}/Chimeric.out.junction > {snakemake.output.chim_junc:q}")
+    if snakemake.output.get("sj"):
+        shell("cat {tmpdir}/SJ.out.tab > {snakemake.output.sj:q}")
+    if snakemake.output.get("log"):  # STAR's own Log.out, separate from the rule's log directive
+        shell("cat {tmpdir}/Log.out > {snakemake.output.log:q}")
+    if snakemake.output.get("log_progress"):
+        shell("cat {tmpdir}/Log.progress.out > {snakemake.output.log_progress:q}")
+    if snakemake.output.get("log_final"):
+        shell("cat {tmpdir}/Log.final.out > {snakemake.output.log_final:q}")
diff --git a/bio/star/index/environment.yaml b/bio/star/index/environment.yaml
@@ -2,4 +2,4 @@ channels:
   - bioconda
   - conda-forge
 dependencies:
-  - star ==2.7.8a
+  - star =2.7
diff --git a/bio/star/index/meta.yaml b/bio/star/index/meta.yaml
@@ -3,6 +3,7 @@ description: Index fasta sequences with STAR
 authors:
   - Thibault Dayris
   - Tomás Di Domenico
+  - Filipe G. Vieira
 input:
   - A (multi)fasta formatted file
 output:

diff --git a/bio/star/index/test/Snakefile b/bio/star/index/test/Snakefile
@@ -1,15 +1,14 @@
 rule star_index:
     input:
-        fasta = "{genome}.fasta"
+        fasta="{genome}.fasta",
     output:
-        directory("{genome}")
+        directory("{genome}"),
     message:
         "Testing STAR index"
-    threads:
-        1
+    threads: 1
     params:
-        extra = ""
+        extra="",
     log:
-        "logs/star_index_{genome}.log"
+        "logs/star_index_{genome}.log",
     wrapper:
         "master/bio/star/index"
diff --git a/bio/star/index/wrapper.py b/bio/star/index/wrapper.py
@@ -5,6 +5,7 @@
 __email__ = "thibault.dayris@gustaveroussy.fr"
 __license__ = "MIT"
 
+import tempfile
 from snakemake.shell import shell
 from snakemake.utils import makedirs
 
@@ -15,21 +16,23 @@
 
 gtf = snakemake.input.get("gtf")
 if gtf is not None:
-    gtf = "--sjdbGTFfile " + gtf
-    sjdb_overhang = "--sjdbOverhang " + sjdb_overhang
+    gtf = f"--sjdbGTFfile {gtf}"  # Replace the path by the complete command-line option
+    sjdb_overhang = f"--sjdbOverhang {sjdb_overhang}"  # Likewise; value is set above this hunk
 else:
     gtf = sjdb_overhang = ""
 
 makedirs(snakemake.output)
 
-shell(
-    "STAR "  # Tool
-    "--runMode genomeGenerate "  # Indexation mode
-    "{extra} "  # Optional parameters
-    "--runThreadN {snakemake.threads} "  # Number of threads
-    "--genomeDir {snakemake.output} "  # Path to output
-    "--genomeFastaFiles {snakemake.input.fasta} "  # Path to fasta files
-    "{sjdb_overhang} "  # Read-len - 1
-    "{gtf} "  # Highly recommended GTF
-    "{log}"  # Logging
-)
+with tempfile.TemporaryDirectory() as tmpdir:  # Scratch space, removed when the block exits
+    shell(
+        "STAR"  # Tool
+        " --runThreadN {snakemake.threads}"  # Number of threads
+        " --runMode genomeGenerate"  # Indexation mode
+        " --genomeFastaFiles {snakemake.input.fasta}"  # Path to fasta files
+        " {sjdb_overhang}"  # "--sjdbOverhang <n>" (read length - 1); empty without a GTF
+        " {gtf}"  # "--sjdbGTFfile <path>" (highly recommended); empty without a GTF
+        " {extra}"  # Optional parameters
+        " --outTmpDir {tmpdir}/STARtmp"  # STAR scratch dir inside the temporary directory
+        " --genomeDir {snakemake.output}"  # Index is written to the rule's output path
+        " {log}"  # Logging
+    )
diff --git a/meta/bio/star_arriba/test/Snakefile b/meta/bio/star_arriba/test/Snakefile
@@ -1,57 +1,57 @@
 rule star_index:
     input:
         fasta="resources/genome.fasta",
-        annotation="resources/genome.gtf"
+        annotation="resources/genome.gtf",
     output:
-        directory("resources/star_genome")
+        directory("resources/star_genome"),
     threads: 4
     params:
-        extra="--sjdbGTFfile resources/genome.gtf --sjdbOverhang 100"
+        extra="--sjdbGTFfile resources/genome.gtf --sjdbOverhang 100",
     log:
-        "logs/star_index_genome.log"
+        "logs/star_index_genome.log",
     cache: True
     wrapper:
         "master/bio/star/index"
 
+
 rule star_align:
     input:
         # use a list for multiple fastq files for one sample
         # usually technical replicates across lanes/flowcells
         fq1="reads/{sample}_R1.1.fastq",
-        fq2="reads/{sample}_R2.1.fastq", #optional
-        index="resources/star_genome"
+        fq2="reads/{sample}_R2.1.fastq",  #optional
+        idx="resources/star_genome",
     output:
         # see STAR manual for additional output files
-        "star/{sample}/Aligned.out.bam",
-        "star/{sample}/ReadsPerGene.out.tab"
+        bam="star/{sample}/Aligned.out.bam",
+        reads_per_gene="star/{sample}/ReadsPerGene.out.tab",
     log:
-        "logs/star/{sample}.log"
+        "logs/star/{sample}.log",
     params:
-        # path to STAR reference genome index
-        index="resources/star_genome",
         # specific parameters to work well with arriba
         extra="--quantMode GeneCounts --sjdbGTFfile resources/genome.gtf"
-            " --outSAMtype BAM Unsorted --chimSegmentMin 10 --chimOutType WithinBAM SoftClip"
-            " --chimJunctionOverhangMin 10 --chimScoreMin 1 --chimScoreDropMax 30 --chimScoreJunctionNonGTAG 0"
-            " --chimScoreSeparation 1 --alignSJstitchMismatchNmax 5 -1 5 5 --chimSegmentReadGapMax 3"
+        " --outSAMtype BAM Unsorted --chimSegmentMin 10 --chimOutType WithinBAM SoftClip"
+        " --chimJunctionOverhangMin 10 --chimScoreMin 1 --chimScoreDropMax 30 --chimScoreJunctionNonGTAG 0"
+        " --chimScoreSeparation 1 --alignSJstitchMismatchNmax 5 -1 5 5 --chimSegmentReadGapMax 3",
     threads: 12
     wrapper:
         "master/bio/star/align"
 
+
 rule arriba:
     input:
         bam="star/{sample}/Aligned.out.bam",
         genome="resources/genome.fasta",
-        annotation="resources/genome.gtf"
+        annotation="resources/genome.gtf",
     output:
         fusions="results/arriba/{sample}.fusions.tsv",
-        discarded="results/arriba/{sample}.fusions.discarded.tsv"
+        discarded="results/arriba/{sample}.fusions.discarded.tsv",
     params:
         # A tsv containing identified artifacts, such as read-through fusions of neighbouring genes, see https://arriba.readthedocs.io/en/latest/input-files/#blacklist
         blacklist="arriba_blacklist.tsv",
-        extra="-T -P -i 1,2" # -i describes the wanted contigs, remove if you want to use all hg38 chromosomes
+        extra="-T -P -i 1,2",  # -i describes the wanted contigs, remove if you want to use all hg38 chromosomes
     log:
-        "logs/arriba/{sample}.log"
+        "logs/arriba/{sample}.log",
     threads: 1
     wrapper:
         "master/bio/arriba"
diff --git a/test.py b/test.py
@@ -2599,7 +2599,7 @@ def test_star_align():
 
     run(
         "bio/star/align",
-        ["snakemake", "--cores", "1", "star/a/Aligned.out.sam", "--use-conda", "-F"],
+        ["snakemake", "--cores", "1", "star/se/a/Aligned.out.sam", "--use-conda", "-F"],
     )
     run(
         "bio/star/align",