diff --git a/bio/star/align/environment.yaml b/bio/star/align/environment.yaml index 2e10fedebf..b7f7eef6f1 100644 --- a/bio/star/align/environment.yaml +++ b/bio/star/align/environment.yaml @@ -2,4 +2,4 @@ channels: - bioconda - conda-forge dependencies: - - star ==2.7.9a + - star =2.7 diff --git a/bio/star/align/meta.yaml b/bio/star/align/meta.yaml index 8aebe8c88a..8ca5f03fae 100644 --- a/bio/star/align/meta.yaml +++ b/bio/star/align/meta.yaml @@ -3,6 +3,7 @@ description: Map reads with STAR. authors: - Johannes Köster - Tomás Di Domenico + - Filipe G. Vieira notes: | * The `extra` param allows for additional program arguments. * It is advisable to consider updating the limits setting before running STAR, diff --git a/bio/star/align/test/Snakefile b/bio/star/align/test/Snakefile index 59ca52c007..d2bff02ef5 100644 --- a/bio/star/align/test/Snakefile +++ b/bio/star/align/test/Snakefile @@ -7,12 +7,13 @@ rule star_pe_multi: fq2=["reads/{sample}_R2.1.fastq", "reads/{sample}_R2.2.fastq"], #optional output: # see STAR manual for additional output files - "star/pe/{sample}/Aligned.out.sam", + sam="star/pe/{sample}/Aligned.out.sam", + log="star/pe/{sample}/Log.out", log: "logs/star/pe/{sample}.log", params: # path to STAR reference genome index - index="index", + idx="index", # optional parameters extra="", threads: 8 @@ -25,12 +26,13 @@ rule star_se: fq1="reads/{sample}_R1.1.fastq", output: # see STAR manual for additional output files - "star/{sample}/Aligned.out.sam", + sam="star/se/{sample}/Aligned.out.sam", + log="star/se/{sample}/Log.out", log: - "logs/star/{sample}.log", + "logs/star/se/{sample}.log", params: # path to STAR reference genome index - index="index", + idx="index", # optional parameters extra="", threads: 8 diff --git a/bio/star/align/wrapper.py b/bio/star/align/wrapper.py index 834e41cc66..3861864b25 100644 --- a/bio/star/align/wrapper.py +++ b/bio/star/align/wrapper.py @@ -37,26 +37,42 @@ else: readcmd = "" -if "SortedByCoordinate" in extra: - bamprefix = "Aligned.sortedByCoord.out." -else: - bamprefix = "Aligned.out." - -outprefix = snakemake.output[0].split(bamprefix)[0] - -if outprefix == os.path.dirname(snakemake.output[0]): - outprefix += "/" +index = snakemake.input.get("idx") +if not index: + index = snakemake.params.get("idx", "") with tempfile.TemporaryDirectory() as tmpdir: shell( "STAR " - "{extra} " - "--runThreadN {snakemake.threads} " - "--genomeDir {snakemake.params.index} " - "--readFilesIn {input_str} " - "{readcmd} " - "--outFileNamePrefix {outprefix} " - "--outStd Log " - "--outTmpDir {tmpdir}/STARtmp " - "{log}" + " --runThreadN {snakemake.threads}" + " --genomeDir {index}" + " --readFilesIn {input_str}" + " {readcmd}" + " {extra}" + " --outTmpDir {tmpdir}/temp" + " --outFileNamePrefix {tmpdir}/" + " --outStd Log " + " {log}" ) + + if "SortedByCoordinate" in extra: + bamprefix = "Aligned.sortedByCoord.out" + else: + bamprefix = "Aligned.out" + + if snakemake.output.get("bam"): + shell("cat {tmpdir}/{bamprefix}.bam > {snakemake.output.bam:q}") + if snakemake.output.get("sam"): + shell("cat {tmpdir}/{bamprefix}.sam > {snakemake.output.sam:q}") + if snakemake.output.get("reads_per_gene"): + shell("cat {tmpdir}/ReadsPerGene.out.tab > {snakemake.output.reads_per_gene:q}") + if snakemake.output.get("chim_junc"): + shell("cat {tmpdir}/Chimeric.out.junction > {snakemake.output.chim_junc:q}") + if snakemake.output.get("sj"): + shell("cat {tmpdir}/SJ.out.tab > {snakemake.output.sj:q}") + if snakemake.output.get("log"): + shell("cat {tmpdir}/Log.out > {snakemake.output.log:q}") + if snakemake.output.get("log_progress"): + shell("cat {tmpdir}/Log.progress.out > {snakemake.output.log_progress:q}") + if snakemake.output.get("log_final"): + shell("cat {tmpdir}/Log.final.out > {snakemake.output.log_final:q}") diff --git a/bio/star/index/environment.yaml b/bio/star/index/environment.yaml index a69cb3f38f..b7f7eef6f1 100644 --- a/bio/star/index/environment.yaml +++ b/bio/star/index/environment.yaml @@ -2,4 +2,4 @@ channels: - bioconda - conda-forge dependencies: - - star ==2.7.8a + - star =2.7 diff --git a/bio/star/index/meta.yaml b/bio/star/index/meta.yaml index 2909f75b91..4d604d03ea 100644 --- a/bio/star/index/meta.yaml +++ b/bio/star/index/meta.yaml @@ -3,6 +3,7 @@ description: Index fasta sequences with STAR authors: - Thibault Dayris - Tomás Di Domenico + - Filipe G. Vieira input: - A (multi)fasta formatted file output: diff --git a/bio/star/index/test/Snakefile b/bio/star/index/test/Snakefile index 43762dd8bc..84cdc2212d 100644 --- a/bio/star/index/test/Snakefile +++ b/bio/star/index/test/Snakefile @@ -1,15 +1,14 @@ rule star_index: input: - fasta = "{genome}.fasta" + fasta="{genome}.fasta", output: - directory("{genome}") + directory("{genome}"), message: "Testing STAR index" - threads: - 1 + threads: 1 params: - extra = "" + extra="", log: - "logs/star_index_{genome}.log" + "logs/star_index_{genome}.log", wrapper: "master/bio/star/index" diff --git a/bio/star/index/wrapper.py b/bio/star/index/wrapper.py index 341a0eb09b..7b2ff20e8a 100644 --- a/bio/star/index/wrapper.py +++ b/bio/star/index/wrapper.py @@ -5,6 +5,7 @@ __email__ = "thibault.dayris@gustaveroussy.fr" __license__ = "MIT" +import tempfile from snakemake.shell import shell from snakemake.utils import makedirs @@ -15,21 +16,23 @@ gtf = snakemake.input.get("gtf") if gtf is not None: - gtf = "--sjdbGTFfile " + gtf - sjdb_overhang = "--sjdbOverhang " + sjdb_overhang + gtf = f"--sjdbGTFfile {gtf}" + sjdb_overhang = f"--sjdbOverhang {sjdb_overhang}" else: gtf = sjdb_overhang = "" makedirs(snakemake.output) -shell( - "STAR " # Tool - "--runMode genomeGenerate " # Indexation mode - "{extra} " # Optional parameters - "--runThreadN {snakemake.threads} " # Number of threads - "--genomeDir {snakemake.output} " # Path to output - "--genomeFastaFiles {snakemake.input.fasta} " # Path to fasta files - "{sjdb_overhang} " # Read-len - 1 - "{gtf} " # Highly recommended GTF - "{log}" # Logging -) +with tempfile.TemporaryDirectory() as tmpdir: + shell( + "STAR" + " --runThreadN {snakemake.threads}" # Number of threads + " --runMode genomeGenerate" # Indexation mode + " --genomeFastaFiles {snakemake.input.fasta}" # Path to fasta files + " {sjdb_overhang}" # Read-len - 1 + " {gtf}" # Highly recommended GTF + " {extra}" # Optional parameters + " --outTmpDir {tmpdir}/STARtmp" # Temp dir + " --genomeDir {snakemake.output}" # Path to output + " {log}" # Logging + ) diff --git a/meta/bio/star_arriba/test/Snakefile b/meta/bio/star_arriba/test/Snakefile index 2d6b7bbd43..efcd6cea2a 100644 --- a/meta/bio/star_arriba/test/Snakefile +++ b/meta/bio/star_arriba/test/Snakefile @@ -1,57 +1,57 @@ rule star_index: input: fasta="resources/genome.fasta", - annotation="resources/genome.gtf" + annotation="resources/genome.gtf", output: - directory("resources/star_genome") + directory("resources/star_genome"), threads: 4 params: - extra="--sjdbGTFfile resources/genome.gtf --sjdbOverhang 100" + extra="--sjdbGTFfile resources/genome.gtf --sjdbOverhang 100", log: - "logs/star_index_genome.log" + "logs/star_index_genome.log", cache: True wrapper: "master/bio/star/index" + rule star_align: input: # use a list for multiple fastq files for one sample # usually technical replicates across lanes/flowcells fq1="reads/{sample}_R1.1.fastq", - fq2="reads/{sample}_R2.1.fastq", #optional - index="resources/star_genome" + fq2="reads/{sample}_R2.1.fastq", #optional + idx="resources/star_genome", output: # see STAR manual for additional output files - "star/{sample}/Aligned.out.bam", - "star/{sample}/ReadsPerGene.out.tab" + bam="star/{sample}/Aligned.out.bam", + reads_per_gene="star/{sample}/ReadsPerGene.out.tab", log: - "logs/star/{sample}.log" + "logs/star/{sample}.log", params: - # path to STAR reference genome index - index="resources/star_genome", # specific parameters to work well with arriba extra="--quantMode GeneCounts --sjdbGTFfile resources/genome.gtf" - " --outSAMtype BAM Unsorted --chimSegmentMin 10 --chimOutType WithinBAM SoftClip" - " --chimJunctionOverhangMin 10 --chimScoreMin 1 --chimScoreDropMax 30 --chimScoreJunctionNonGTAG 0" - " --chimScoreSeparation 1 --alignSJstitchMismatchNmax 5 -1 5 5 --chimSegmentReadGapMax 3" + " --outSAMtype BAM Unsorted --chimSegmentMin 10 --chimOutType WithinBAM SoftClip" + " --chimJunctionOverhangMin 10 --chimScoreMin 1 --chimScoreDropMax 30 --chimScoreJunctionNonGTAG 0" + " --chimScoreSeparation 1 --alignSJstitchMismatchNmax 5 -1 5 5 --chimSegmentReadGapMax 3", threads: 12 wrapper: "master/bio/star/align" + rule arriba: input: bam="star/{sample}/Aligned.out.bam", genome="resources/genome.fasta", - annotation="resources/genome.gtf" + annotation="resources/genome.gtf", output: fusions="results/arriba/{sample}.fusions.tsv", - discarded="results/arriba/{sample}.fusions.discarded.tsv" + discarded="results/arriba/{sample}.fusions.discarded.tsv", params: # A tsv containing identified artifacts, such as read-through fusions of neighbouring genes, see https://arriba.readthedocs.io/en/latest/input-files/#blacklist blacklist="arriba_blacklist.tsv", - extra="-T -P -i 1,2" # -i describes the wanted contigs, remove if you want to use all hg38 chromosomes + extra="-T -P -i 1,2", # -i describes the wanted contigs, remove if you want to use all hg38 chromosomes log: - "logs/arriba/{sample}.log" + "logs/arriba/{sample}.log", threads: 1 wrapper: "master/bio/arriba" diff --git a/test.py b/test.py index 0208d0c549..57b96fabe1 100644 --- a/test.py +++ b/test.py @@ -2599,7 +2599,7 @@ def test_star_align(): run( "bio/star/align", - ["snakemake", "--cores", "1", "star/a/Aligned.out.sam", "--use-conda", "-F"], + ["snakemake", "--cores", "1", "star/se/a/Aligned.out.sam", "--use-conda", "-F"], ) run( "bio/star/align",