Skip to content

Commit

Permalink
feat(rules): support CellRanger
Browse files Browse the repository at this point in the history
Due to ongoing file formatting issues with STAR, we have reverted to Cellranger. This commit also adds support for the mkgtf and mkref calls required to build a custom cellranger reference.

BREAKING CHANGE: Though the reversion does not impact apparent function significantly, the outputs and underlying analysis are changed enough to warrant this BREAKING.
  • Loading branch information
rbpatt2019 committed Dec 9, 2021
1 parent c71d2e7 commit 4398450
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 71 deletions.
92 changes: 92 additions & 0 deletions workflow/rules/cellranger.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Filter the GTF produced by the move_coordinates rule with `cellranger mkgtf`,
# keeping only records whose gene_biotype matches one of the --attribute
# filters listed in the command. Tool output (stdout and stderr) is captured
# in the declared log file so failures can be diagnosed after the fact.
rule mkgtf:
    input:
        gtf=rules.move_coordinates.output.gtf,
        bin=rules.get_cellranger.output.bin,
    output:
        gtf="resources/genome_filtered.gtf",
    log:
        "results/logs/mkgtf/mkgtf.log",
    benchmark:
        "results/benchmarks/mkgtf/mkgtf.txt"
    shell:
        "{input.bin} "
        "mkgtf "
        "{input.gtf} "
        "{output.gtf} "
        "--attribute=gene_biotype:protein_coding "
        "--attribute=gene_biotype:lincRNA "
        "--attribute=gene_biotype:antisense "
        "--attribute=gene_biotype:IG_LV_gene "
        "--attribute=gene_biotype:IG_V_gene "
        "--attribute=gene_biotype:IG_V_pseudogene "
        "--attribute=gene_biotype:IG_D_gene "
        "--attribute=gene_biotype:IG_J_gene "
        "--attribute=gene_biotype:IG_J_pseudogene "
        "--attribute=gene_biotype:IG_C_gene "
        "--attribute=gene_biotype:IG_C_pseudogene "
        "--attribute=gene_biotype:TR_V_gene "
        "--attribute=gene_biotype:TR_V_pseudogene "
        "--attribute=gene_biotype:TR_D_gene "
        "--attribute=gene_biotype:TR_J_gene "
        "--attribute=gene_biotype:TR_J_pseudogene "
        "--attribute=gene_biotype:TR_C_gene "
        "&> {log}"


# Build a custom reference with `cellranger mkref` from the filtered GTF
# (output of rule mkgtf) and the FASTA produced by rule get_fa. Tool output
# (stdout and stderr) is captured in the declared log file.
#
# NOTE(review): 10x documents --genome as a genome *name* used to name the
# output folder; here it receives the path "resources/ref_genome". Confirm
# that the cellranger version in use accepts a path containing a slash.
rule mkref:
    input:
        gtf=rules.mkgtf.output.gtf,
        fa=rules.get_fa.output.fa,
        bin=rules.get_cellranger.output.bin,
    output:
        ref=directory("resources/ref_genome"),
    log:
        "results/logs/mkref/mkref.log",
    benchmark:
        "results/benchmarks/mkref/mkref.txt"
    shell:
        "{input.bin} "
        "mkref "
        "--genome={output.ref} "
        "--genes={input.gtf} "
        "--fasta={input.fa} "
        "&> {log}"


# Run `cellranger count` for one sample/lane pair and relocate the run
# directory under results/counts/.
#
# cellranger writes its run directory, named after --id, into the current
# working directory, so the command finishes by moving it to
# results/counts/{sample}_{lane}. Snakemake creates the parent directories of
# all declared outputs before the job starts, which means that destination
# already exists as an empty skeleton. A plain `mv SRC DEST` would then nest
# the run directory inside it (results/counts/X/X/outs/...) and none of the
# declared outputs would be found. The skeleton is therefore removed
# immediately before the move; this only happens after cellranger has exited
# successfully because the steps are chained with &&.
#
# params.introns comes from convert_introns(), which is defined outside this
# rule file - presumably it yields the intron-handling flag for cellranger
# (TODO confirm against its definition).
rule count:
    input:
        R1="data/{sample}_S1_L00{lane}_R1_001.fastq.gz",
        R2="data/{sample}_S1_L00{lane}_R2_001.fastq.gz",
        bin=rules.get_cellranger.output.bin,
        genome=rules.mkref.output.ref,
    output:
        results=directory("results/counts/{sample}_{lane}/outs/raw_feature_bc_matrix"),
        mtx="results/counts/{sample}_{lane}/outs/raw_feature_bc_matrix/matrix.mtx.gz",
        html=report(
            "results/counts/{sample}_{lane}/outs/web_summary.html",
            caption="../reports/counts.rst",
            category="2. Cellranger Counts",
            subcategory="{sample}_{lane}",
        ),
    params:
        introns=convert_introns(),
        n_cells=config["counts"]["n_cells"],
        mem=config["counts"]["mem"],
    log:
        "results/logs/counts/{sample}_{lane}.log",
    benchmark:
        "results/benchmarks/counts/{sample}_{lane}.txt"
    threads: 16
    shell:
        "{input.bin} "
        "count "
        "--nosecondary "
        "{params.introns} "
        "--id {wildcards.sample}_{wildcards.lane} "
        "--transcriptome {input.genome} "
        "--fastqs data "
        "--sample {wildcards.sample} "
        "--lanes {wildcards.lane} "
        "--expect-cells {params.n_cells} "
        "--localcores {threads} "
        "--localmem {params.mem} "
        "&> {log} && "
        "rm -rf results/counts/{wildcards.sample}_{wildcards.lane} && "
        "mv {wildcards.sample}_{wildcards.lane} results/counts/{wildcards.sample}_{wildcards.lane}"
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
# Gather input resources for CellRanger


rule get_whitelist:
params:
url=config["get_whitelist"]["url"],
rule get_cellranger:
output:
wl="resources/whitelist.txt.gz",
cr=directory("resources/cellranger"),
bin="resources/cellranger/bin/cellranger",
params:
url=config["get_cellranger"]["url"],
log:
"results/logs/get_whitelist/get_whitelist.log",
"results/logs/get_cellranger/get_cellranger.log",
benchmark:
"results/benchmarks/get_whitelist/get_whitelist.txt"
"results/benchmarks/get_cellranger/get_cellranger.txt"
shell:
"wget "
"--no-verbose "
"-O {output.wl} "
"{params.url} "
"&> {log}"
"wget --no-verbose -O- {params.url} | "
"tar -xzf - -C resources && "
"rm -rf resources/cellranger.tar.gz && "
"mv resources/cellranger-* resources/cellranger "


rule get_gtf:
Expand Down
60 changes: 0 additions & 60 deletions workflow/rules/star.smk

This file was deleted.

0 comments on commit 4398450

Please sign in to comment.