From ffc8f9eff9a42c0b15ee91aa4a9565a0c5d356b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20K=C3=B6ster?= Date: Fri, 19 Aug 2022 17:17:10 +0200 Subject: [PATCH] feat: support branches (e.g. plants) in ensembl wrappers for sequence, annotation, and variation download --- bio/reference/ensembl-annotation/test/Snakefile | 2 ++ bio/reference/ensembl-annotation/wrapper.py | 2 ++ bio/reference/ensembl-sequence/test/Snakefile | 14 ++++++++------ bio/reference/ensembl-sequence/wrapper.py | 2 ++ bio/reference/ensembl-variation/test/Snakefile | 1 + bio/reference/ensembl-variation/wrapper.py | 9 ++++++--- 6 files changed, 21 insertions(+), 9 deletions(-) diff --git a/bio/reference/ensembl-annotation/test/Snakefile b/bio/reference/ensembl-annotation/test/Snakefile index 284577b267..0a6f8a12b0 100644 --- a/bio/reference/ensembl-annotation/test/Snakefile +++ b/bio/reference/ensembl-annotation/test/Snakefile @@ -6,6 +6,7 @@ rule get_annotation: release="87", build="GRCh37", flavor="", # optional, e.g. chr_patch_hapl_scaff, see Ensembl FTP. + # branch="plants", # optional: specify branch log: "logs/get_annotation.log", cache: True # save space and time with between workflow caching (see docs) @@ -21,6 +22,7 @@ rule get_annotation_gz: release="87", build="GRCh37", flavor="", # optional, e.g. chr_patch_hapl_scaff, see Ensembl FTP. + # branch="plants", # optional: specify branch log: "logs/get_annotation.log", cache: True # save space and time with between workflow caching (see docs) diff --git a/bio/reference/ensembl-annotation/wrapper.py b/bio/reference/ensembl-annotation/wrapper.py index bb27ac5499..dc0f8506e0 100644 --- a/bio/reference/ensembl-annotation/wrapper.py +++ b/bio/reference/ensembl-annotation/wrapper.py @@ -24,6 +24,8 @@ if release >= 81 and build == "GRCh37": # use the special grch37 branch for new releases branch = "grch37/" +elif snakemake.params.get("branch"): + branch = snakemake.params.branch + "/" flavor = snakemake.params.get("flavor", "") diff --git a/bio/reference/ensembl-sequence/test/Snakefile b/bio/reference/ensembl-sequence/test/Snakefile index 510e81618b..487dd907fb 100644 --- a/bio/reference/ensembl-sequence/test/Snakefile +++ b/bio/reference/ensembl-sequence/test/Snakefile @@ -1,28 +1,30 @@ rule get_genome: output: - "refs/genome.fasta" + "refs/genome.fasta", params: species="saccharomyces_cerevisiae", datatype="dna", build="R64-1-1", - release="98" + release="98", log: - "logs/get_genome.log" + "logs/get_genome.log", cache: True # save space and time with between workflow caching (see docs) wrapper: "master/bio/reference/ensembl-sequence" + rule get_chromosome: output: - "refs/chr1.fasta" + "refs/chr1.fasta", params: species="saccharomyces_cerevisiae", datatype="dna", build="R64-1-1", release="101", - chromosome="I" + chromosome="I", # optional: restrict to chromosome + # branch="plants", # optional: specify branch log: - "logs/get_genome.log" + "logs/get_genome.log", cache: True # save space and time with between workflow caching (see docs) wrapper: "master/bio/reference/ensembl-sequence" diff --git a/bio/reference/ensembl-sequence/wrapper.py b/bio/reference/ensembl-sequence/wrapper.py index 09070857c2..50ea7d46b9 100644 --- a/bio/reference/ensembl-sequence/wrapper.py +++ b/bio/reference/ensembl-sequence/wrapper.py @@ -16,6 +16,8 @@ if release >= 81 and build == "GRCh37": # use the special grch37 branch for new releases branch = "grch37/" +elif snakemake.params.get("branch"): + branch = snakemake.params.branch + "/" log = snakemake.log_fmt_shell(stdout=False, stderr=True) diff --git a/bio/reference/ensembl-variation/test/Snakefile b/bio/reference/ensembl-variation/test/Snakefile index df64321c60..765aa668ab 100644 --- a/bio/reference/ensembl-variation/test/Snakefile +++ b/bio/reference/ensembl-variation/test/Snakefile @@ -11,6 +11,7 @@ rule get_variation: build="R64-1-1", type="all", # one of "all", "somatic", "structural_variation" # chromosome="21", # optionally constrain to chromosome, only supported for homo_sapiens + # branch="plants", # optional: specify branch log: "logs/get_variation.log", cache: True # save space and time with between workflow caching (see docs) diff --git a/bio/reference/ensembl-variation/wrapper.py b/bio/reference/ensembl-variation/wrapper.py index d6ebb12b86..7282498c22 100644 --- a/bio/reference/ensembl-variation/wrapper.py +++ b/bio/reference/ensembl-variation/wrapper.py @@ -16,14 +16,17 @@ type = snakemake.params.type chromosome = snakemake.params.get("chromosome", "") -if release < 98: - print("Ensembl releases <98 are unsupported.", file=open(snakemake.log[0], "w")) - exit(1) branch = "" if release >= 81 and build == "GRCh37": # use the special grch37 branch for new releases branch = "grch37/" +elif snakemake.params.get("branch"): + branch = snakemake.params.branch + "/" + +if release < 98 and not branch: + print("Ensembl releases <98 are unsupported.", file=open(snakemake.log[0], "w")) + exit(1) log = snakemake.log_fmt_shell(stdout=False, stderr=True)