Skip to content

Commit

Permalink
Add list of genes to include in the report
Browse files Browse the repository at this point in the history
  • Loading branch information
Redmar-van-den-Berg committed Dec 5, 2024
1 parent 668dc0a commit e280912
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 6 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ v2.2.1-dev
**********
* Add novel module, **expression**, which analyzes gene expression
* Add optional input `strandedness` to the sample configuration
* Add json output file for the expression module

**********
v2.1.3
Expand Down
25 changes: 25 additions & 0 deletions includes/expression/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ rule all:
input:
multiqc="multiqc_expression.html",
seamless=[f"{sample.sample}/expression/seAMLess.tsv" for sample in samples],
json=[module_output.json(sample) for sample in samples],


rule normalized_coverage:
Expand Down Expand Up @@ -74,6 +75,30 @@ rule transform_counts:
"""


rule json_output:
input:
coverage=module_output.normalized_expression,
src=srcdir("scripts/json-output.py"),
params:
strandedness=get_strand,
genes=config["report"],
output:
"{sample}/expression/expression-output.json",
log:
"log/expression.{sample}.txt",
container:
containers["pysam"]
shell:
"""
python3 {input.src} \
--coverage {input.coverage} \
--strandedness {params.strandedness} \
--genes {params.genes} \
--sample {wildcards.sample} \
> {output} 2> {log}
"""


rule merge_samples:
input:
counts=[module_output.normalized_expression(sample) for sample in samples],
Expand Down
41 changes: 36 additions & 5 deletions includes/expression/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ def set_housekeeping_genes():
config["housekeeping"] = str_to_list(housekeeping)


def set_pdf_report_genes():
report_genes = config.get("report")
config["report"] = str_to_list(report_genes)


## Input functions ##
def get_bam(wildcards):
return pep.sample_table.loc[wildcards.sample, "bam"]
Expand Down Expand Up @@ -79,24 +84,38 @@ def check_housekeeping():
raise RuntimeError(msg)


def check_bed_genes_of_interest():
"""Check for duplicates genes bed file and genes_of_interest"""
def _bed_names():
"""Get gene names from the Bed file"""
if "bed" not in config:
return
return list()

# Get the names from the bed file
names = list()
with open(config["bed"]) as fin:
for line in fin:
spline = line.strip("\n").split("\t")
names.append(spline[3])
return names


def check_bed_genes_of_interest():
"""Check for duplicates genes bed file and genes_of_interest"""
# Get the names from the bed file
names = _bed_names()

for gene in config["genes_of_interest"]:
if gene in names:
msg = f"{gene} is specified twice: in the bed file and genes_of_interest"
raise RuntimeError(msg)


def check_genes_for_report():
quantified_genes = _bed_names() + config["genes_of_interest"]
for gene in config["report"]:
if gene not in quantified_genes:
msg = f"{gene} is specified for the report, but not in genes_of_interest or the bed file"
raise RuntimeError(msg)


## Functions for module outputs ##
def coverage(wildcards):
return f"{wildcards.sample}/expression/coverage.csv"
Expand All @@ -113,11 +132,23 @@ def multiqc_files():
return unstranded + stranded


def get_json(wildcards):
return f"{wildcards.sample}/expression/expression-output.json"


# Set default values, parse specified values
set_genes_of_interest()
set_housekeeping_genes()
set_pdf_report_genes()

# Check the sanity of the reported settings
check_housekeeping()
check_bed_genes_of_interest()
check_genes_for_report()

module_output = SimpleNamespace(
coverage=coverage, normalized_expression=normalized, multiqc_files=multiqc_files()
coverage=coverage,
normalized_expression=normalized,
multiqc_files=multiqc_files(),
json=get_json,
)
46 changes: 46 additions & 0 deletions includes/expression/scripts/json-output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/usr/bin/env python

import json
import argparse
import os

from multiqc import read_expression

def fusion_results(fname):
# Initialise the results dictionary
with open(fname) as fin:
return json.load(fin)

def main(args):
""" Create json output of expression results """

# Create results dictionary
results = {
"metadata": {
"sample_name": args.sample
},
"genes": dict()
}

# Read the normalized expression for the specified strand
expression = read_expression(args.coverage, args.strandedness)

# Extract the genes of interest
genes = dict()
for gene in args.genes:
genes[gene] = expression[gene]

results["genes"] = genes
print(json.dumps({"expression": results}, sort_keys=True, indent=2))


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--coverage', help='Normalized coverage file')
parser.add_argument('--sample', help = 'Sample name')
parser.add_argument('--strandedness', help='Strandedness of the sample')
parser.add_argument('--genes', nargs='*', default=list(), help='genes to include')

args = parser.parse_args()
main(args)

9 changes: 8 additions & 1 deletion test/test_expression.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
contains:
- "rule merge_samples"
- "rule transform_counts"
- "rule json_output"

- name: Test error on unknown housekeeping gene
tags:
Expand Down Expand Up @@ -62,6 +63,7 @@
--config
pepfile=test/pep/expression.csv
bed=test/data/reference/transcripts_chrM.bed
report="MT-ND4 MT-TH"
exit_code: 0
files:
- path: SRR8615409/expression/coverage.csv
Expand Down Expand Up @@ -94,6 +96,10 @@
- path: multiqc_expression.html
contains:
- "MT-CO2"
- path: SRR8615409/expression/expression-output.json
contains:
- "MT-ND4"
- "MT-TH"

- name: Run the expression module with genes of interest
tags:
Expand All @@ -106,7 +112,7 @@
--configfile test/data/config/expression.json
--config
pepfile=test/pep/expression.csv
genes_of_interest="MT-CO2"
genes_of_interest="MT-CO2 MT-ND4 MT-TH"
exit_code: 0
files:
- path: SRR8615409/expression/coverage.csv
Expand All @@ -126,6 +132,7 @@
tags:
- functional
- expression
- current
command: >
snakemake
--snakefile includes/expression/Snakefile
Expand Down

0 comments on commit e280912

Please sign in to comment.