Skip to content

Commit

Permalink
init
Browse files Browse the repository at this point in the history
  • Loading branch information
kennyworkman committed Aug 2, 2022
0 parents commit 1c2115a
Show file tree
Hide file tree
Showing 16 changed files with 10,867 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .envrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/usr/bin/env bash
eval "$(conda shell.bash hook)"
conda activate wf-core-go-pathway
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
__TESTING__/
.latch_report.tar.gz
wf/__pycache__/
build/
node_modules/


104 changes: 104 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
FROM 812206152185.dkr.ecr.us-west-2.amazonaws.com/latch-base:02ab-main

SHELL ["/usr/bin/env", "bash", "-c"]


# Allow --mount=cache to do its job
# https://github.com/moby/buildkit/blob/86c33b66e176a6fc74b88d6f46798d3ec18e2e73/frontend/dockerfile/docs/syntax.md#run---mounttypecache
RUN rm /etc/apt/apt.conf.d/docker-clean
RUN echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache


# Generic installation dependencies
# wget - obvious
# software-properties-common - `add-apt-repository`
# dirmngr - GPG key manager, loads from `/etc/apt/trusted.gpg.d/`
RUN apt-get update && apt-get install --yes --no-install-recommends \
wget \
software-properties-common \
dirmngr


#
# R
#

# >>> Install R
# https://cloud.r-project.org/bin/linux/debian/
# https://github.com/rocker-org/rocker-versioned2/blob/f3325b2cf88d8899ddcb2f0945aa9f87ad150cd7/scripts/install_R_ppa.sh
RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-key '95C0FAF38DB3CCAD0C080A7BDC78B2DDEABC47B7'
RUN add-apt-repository "deb https://cloud.r-project.org/bin/linux/debian $(lsb_release --codename --short)-cran40/"

RUN apt-get update && apt-get install --yes \
r-base \
r-base-dev \
locales

RUN apt-mark hold r-base r-base-dev

RUN echo "en_US.UTF-8 UTF-8" >> /etc/locale.gen
RUN locale-gen en_US.utf8
RUN /usr/sbin/update-locale LANG="en_US.UTF-8"

# >>> R packages
# Development headers (curl, libxml2, OpenSSL) installed ahead of the R
# package installs that follow.
# `apt-get update` runs in the same layer as the install so that a cached,
# stale package index is never paired with a fresh install step; `apt-get` is
# used instead of `apt`, whose command-line interface is not meant for scripts.
RUN apt-get update && apt-get install --yes \
    libcurl4-openssl-dev \
    libxml2-dev \
    libssl-dev

RUN Rscript -e 'install.packages("BiocManager")'
RUN Rscript -e 'BiocManager::install(version = "3.15")'
RUN Rscript -e 'BiocManager::install(c( \
"purrr", \
"dplyr", \
"tibble", \
"readr", \
"readxl", \
"stringr", \
"vctrs", \
"clusterProfiler", \
"DOSE", \
"ggridges", \
"enrichplot", \
"ggplot2", \
"msigdbr", \
"pathview" \
), update=FALSE)'

RUN Rscript -e 'BiocManager::install(c( \
"org.Hs.eg.db", \
"org.Mm.eg.db", \
"org.Rn.eg.db", \
"org.Dm.eg.db", \
"org.At.tair.db", \
"org.Sc.sgd.db", \
"org.Dr.eg.db", \
"org.Ce.eg.db", \
"org.Bt.eg.db", \
"org.Ss.eg.db", \
"org.Gg.eg.db", \
"org.Mmu.eg.db", \
"org.Cf.eg.db", \
"org.EcK12.eg.db", \
"org.Xl.eg.db", \
"org.Ag.eg.db", \
"org.Pt.eg.db", \
"org.EcSakai.eg.db", \
"org.Mxanthus.db" \
), update=FALSE)'

RUN python3 -m pip install --upgrade latch imagesize jinja2

# todo: automatically build frontend in Dockerfile

# STOP HERE:
# The following lines are needed to ensure your build environment works
# correctly with latch.
COPY ./report/build/index.html /root/template.html
COPY ./go_pathway.r /root/go_pathway.r
COPY wf /root/wf

ARG tag
ENV FLYTE_INTERNAL_IMAGE $tag
WORKDIR /root
42 changes: 42 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
<html>
<p align="center">
<img src="https://user-images.githubusercontent.com/31255434/182289305-4cc620e3-86ae-480f-9b61-6ca83283caa5.jpg" alt="Latch Verified" width="100">
</p>

<h1 align="center">
Pathway Analysis
</h1>

<p align="center">
<strong>
Latch Verified
</strong>
</p>

<p align="center">
Implicate pathways + gene ontologies from a set of desired genes.
</p>

<p align="center">
<a href="https://github.com/latch-verified/pathway/releases/latest">
<img src="https://img.shields.io/github/release/latch-verified/pathway.svg" alt="Current Release" />
</a>
<a href="https://github.com/latch-verified/pathway/actions/workflows/tests.yml">
<img src="https://github.com/latch-verified/pathway/actions/workflows/tests.yml/badge.svg" alt="End-to-End Tests" />
</a>
<a href="https://opensource.org/licenses/MIT">
<img src="https://img.shields.io/badge/LICENSE-MIT-brightgreen.svg" alt="License" />
</a>
<img src="https://img.shields.io/github/commit-activity/w/latch-verified/pathway.svg?style=plastic" alt="Commit Activity" />
<img src="https://img.shields.io/github/commits-since/latch-verified/pathway/latest.svg?style=plastic" alt="Commits since Last Release" />
</p>

<h3 align="center">
<a href="https://console.latch.bio/explore/65992/info">Hosted Interface</a>
<span> · </span>
<a href="https://docs.latch.bio">SDK Documentation</a>
<span> · </span>
<a href="https://join.slack.com/t/latchbiosdk/shared_invite/zt-193ibmedi-WB6mBu2GJ2WejUHhxMOuwg">Slack Community</a>
</h3>

</html>
159 changes: 159 additions & 0 deletions go_pathway.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
options(show.error.locations = TRUE)

# Write a progress message to stdout; the elements of a character vector are
# separated by newlines.
p <- function(...) {
  message_lines <- as.character(...)
  cat(message_lines, sep = "\n")
}

# Emit a warning on stdout, framed by the __LATCH_WARNING_START__ /
# __LATCH_WARNING_END__ marker lines.
warn <- function(...) {
  warning_text <- as.character(...)
  framed <- paste(
    "__LATCH_WARNING_START__",
    warning_text,
    "__LATCH_WARNING_END__",
    sep = "\n"
  )
  cat(framed)
}

p("Importing")
suppressMessages(suppressWarnings(library(vctrs)))
suppressMessages(suppressWarnings(library(dplyr)))
suppressMessages(suppressWarnings(library(tibble)))
suppressMessages(suppressWarnings(library(readr)))
suppressMessages(suppressWarnings(library(readxl)))
suppressMessages(suppressWarnings(library(stringr)))

suppressMessages(suppressWarnings(library(purrr)))

suppressMessages(suppressWarnings(library(clusterProfiler)))
suppressMessages(suppressWarnings(library(DOSE)))
suppressMessages(suppressWarnings(library(ggridges)))
suppressMessages(suppressWarnings(library(enrichplot)))
suppressMessages(suppressWarnings(library(ggplot2)))
suppressMessages(suppressWarnings(library(msigdbr)))
suppressMessages(suppressWarnings(library(pathview)))

# Read a table from `path`. The file is first tried as an Excel workbook; if
# that raises an error, it is re-read as delimited text with surrounding
# whitespace trimmed from each field.
read_tabular <- function(path) {
  read_as_delimited <- function(cond) {
    read_delim(path, trim_ws = TRUE)
  }
  tryCatch(read_excel(path), error = read_as_delimited)
}

# Attach the annotation package named by `organism` and build the MSigDB
# table (gene set name, Entrez gene) used later as TERM2GENE.
p("Loading gene annotations")
organism <- "org.Hs.eg.db"
suppressMessages(suppressWarnings(library(organism, character.only = TRUE)))
msig_db <- dplyr::select(msigdbr(species = "Homo sapiens"), gs_name, entrez_gene)

# Command-line inputs: [1] path to the contrast table, [2] number of pathways.
p("Loading contrast data")
args <- commandArgs(trailingOnly = TRUE)

res <- read_tabular(args[[1]])
num_pathways <- strtoi(args[[2]])

# Build `ds`: a numeric vector of log2 fold changes named by Entrez gene ID and
# sorted in decreasing order. It is the ranked gene list handed to GSEA(),
# gseGO(), gseKEGG() and pathview() further down.
p("Preparing data")
dsD <- res %>%
# The gene identifier column is looked up under the name vctrs produces when
# repairing an empty column name.
# NOTE(review): presumably this is `...1`, i.e. an unnamed first column in the
# input table -- confirm against the expected input format.
mutate(gene_name = .data[[vec_as_names("", repair = "unique")]]) %>%
# `select` is namespace-qualified; presumably another attached package masks
# it -- verify before dropping the prefix.
dplyr::select(gene_name, log2FoldChange) %>%
# Drop rows with a missing gene name or fold change.
na.omit %>%
# Translate gene aliases to Entrez IDs using the org.Hs.eg.db annotation.
mutate(gene_name = mapIds(org.Hs.eg.db, keys=gene_name, keytype="ALIAS", column="ENTREZID")) %>%
# Drop rows whose gene name is NA after the mapping step.
na.omit %>%
# Keep one row per Entrez ID (the first occurrence), retaining all columns.
distinct(gene_name, .keep_all=TRUE) %>%
column_to_rownames("gene_name")
# Flatten to a named vector: values are fold changes, names are Entrez IDs.
ds <- dsD$log2FoldChange
names(ds) <- rownames(dsD)
ds <- ds %>% sort(decreasing=TRUE)

# Gene set enrichment of the ranked list against the MSigDB term-to-gene
# table; the elapsed time and the first result rows are printed.
p("Running MSig")
msig_began <- Sys.time()
msig <- GSEA(ds, TERM2GENE = msig_db)
print(Sys.time() - msig_began)

head(msig)

# Gene Ontology enrichment over all ontologies ("ALL"), keyed by Entrez ID.
p("Running GO")
go_began <- Sys.time()
go <- gseGO(ds, ont = "ALL", organism, keyType = "ENTREZID")
print(Sys.time() - go_began)

head(go)

# Write the Gene Ontology dot plot and ridge plot as PNGs under
# "/root/res/Gene Ontology", or emit a Latch warning when the enrichment
# result has no rows.
if (nrow(go) > 0) {
  p(" Plotting")
  dir.create("/root/res/Gene Ontology", showWarnings = FALSE, recursive = TRUE)

  png(file = "/root/res/Gene Ontology/Dot Plot.png", width = 960, height = 900)
  print(dotplot(go, showCategory = 20, split = ".sign") + facet_grid(. ~ .sign))
  dev.off()

  png(file = "/root/res/Gene Ontology/Ridge Plot.png", width = 960, height = 900)
  # A ggplot object is only drawn when it is printed. Inside this `if` body
  # nothing is auto-printed except the value of the last expression, so the
  # plot must be printed explicitly or the PNG device is closed without a page.
  print(ridgeplot(go) + labs(x = "enrichment distribution"))
  dev.off()
} else {
  warn(paste0(
    "No statistically significant enriched gene sets (with cutoff p ≤ 0.05) found after ",
    "running gene set enrichment analysis (GSEA) on Gene Ontology."
  ))
}

# KEGG gene set enrichment on the ranked list `ds` for organism code "hsa".
# When results exist this section writes:
#   - dot and ridge plots under /root/res/KEGG,
#   - /root/res/KEGG/table.csv with the `num_pathways` highest-scoring pathways,
#   - /root/tempres/genesets.txt with pathway IDs, Entrez IDs and gene symbols,
#   - one pathview rendering per selected pathway.
# Otherwise a Latch warning is emitted.
p("Running KEGG")
start <- Sys.time()
kks <- gseKEGG(ds, "hsa")
print(Sys.time() - start)


if (nrow(kks) > 0) {
  p(" Plotting")
  dir.create("/root/res/KEGG", showWarnings = FALSE, recursive = TRUE)

  png(file = "/root/res/KEGG/Dot Plot.png", width = 960, height = 900)
  print(dotplot(kks, showCategory = 20, split = ".sign") + facet_grid(. ~ .sign))
  dev.off()

  png(file = "/root/res/KEGG/Ridge Plot.png", width = 960, height = 900)
  # A ggplot object is only drawn when it is printed. Inside this `if` body
  # nothing is auto-printed except the value of the last expression, so the
  # plot must be printed explicitly or the PNG device is closed without a page.
  print(ridgeplot(kks) + labs(x = "enrichment distribution"))
  dev.off()

  # Map a vector of Entrez IDs to gene symbols, joined into one
  # space-separated string.
  entrezIDsToGeneNames <- function (entrezIDs) {
    return(mapIds(org.Hs.eg.db, entrezIDs, "SYMBOL", "ENTREZID") %>% paste(collapse = " "))
  }

  # Top pathways by enrichment score. `core_enrichment` is a "/"-separated
  # list of Entrez IDs: it is kept verbatim as `coreEntrezIDs` and also
  # translated to gene symbols in `coreEnrichedGenes`.
  pathways <- kks@result %>%
    slice_max(order_by = enrichmentScore, n = num_pathways) %>%
    mutate(coreEntrezIDs = core_enrichment) %>%
    mutate(entrezList = strsplit(core_enrichment, '/'), .keep = "unused") %>%
    mutate(coreEnrichedGenes = unlist(lapply(entrezList, entrezIDsToGeneNames)), .keep = "unused")

  write.csv(pathways, "/root/res/KEGG/table.csv", row.names = FALSE)

  # Dump the gene sets as three sections, each introduced by a header line:
  # pathway IDs, then the Entrez IDs of each set (one set per line), then the
  # corresponding gene symbols. The large `ncolumns` keeps each set on one line.
  dir.create("/root/tempres", showWarnings = FALSE, recursive = TRUE)
  genesets_path <- "/root/tempres/genesets.txt"
  geneSets <- kks@geneSets
  write("PATHWAYIDS", genesets_path)
  lapply(names(geneSets), write, genesets_path, append=TRUE, ncolumns=100000)
  write("ENTREZIDS", genesets_path, append=TRUE)
  lapply(geneSets, write, genesets_path, append=TRUE, ncolumns=100000)
  write("NAMES", genesets_path, append=TRUE)
  geneNames <- lapply(geneSets, entrezIDsToGeneNames)
  lapply(geneNames, write, genesets_path, append=TRUE, ncolumns=100000)

  # Render each selected pathway with the fold changes overlaid.
  for (pathwayID in pathways$ID) {
    p(paste(" Running pathview on", pathwayID))
    pathview(gene.data=ds, pathway.id=pathwayID, species="Homo sapiens")
  }
} else {
  warn(paste0(
    "No statistically significant enriched gene sets (with cutoff p ≤ 0.05) found after ",
    "running gene set enrichment analysis (GSEA) on KEGG."
  ))
}


# Write the MSigDB dot plot and ridge plot as PNGs under /root/res/MSig.
# As in the Gene Ontology and KEGG sections, plotting is skipped with a Latch
# warning when the enrichment result has no rows, instead of attempting to
# plot an empty result at the very end of the run.
if (nrow(msig) > 0) {
  p(" Plotting")
  dir.create("/root/res/MSig", showWarnings = FALSE, recursive = TRUE)

  png(file = "/root/res/MSig/Dot Plot.png", width = 960, height = 900)
  print(dotplot(msig, showCategory = 20, split = ".sign") + facet_grid(. ~ .sign))
  dev.off()

  png(file = "/root/res/MSig/Ridge Plot.png", width = 960, height = 900)
  # Printed explicitly: a ggplot object is only drawn when printed, and
  # auto-printing does not apply inside this block or when the script is
  # run through source().
  print(ridgeplot(msig) + labs(x = "enrichment distribution"))
  dev.off()
} else {
  warn(paste0(
    "No statistically significant enriched gene sets (with cutoff p ≤ 0.05) found after ",
    "running gene set enrichment analysis (GSEA) on MSigDB."
  ))
}
23 changes: 23 additions & 0 deletions report/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.

# dependencies
/node_modules
/.pnp
.pnp.js

# testing
/coverage

# production
/build

# misc
.DS_Store
.env.local
.env.development.local
.env.test.local
.env.production.local

npm-debug.log*
yarn-debug.log*
yarn-error.log*
Loading

0 comments on commit 1c2115a

Please sign in to comment.