Skip to content

Commit

Permalink
updated reference and cleaned code
Browse files Browse the repository at this point in the history
  • Loading branch information
Zhi Zhao committed Jan 29, 2023
1 parent c6f1b3a commit a42d8a8
Show file tree
Hide file tree
Showing 41 changed files with 1,194 additions and 545 deletions.
6 changes: 6 additions & 0 deletions .Rproj.user/66308376/sources/prop/60ACF1F2
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"source_window_id": "",
"Source": "Source",
"cursorPosition": "4,3",
"scrollLine": "0"
}
6 changes: 6 additions & 0 deletions .Rproj.user/66308376/sources/prop/7423F278
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"source_window_id": "",
"Source": "Source",
"cursorPosition": "17,22",
"scrollLine": "4"
}
6 changes: 6 additions & 0 deletions .Rproj.user/66308376/sources/prop/A2037DCB
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"source_window_id": "",
"Source": "Source",
"cursorPosition": "2,25",
"scrollLine": "0"
}
6 changes: 6 additions & 0 deletions .Rproj.user/66308376/sources/prop/C86F79FC
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"source_window_id": "",
"Source": "Source",
"cursorPosition": "14,3",
"scrollLine": "0"
}
6 changes: 6 additions & 0 deletions .Rproj.user/66308376/sources/prop/DA976360
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"source_window_id": "",
"Source": "Source",
"cursorPosition": "16,3",
"scrollLine": "5"
}
6 changes: 6 additions & 0 deletions .Rproj.user/66308376/sources/prop/E3921AFF
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"source_window_id": "",
"Source": "Source",
"cursorPosition": "6,61",
"scrollLine": "0"
}
6 changes: 6 additions & 0 deletions .Rproj.user/66308376/sources/prop/INDEX
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
~%2Fzz%2FEnrichIntersect%2FNEWS.md="A2037DCB"
~%2Fzz%2FEnrichIntersect%2FR%2Fcancers_drug_groups.R="E3921AFF"
~%2Fzz%2FEnrichIntersect%2FR%2Fcancers_genes_drugs.R="C86F79FC"
~%2Fzz%2FEnrichIntersect%2FR%2Fenrichment.R="60ACF1F2"
~%2Fzz%2FEnrichIntersect%2FR%2FintersectSankey.R="DA976360"
~%2Fzz%2FEnrichIntersect%2Ftest.R="7423F278"
26 changes: 26 additions & 0 deletions .Rproj.user/66308376/sources/session-1fd69c21/178A6B36
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"id": "178A6B36",
"path": "~/zz/EnrichIntersect/test.R",
"project_path": "test.R",
"type": "r_source",
"hash": "0",
"contents": "",
"dirty": false,
"created": 1675023587877.0,
"source_on_save": false,
"relative_order": 2,
"properties": {
"source_window_id": "",
"Source": "Source",
"cursorPosition": "17,22",
"scrollLine": "4"
},
"folds": "",
"lastKnownWriteTime": 1675026552,
"encoding": "UTF-8",
"collab_server": "",
"source_window": "",
"last_content_update": 1675026552083,
"read_only": false,
"read_only_alternatives": []
}
24 changes: 24 additions & 0 deletions .Rproj.user/66308376/sources/session-1fd69c21/178A6B36-contents
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
library(styler)
style_pkg(pkg = "/Users/zhiz/zz/EnrichIntersect/R")
library(lintr)
lint_dir(path = "/Users/zhiz/zz/EnrichIntersect/R")

rm(list = ls())
remove.packages("EnrichIntersect")
# Build the package
#devtools::document("/Users/zhiz/zz/EnrichIntersect")
#devtools::build("/Users/zhiz/zz/EnrichIntersect",vignettes=TRUE)
#devtools::check("/Users/zhiz/zz/EnrichIntersect", cran=TRUE)

#"Use terminal to build up package, otherwise problematic to show vignette"
#zhiz$ R CMD build /Users/zhiz/zz/EnrichIntersect
#zhiz$ R CMD INSTALL EnrichIntersect_0.5.tar.gz
#zhiz$ R CMD check EnrichIntersect_0.5.tar.gz --as-cran --run-donttest

## Install the package
install.packages("/Users/zhiz/EnrichIntersect_0.5.tar.gz",repos = NULL,type = "source",build_vignettes = T)


pdf("drug_gene.pdf",width = 4,height = 3.5)
enrich <- enrichment(x, custom.set)
dev.off()
26 changes: 26 additions & 0 deletions .Rproj.user/66308376/sources/session-1fd69c21/3211F4B8
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"id": "3211F4B8",
"path": "~/zz/EnrichIntersect/R/enrichment.R",
"project_path": "R/enrichment.R",
"type": "r_source",
"hash": "1703716571",
"contents": "",
"dirty": false,
"created": 1675023787625.0,
"source_on_save": false,
"relative_order": 5,
"properties": {
"source_window_id": "",
"Source": "Source",
"cursorPosition": "4,3",
"scrollLine": "0"
},
"folds": "",
"lastKnownWriteTime": 1675024549,
"encoding": "UTF-8",
"collab_server": "",
"source_window": "",
"last_content_update": 1675024549359,
"read_only": false,
"read_only_alternatives": []
}
261 changes: 261 additions & 0 deletions .Rproj.user/66308376/sources/session-1fd69c21/3211F4B8-contents
Original file line number Diff line number Diff line change
@@ -0,0 +1,261 @@
#' @title Plot enrichment map
#' @description
#' Plot enrichment map through a vector (matrix) of scores and a self-defined
#' set that summarizes a few groups of the names (rownames) of the vector
#' (matrix)
#'
#' @name enrichment
#'
#' @import ggplot2
#' @importFrom utils txtProgressBar setTxtProgressBar
#' @importFrom stats p.adjust
#'
#' @param x a vector (matrix) of scores to be enriched
#' @param custom.set a self-defined set that summarizes a few groups of the
#' names (rownames) of \code{x}
#' @param alpha exponent weight of the score of ordered features. Default is
#' \code{0} for calculating enrichment score via classic Kolmogorov-Smirnov
#' statistic
#' @param normalize logic value to determine if normalizing enrichment scores,
#' accounting for custom set size. Default is \code{TRUE}
#' @param permute.n number of custom-set permutations for significance
#' testing. Default is 100
#' @param padj.method correction method, one of \code{"holm"},
#' \code{"hochberg"}, \code{"hommel"}, \code{"bonferroni"}, \code{"BH"},
#' \code{"BY"}, \code{"fdr"}, \code{"none"}. Default is \code{"none"}
#' @param pvalue.cutoff a cutoff for both unadjusted and adjusted p-value to
#' mark significantly enriched classes. Default is 0.05
#' @param angle angle of rotating x-axis labels. Default is 45
#' @param ... other arguments
#'
#' @return Return a list including a matrix of (normalized) enrichment score,
#' a matrix of corresponding p-value and ggplot object:
#' \itemize{
#' \item S - a matrix of calculated enrichment scores.
#' \item pvalue - a matrix of p-values using permuation test for the calculated enrichment scores.
#' \item g - a ggplot object for visualising the results of an enrichment analysis.
#' }
#'
#' @references Reimand J, Isserlin R, Voisin V, et al (2019). \emph{Pathway
#' enrichment analysis and visualization of omics data using g:profiler, gsea,
#' cytoscape and enrichmentmap}. Nature protocols, 14:482–517.
#'
#' @examples
#' # Data set 'cancers_drug_groups' is a list including a score dataframe with 147 drugs as rows
#' # and 19 cancer types as columns, and a dataframe with 9 self-defined drug groups (1st column)
#' # of the 147 drugs (2nd column).
#' data(cancers_drug_groups, package = "EnrichIntersect")
#'
#' x <- cancers_drug_groups$score
#' custom.set <- cancers_drug_groups$custom.set
#' set.seed(123)
#' enrich <- enrichment(x, custom.set, permute.n = 5)
#'
#' @export
enrichment <- function(x, custom.set, alpha = 0, normalize = TRUE,
permute.n = 100, padj.method = "none",
pvalue.cutoff = 0.05, angle = 45, ...) {
if (is.matrix(x) || is.data.frame(x)) {
if (any(colSums(is.na(x)) == ncol(x)) && ncol(x) > 1) {
stop("The argument 'x' matrix has some columns with all missing values!")
}
} else {
x <- as.matrix(x)
}
if (is.null(colnames(x))) {
colnames(x) <- "X"
}

if (is.matrix(custom.set) || is.data.frame(custom.set)) {
if (dim(custom.set)[2] != 2) {
stop("The argument 'custom.set' has to have two columns!")
}
} else {
stop("The argument 'custom.set' has to be a matrix or dataframe!")
}

if (length(unique(custom.set[[1]])) < length(unique(custom.set[[1]]))) {
stop("The argument 'custom.set' should have more sets/groups (2nd component
of 'custom.set') than unique symbols (1st component of 'custom.set')!")
}

features <- intersect(rownames(x), custom.set[[1]])
groups <- unique(custom.set$group[custom.set[[1]] %in% features])
n_groups <- length(groups)

# initialize some enrichment scores and pvalues
S <- matrix(nrow = ncol(x), ncol = n_groups)
rownames(S) <- colnames(x)
colnames(S) <- groups
pvalue <- S_norm <- S

# Initializes the progress bar
if (ncol(x) > 1) {
pb <-
txtProgressBar(min = 0, max = ncol(x), style = 3, width = 50, char = "=")
} else {
pb <-
txtProgressBar(min = 0, max = n_groups, style = 3, width = 50, char = "=")
}

for (i in seq_len(ncol(x))) {
## define costom sets 'myList'
cutOff <- -Inf # this parameter remains for unordered features for the future
myList <- x[, i]
names(myList) <- features
myList <- sort(myList, decreasing = TRUE)
myList <- myList[myList > cutOff]
n <- length(myList)

for (k in 1:n_groups) { # group index

idx <- rep(NA, n)
for (j in 1:n) {
idx[j] <- names(myList[j]) %in%
custom.set[custom.set$group == groups[k], 1]
}
if (sum(idx)) {
F1 <- cumsum(abs(myList)^alpha * as.numeric(idx)) /
sum(abs(myList)^alpha * as.numeric(idx))
F2 <- cumsum(as.numeric(!idx)) /
(n - sum(custom.set$group == groups[k] &
features %in% names(myList)))

S[i, k] <- max(F1 - F2, na.rm = TRUE)
} else {
S[i, k] <- NA
}

# permutation test
permute_S <- rep(NA, permute.n)
for (permute_i in 1:permute.n) {
permutationIdx <- sample(1:n, n) # n removes missing spearman's x
myList_permute <- myList
names(myList_permute) <- names(myList)[permutationIdx]
for (j in 1:n) {
idx[j] <- names(myList_permute[j]) %in%
custom.set[custom.set$group == groups[k], 1]
}
if (sum(idx)) {
F1 <- cumsum(abs(myList_permute)^alpha * as.numeric(idx)) /
sum(abs(myList_permute)^alpha * as.numeric(idx))
F2 <- cumsum(as.numeric(!idx)) /
(n - sum(custom.set$group == groups[k] &
features %in% names(myList)))

permute_S[permute_i] <- max(F1 - F2, na.rm = TRUE)
}
}
## i) p-value for enrichment score
pvalue[i, k] <- sum(permute_S >= S[i, k], na.rm = TRUE) /
(permute.n - sum(is.na(permute_S)))


## ii) normalized enrichment score
if (normalize) {
S_norm <- S
if (!is.na(S[i, k])) {
if (S[i, k] >= 0) {
S_norm[i, k] <-
S[i, k] / mean(permute_S[permute_S >= 0], na.rm = TRUE)
} else {
S_norm[i, k] <-
S[i, k] / mean(permute_S[permute_S < 0], na.rm = TRUE)
}
}
S <- S_norm
permute_S_norm <- rep(NA, permute.n)
for (permute_i in 1:permute.n) {
if (!is.na(permute_S[permute_i])) {
if (permute_S[permute_i] >= 0) {
permute_S_norm[permute_i] <- permute_S[permute_i] /
mean(permute_S[permute_S >= 0], na.rm = TRUE)
} else {
permute_S_norm[permute_i] <- permute_S[permute_i] /
mean(permute_S[permute_S < 0], na.rm = TRUE)
}
}
}
pvalue[i, k] <- sum(permute_S_norm >= S_norm[i, k], na.rm = TRUE) /
(permute.n - sum(is.na(permute_S_norm)))
}
}
# Sets the progress bar to the current state
setTxtProgressBar(pb, i)
}
close(pb) # Close the connection

pvalue[is.na(S)] <- NA
pvalue <- t(pvalue)
if (!padj.method == "none") {
pvalue <- p.adjust(pvalue, method = padj.method)
}
S <- t(S)

# define a dataframe
dat <- data.frame(
x = factor(rep(colnames(S), each = nrow(S))),
y = rep(rownames(S), ncol(S)),
ks = as.vector(S),
pvalue = as.vector(pvalue)
)
dat$y <-
factor(dat$y, levels = levels(factor(dat$y))[c(length(unique(dat$y)):1)])
ks.min <- min(dat$ks, na.rm = TRUE)
dat$ks[dat$ks < 0 & !is.na(dat$ks)] <- ks.min - 0.001
dat$ks <- dat$ks - (ks.min - 0.001)
dat$ks[which.max(dat$ks)] <- round(sort(dat$ks, decreasing = TRUE)[2] + 0.5)
dat$border <- rep("red", nrow(dat))
dat$border[dat$pvalue >= pvalue.cutoff] <- "gray"

# globalVariables(c("y", "border", "ks"))
y <- border <- ks <- NULL

if (normalize) {
ES.name <- "Normalized\nEnrichment\nScore"
} else {
ES.name <- "Enrichment\nScore"
}
if (any(dat$pvalue < pvalue.cutoff)) {
g <- ggplot(data = dat) +
geom_point(aes(x = x, y = y, color = border, fill = pvalue, size = ks),
shape = 21) +
scale_fill_gradientn(
name = "p-value", na.value = "black", colours = c("blue", "white"),
limits = c(0, 1), guide = guide_colorbar(barheight = 3, barwidth = 1)) +
geom_point(aes(x = x, y = y, size = ks), color = dat$border, shape = 21) +
guides(colour = guide_legend(override.aes = list(size = 5))) +
scale_size(name = ES.name, range = c(1, 5), breaks = c(0, 1, 2, 3),
guide = guide_legend(keyheight = .8)) +
theme(axis.text.x = element_text(size = 8, angle = angle,
vjust = 1, hjust = 1)) +
xlab("") + ylab("")

if (pvalue.cutoff == 0.05) {
g <- g + scale_color_manual(name = NULL, values = c("p<0.05" = "red"))
} else {
if (pvalue.cutoff == 0.1) {
g <- g + scale_color_manual(name = NULL, values = c("p<0.1" = "red"))
} else {
g <- g + scale_color_manual(name = NULL, values = c("gray", "red"),
labels = paste0("p", c(">=", "<"),
pvalue.cutoff))
}
}
} else {
g <- ggplot(data = dat) +
geom_point(aes(x = x, y = y, fill = pvalue, size = ks), shape = 21) +
scale_fill_gradientn(
name = "p-value", na.value = "black", colours = c("blue", "white"),
limits = c(round(min(dat$pvalue), 2), 1),
guide = guide_colorbar(barheight = 3, barwidth = 1)) +
scale_size(name = ES.name, range = c(1, 5), breaks = c(0, 1, 2, 3),
guide = guide_legend(keyheight = .8)) +
theme(axis.text.x =
element_text(size = 8, angle = 45, vjust = 1, hjust = 1)) +
xlab("") + ylab("")
}
print(g)

return(list(S = S, pvalue = pvalue, g = g))
}
Loading

0 comments on commit a42d8a8

Please sign in to comment.