updated reference and cleaned code

ocbe-uio · Jan 29, 2023 · a42d8a8 · a42d8a8
1 parent c6f1b3a
commit a42d8a8
Show file tree

Hide file tree

Showing 41 changed files with 1,194 additions and 545 deletions.
diff --git a/.Rproj.user/66308376/sources/prop/60ACF1F2 b/.Rproj.user/66308376/sources/prop/60ACF1F2
@@ -0,0 +1,6 @@
+{
+    "source_window_id": "",
+    "Source": "Source",
+    "cursorPosition": "4,3",
+    "scrollLine": "0"
+}
diff --git a/.Rproj.user/66308376/sources/prop/7423F278 b/.Rproj.user/66308376/sources/prop/7423F278
@@ -0,0 +1,6 @@
+{
+    "source_window_id": "",
+    "Source": "Source",
+    "cursorPosition": "17,22",
+    "scrollLine": "4"
+}
diff --git a/.Rproj.user/66308376/sources/prop/A2037DCB b/.Rproj.user/66308376/sources/prop/A2037DCB
@@ -0,0 +1,6 @@
+{
+    "source_window_id": "",
+    "Source": "Source",
+    "cursorPosition": "2,25",
+    "scrollLine": "0"
+}
diff --git a/.Rproj.user/66308376/sources/prop/C86F79FC b/.Rproj.user/66308376/sources/prop/C86F79FC
@@ -0,0 +1,6 @@
+{
+    "source_window_id": "",
+    "Source": "Source",
+    "cursorPosition": "14,3",
+    "scrollLine": "0"
+}
diff --git a/.Rproj.user/66308376/sources/prop/DA976360 b/.Rproj.user/66308376/sources/prop/DA976360
@@ -0,0 +1,6 @@
+{
+    "source_window_id": "",
+    "Source": "Source",
+    "cursorPosition": "16,3",
+    "scrollLine": "5"
+}
diff --git a/.Rproj.user/66308376/sources/prop/E3921AFF b/.Rproj.user/66308376/sources/prop/E3921AFF
@@ -0,0 +1,6 @@
+{
+    "source_window_id": "",
+    "Source": "Source",
+    "cursorPosition": "6,61",
+    "scrollLine": "0"
+}
diff --git a/.Rproj.user/66308376/sources/prop/INDEX b/.Rproj.user/66308376/sources/prop/INDEX
@@ -0,0 +1,6 @@
+~%2Fzz%2FEnrichIntersect%2FNEWS.md="A2037DCB"
+~%2Fzz%2FEnrichIntersect%2FR%2Fcancers_drug_groups.R="E3921AFF"
+~%2Fzz%2FEnrichIntersect%2FR%2Fcancers_genes_drugs.R="C86F79FC"
+~%2Fzz%2FEnrichIntersect%2FR%2Fenrichment.R="60ACF1F2"
+~%2Fzz%2FEnrichIntersect%2FR%2FintersectSankey.R="DA976360"
+~%2Fzz%2FEnrichIntersect%2Ftest.R="7423F278"
diff --git a/.Rproj.user/66308376/sources/session-1fd69c21/178A6B36 b/.Rproj.user/66308376/sources/session-1fd69c21/178A6B36
@@ -0,0 +1,26 @@
+{
+    "id": "178A6B36",
+    "path": "~/zz/EnrichIntersect/test.R",
+    "project_path": "test.R",
+    "type": "r_source",
+    "hash": "0",
+    "contents": "",
+    "dirty": false,
+    "created": 1675023587877.0,
+    "source_on_save": false,
+    "relative_order": 2,
+    "properties": {
+        "source_window_id": "",
+        "Source": "Source",
+        "cursorPosition": "17,22",
+        "scrollLine": "4"
+    },
+    "folds": "",
+    "lastKnownWriteTime": 1675026552,
+    "encoding": "UTF-8",
+    "collab_server": "",
+    "source_window": "",
+    "last_content_update": 1675026552083,
+    "read_only": false,
+    "read_only_alternatives": []
+}
diff --git a/.Rproj.user/66308376/sources/session-1fd69c21/178A6B36-contents b/.Rproj.user/66308376/sources/session-1fd69c21/178A6B36-contents
@@ -0,0 +1,24 @@
+library(styler)
+style_pkg(pkg = "/Users/zhiz/zz/EnrichIntersect/R")
+library(lintr)
+lint_dir(path = "/Users/zhiz/zz/EnrichIntersect/R")
+
+rm(list = ls())
+remove.packages("EnrichIntersect")
+# Build the package
+#devtools::document("/Users/zhiz/zz/EnrichIntersect")
+#devtools::build("/Users/zhiz/zz/EnrichIntersect",vignettes=TRUE)
+#devtools::check("/Users/zhiz/zz/EnrichIntersect", cran=TRUE)
+
+#"Use terminal to build up package, otherwise problematic to show vignette"
+#zhiz$ R CMD build /Users/zhiz/zz/EnrichIntersect
+#zhiz$ R CMD INSTALL EnrichIntersect_0.5.tar.gz
+#zhiz$ R CMD check EnrichIntersect_0.5.tar.gz --as-cran --run-donttest
+
+## Install the package
+install.packages("/Users/zhiz/EnrichIntersect_0.5.tar.gz",repos = NULL,type = "source",build_vignettes = T)
+
+
+pdf("drug_gene.pdf",width = 4,height = 3.5)
+enrich <- enrichment(x, custom.set)
+dev.off()
diff --git a/.Rproj.user/66308376/sources/session-1fd69c21/3211F4B8 b/.Rproj.user/66308376/sources/session-1fd69c21/3211F4B8
@@ -0,0 +1,26 @@
+{
+    "id": "3211F4B8",
+    "path": "~/zz/EnrichIntersect/R/enrichment.R",
+    "project_path": "R/enrichment.R",
+    "type": "r_source",
+    "hash": "1703716571",
+    "contents": "",
+    "dirty": false,
+    "created": 1675023787625.0,
+    "source_on_save": false,
+    "relative_order": 5,
+    "properties": {
+        "source_window_id": "",
+        "Source": "Source",
+        "cursorPosition": "4,3",
+        "scrollLine": "0"
+    },
+    "folds": "",
+    "lastKnownWriteTime": 1675024549,
+    "encoding": "UTF-8",
+    "collab_server": "",
+    "source_window": "",
+    "last_content_update": 1675024549359,
+    "read_only": false,
+    "read_only_alternatives": []
+}
diff --git a/.Rproj.user/66308376/sources/session-1fd69c21/3211F4B8-contents b/.Rproj.user/66308376/sources/session-1fd69c21/3211F4B8-contents
@@ -0,0 +1,261 @@
+#' @title Plot enrichment map
+#' @description
+#' Plot enrichment map through a vector (matrix) of scores and a self-defined 
+#' set that summarizes a few groups of the names (rownames) of the vector 
+#' (matrix)
+#'
+#' @name enrichment
+#'
+#' @import ggplot2
+#' @importFrom utils txtProgressBar setTxtProgressBar
+#' @importFrom stats p.adjust
+#'
+#' @param x a vector (matrix) of scores to be enriched
+#' @param custom.set a self-defined set that summarizes a few groups of the 
+#' names (rownames) of \code{x}
+#' @param alpha exponent weight of the score of ordered features. Default is 
+#' \code{0} for calculating enrichment score via classic Kolmogorov-Smirnov 
+#' statistic
+#' @param normalize logic value to determine if normalizing enrichment scores, 
+#' accounting for custom set size. Default is \code{TRUE}
+#' @param permute.n number of custom-set permutations for significance 
+#' testing. Default is 100
+#' @param padj.method correction method, one of \code{"holm"}, 
+#' \code{"hochberg"}, \code{"hommel"}, \code{"bonferroni"}, \code{"BH"}, 
+#' \code{"BY"}, \code{"fdr"}, \code{"none"}. Default is \code{"none"}
+#' @param pvalue.cutoff a cutoff for both unadjusted and adjusted p-value to 
+#' mark significantly enriched classes. Default is 0.05
+#' @param angle angle of rotating x-axis labels. Default is 45
+#' @param ... other arguments
+#'
+#' @return Return a list including a matrix of (normalized) enrichment score, 
+#' a matrix of corresponding p-value and ggplot object:
+#' \itemize{
+#' \item S - a matrix of calculated enrichment scores.
+#' \item pvalue - a matrix of p-values using permuation test for the calculated enrichment scores.
+#' \item g - a ggplot object for visualising the results of an enrichment analysis.
+#' }
+#'
+#' @references Reimand J, Isserlin R, Voisin V, et al (2019). \emph{Pathway 
+#' enrichment analysis and visualization of omics data using g:profiler, gsea, 
+#' cytoscape and enrichmentmap}. Nature protocols, 14:482–517.
+#'
+#' @examples
+#' # Data set 'cancers_drug_groups' is a list including a score dataframe with 147 drugs as rows
+#' # and 19 cancer types as columns, and a dataframe with 9 self-defined drug groups (1st column)
+#' # of the 147 drugs (2nd column).
+#' data(cancers_drug_groups, package = "EnrichIntersect")
+#'
+#' x <- cancers_drug_groups$score
+#' custom.set <- cancers_drug_groups$custom.set
+#' set.seed(123)
+#' enrich <- enrichment(x, custom.set, permute.n = 5)
+#'
+#' @export
+enrichment <- function(x, custom.set, alpha = 0, normalize = TRUE, 
+                       permute.n = 100, padj.method = "none", 
+                       pvalue.cutoff = 0.05, angle = 45, ...) {
+  if (is.matrix(x) || is.data.frame(x)) {
+    if (any(colSums(is.na(x)) == ncol(x)) && ncol(x) > 1) {
+      stop("The argument 'x' matrix has some columns with all missing values!")
+    }
+  } else {
+    x <- as.matrix(x)
+  }
+  if (is.null(colnames(x))) {
+    colnames(x) <- "X"
+  }
+
+  if (is.matrix(custom.set) || is.data.frame(custom.set)) {
+    if (dim(custom.set)[2] != 2) {
+      stop("The argument 'custom.set' has to have two columns!")
+    }
+  } else {
+    stop("The argument 'custom.set' has to be a matrix or dataframe!")
+  }
+
+  if (length(unique(custom.set[[1]])) < length(unique(custom.set[[1]]))) {
+    stop("The argument 'custom.set' should have more sets/groups (2nd component 
+         of 'custom.set') than unique symbols (1st component of 'custom.set')!")
+  }
+
+  features <- intersect(rownames(x), custom.set[[1]])
+  groups <- unique(custom.set$group[custom.set[[1]] %in% features])
+  n_groups <- length(groups)
+
+  # initialize some enrichment scores and pvalues
+  S <- matrix(nrow = ncol(x), ncol = n_groups)
+  rownames(S) <- colnames(x)
+  colnames(S) <- groups
+  pvalue <- S_norm <- S
+
+  # Initializes the progress bar
+  if (ncol(x) > 1) {
+    pb <- 
+      txtProgressBar(min = 0, max = ncol(x), style = 3, width = 50, char = "=")
+  } else {    
+    pb <- 
+      txtProgressBar(min = 0, max = n_groups, style = 3, width = 50, char = "=")
+  }
+
+  for (i in seq_len(ncol(x))) {
+    ## define costom sets 'myList'
+    cutOff <- -Inf # this parameter remains for unordered features for the future
+    myList <- x[, i]
+    names(myList) <- features
+    myList <- sort(myList, decreasing = TRUE)
+    myList <- myList[myList > cutOff]
+    n <- length(myList)
+
+    for (k in 1:n_groups) { # group index
+
+      idx <- rep(NA, n)
+      for (j in 1:n) {
+        idx[j] <- names(myList[j]) %in% 
+          custom.set[custom.set$group == groups[k], 1]
+      }
+      if (sum(idx)) {
+        F1 <- cumsum(abs(myList)^alpha * as.numeric(idx)) / 
+          sum(abs(myList)^alpha * as.numeric(idx))
+        F2 <- cumsum(as.numeric(!idx)) / 
+          (n - sum(custom.set$group == groups[k] & 
+                     features %in% names(myList)))
+
+        S[i, k] <- max(F1 - F2, na.rm = TRUE) 
+      } else {
+        S[i, k] <- NA
+      }
+
+      # permutation test
+      permute_S <- rep(NA, permute.n)
+      for (permute_i in 1:permute.n) {
+        permutationIdx <- sample(1:n, n) # n removes missing spearman's x
+        myList_permute <- myList
+        names(myList_permute) <- names(myList)[permutationIdx]
+        for (j in 1:n) {
+          idx[j] <- names(myList_permute[j]) %in% 
+            custom.set[custom.set$group == groups[k], 1]
+        }
+        if (sum(idx)) {
+          F1 <- cumsum(abs(myList_permute)^alpha * as.numeric(idx)) / 
+            sum(abs(myList_permute)^alpha * as.numeric(idx))
+          F2 <- cumsum(as.numeric(!idx)) / 
+            (n - sum(custom.set$group == groups[k] & 
+                       features %in% names(myList)))
+
+          permute_S[permute_i] <- max(F1 - F2, na.rm = TRUE)
+        }
+      }
+      ## i) p-value for enrichment score
+      pvalue[i, k] <- sum(permute_S >= S[i, k], na.rm = TRUE) / 
+        (permute.n - sum(is.na(permute_S)))
+
+
+      ## ii) normalized enrichment score
+      if (normalize) {
+        S_norm <- S
+        if (!is.na(S[i, k])) {
+          if (S[i, k] >= 0) {
+            S_norm[i, k] <- 
+              S[i, k] / mean(permute_S[permute_S >= 0], na.rm = TRUE)
+          } else {
+            S_norm[i, k] <- 
+              S[i, k] / mean(permute_S[permute_S < 0], na.rm = TRUE)
+          }
+        }
+        S <- S_norm
+        permute_S_norm <- rep(NA, permute.n)
+        for (permute_i in 1:permute.n) {
+          if (!is.na(permute_S[permute_i])) {
+            if (permute_S[permute_i] >= 0) {
+              permute_S_norm[permute_i] <- permute_S[permute_i] / 
+                mean(permute_S[permute_S >= 0], na.rm = TRUE)
+            } else {
+              permute_S_norm[permute_i] <- permute_S[permute_i] / 
+                mean(permute_S[permute_S < 0], na.rm = TRUE)
+            }
+          }
+        }
+        pvalue[i, k] <- sum(permute_S_norm >= S_norm[i, k], na.rm = TRUE) / 
+          (permute.n - sum(is.na(permute_S_norm)))
+      }
+    }
+    # Sets the progress bar to the current state
+    setTxtProgressBar(pb, i)
+  }
+  close(pb) # Close the connection
+
+  pvalue[is.na(S)] <- NA
+  pvalue <- t(pvalue)
+  if (!padj.method == "none") {
+    pvalue <- p.adjust(pvalue, method = padj.method)
+  }
+  S <- t(S)
+
+  # define a dataframe
+  dat <- data.frame(
+    x = factor(rep(colnames(S), each = nrow(S))),
+    y = rep(rownames(S), ncol(S)),
+    ks = as.vector(S),
+    pvalue = as.vector(pvalue)
+  )
+  dat$y <- 
+    factor(dat$y, levels = levels(factor(dat$y))[c(length(unique(dat$y)):1)])
+  ks.min <- min(dat$ks, na.rm = TRUE)
+  dat$ks[dat$ks < 0 & !is.na(dat$ks)] <- ks.min - 0.001
+  dat$ks <- dat$ks - (ks.min - 0.001)
+  dat$ks[which.max(dat$ks)] <- round(sort(dat$ks, decreasing = TRUE)[2] + 0.5)
+  dat$border <- rep("red", nrow(dat))
+  dat$border[dat$pvalue >= pvalue.cutoff] <- "gray"
+
+  # globalVariables(c("y", "border", "ks"))
+  y <- border <- ks <- NULL
+
+  if (normalize) {
+    ES.name <- "Normalized\nEnrichment\nScore"
+  } else {
+    ES.name <- "Enrichment\nScore"
+  }
+  if (any(dat$pvalue < pvalue.cutoff)) {
+    g <- ggplot(data = dat) +
+      geom_point(aes(x = x, y = y, color = border, fill = pvalue, size = ks), 
+                 shape = 21) +
+      scale_fill_gradientn(
+        name = "p-value", na.value = "black", colours = c("blue", "white"),
+        limits = c(0, 1), guide = guide_colorbar(barheight = 3, barwidth = 1)) +
+      geom_point(aes(x = x, y = y, size = ks), color = dat$border, shape = 21) +
+      guides(colour = guide_legend(override.aes = list(size = 5))) +
+      scale_size(name = ES.name, range = c(1, 5), breaks = c(0, 1, 2, 3), 
+                 guide = guide_legend(keyheight = .8)) +
+      theme(axis.text.x = element_text(size = 8, angle = angle, 
+                                       vjust = 1, hjust = 1)) +
+      xlab("") + ylab("")
+
+    if (pvalue.cutoff == 0.05) {
+      g <- g + scale_color_manual(name = NULL, values = c("p<0.05" = "red"))
+    } else {
+      if (pvalue.cutoff == 0.1) {
+        g <- g + scale_color_manual(name = NULL, values = c("p<0.1" = "red"))
+      } else {
+        g <- g + scale_color_manual(name = NULL, values = c("gray", "red"), 
+                                    labels = paste0("p", c(">=", "<"), 
+                                                    pvalue.cutoff))
+      }
+    }
+  } else {
+    g <- ggplot(data = dat) +
+      geom_point(aes(x = x, y = y, fill = pvalue, size = ks), shape = 21) +
+      scale_fill_gradientn(
+        name = "p-value", na.value = "black", colours = c("blue", "white"),
+        limits = c(round(min(dat$pvalue), 2), 1), 
+        guide = guide_colorbar(barheight = 3, barwidth = 1)) +
+      scale_size(name = ES.name, range = c(1, 5), breaks = c(0, 1, 2, 3), 
+                 guide = guide_legend(keyheight = .8)) +
+      theme(axis.text.x = 
+              element_text(size = 8, angle = 45, vjust = 1, hjust = 1)) +
+      xlab("") + ylab("")
+  }
+  print(g)
+
+  return(list(S = S, pvalue = pvalue, g = g))
+}