first working version!!!!

DavisWeaver · Jan 19, 2021 · 76a6d18 · 76a6d18
1 parent 17f4ff2
commit 76a6d18
Show file tree

Hide file tree

Showing 5 changed files with 89 additions and 23 deletions.
diff --git a/R/compute_crosstalk.R b/R/compute_crosstalk.R
@@ -9,10 +9,47 @@
 #'
 #' @export
 
-compute_crosstalk <- function(seed_proteins, ppi = "stringdb", n = 10000,
+compute_crosstalk <- function(seed_proteins, ppi = "stringdb", n = 1000,
                               gamma=0.6, eps = 1e-10, tmax = 1000,
                               norm = TRUE, set_seed,
                               cache, seed_name = NULL,
-                              ncores = 1)  {
+                              ncores = 1, significance_level = 0.95,
+                              p_adjust = "bonferroni")  {
+  if(ppi == "biogrid") {
+    g <- prep_biogrid(cache = cache)
+  } else if (ppi == "stringdb") {
+    g <- prep_stringdb(cache = cache)
+  } else {
+    stop("ppi must be either 'biogrid' or 'stringdb'")
+  }
+
+  w <- igraph::as_adjacency_matrix(g) #sparse adjacency matrix.
+
+  #Compute p given seed proteins
+  p_seed <- sparseRWR(w = w, seed_proteins = seed_proteins, gamma = gamma,
+                      eps = eps, tmax = tmax, norm = norm)
+  p_vec <- p_seed[[1]]
+  p_df <- tibble::as_tibble(p_vec, rownames = "gene_id")
+  colnames(p_df)[colnames(p_df) == "value"] <- "p_test"
+
+  #compute null distribution
+  null_dist <- bootstrap_null(seed_proteins = seed_proteins, g = g, n = n,
+                              gamma = gamma, eps = eps, tmax = tmax,
+                              norm = norm, set_seed = set_seed, cache = cache,
+                              seed_name = seed_name, ncores = ncores)
+  null_df <- null_dist[[1]]
+
+  df <- dplyr::left_join(null_df, p_df)
+
+  #compute the Z-score and p-value
+  df <- dplyr::mutate(df,
+                      Z = (p_test - mean_p)/ stdev_p,
+                      p_value = 2*pnorm(-abs(Z)),
+                      adj_p_value = p.adjust(p_value, method = p_adjust))
+  df <- dplyr::filter(df, adj_p_value < 1-significance_level)
+
+  return(df)
+
 
 }
+
diff --git a/R/create_null.R b/R/create_null.R
@@ -19,7 +19,7 @@
 #'
 #' @export
 
-bootstrap_null <- function(seed_proteins, ppi = "stringdb", n = 10000,
+bootstrap_null <- function(seed_proteins, g, n = 1000,
                            gamma=0.6, eps = 1e-10, tmax = 1000,
                            norm = TRUE, set_seed,
                            cache, seed_name = NULL,
@@ -29,15 +29,6 @@ bootstrap_null <- function(seed_proteins, ppi = "stringdb", n = 10000,
   if(file.exists(paste0(cache, "/", seed_name, "null_dist.Rda"))) {
     load(file = paste0(cache, "/", seed_name, "null_dist.Rda"))
   } else{
-
-    if(ppi == "biogrid") {
-      g <- prep_biogrid(cache = cache)
-    } else if (ppi == "stringdb") {
-      g <- prep_stringdb(cache = cache)
-    } else {
-      stop("ppi must be either 'biogrid' or 'stringdb'")
-    }
-
     w <- igraph::as_adjacency_matrix(g) #sparse adjacency matrix.
 
     #generate list of degree-similar seed protein vectors.
@@ -62,7 +53,8 @@ bootstrap_null <- function(seed_proteins, ppi = "stringdb", n = 10000,
       null_dist <- dplyr::bind_rows(null_dist)
     }
 
-    null_dist <- dist_calc(null_dist, ncores = ncores)
+    null_dist <- dist_calc(null_dist, ncores = ncores,
+                           seed_proteins = seed_proteins)
 
     out <- list(null_dist, seed_proteins)
 
@@ -129,7 +121,7 @@ match_seeds <- function(g, seed_proteins, n, set_seed = NULL) {
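 #' @param df : data frame of null-distribution runs (has a run_number column when ncores > 1)
 #' @return a data frame with one row per gene_id and columns mean_p, stdev_p, nobs and seed ("yes"/"no")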
 
-dist_calc <- function(df, ncores) {
+dist_calc <- function(df, ncores, seed_proteins) {
   if(ncores > 1){
     null_dist <- tidyr::pivot_longer(df, cols = -run_number, names_to = "gene_id", values_to = "p")
   } else {
@@ -139,8 +131,7 @@ dist_calc <- function(df, ncores) {
     dplyr::group_by(gene_id) %>%
     dplyr::summarise(mean_p = mean(p),
                      stdev_p = sd(p),
-                     nobs = dplyr::n())
-
+                     nobs = dplyr::n()) %>%
+    dplyr::mutate(seed = ifelse(gene_id %in% seed_proteins, "yes", "no"))
   return(null_dist)
-
 }
diff --git a/R/ppi_ingest.R b/R/ppi_ingest.R
@@ -94,6 +94,9 @@ prep_biogrid <- function(cache = NULL) {
 #' @inheritParams prep_stringdb
 #'
 #' @return directory on the user's computer containing the different adjacency matrices for future use.
+#'
+#' @export
+#'
 setup_init <- function(cache = NULL) {
 
   #Functions are written to return a tibble - this use will ensure a df is not printed

diff --git a/man/bootstrap_null.Rd b/man/bootstrap_null.Rd
diff --git a/man/prep_stringdb.Rd b/man/prep_stringdb.Rd