update multi_cor to handle block & correlations correctly

jdreyf · Sep 11, 2024 · 0ed7d52 · 0ed7d52
1 parent ea0355f
commit 0ed7d52
Show file tree

Hide file tree

Showing 4 changed files with 26 additions and 16 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: ezlimma
 Type: Package
 Title: Streamlines and extends limma package
-Version: 0.2.6.9002
+Version: 0.2.6.9003
 Authors@R: c(person("Jonathan", "Dreyfuss", role = c("aut", "cre"), email = "jdreyf@bu.edu"),
              person("Hui", "Pan", role = "aut"),
 			 person("Grace", "Daher", role = "ctb"))

diff --git a/R/multi_cor.R b/R/multi_cor.R
@@ -8,10 +8,12 @@
 #' @param method Character string indicating which association is to be used 
 #'   for the test. One of \code{"pearson"}, \code{"spearman"}, \code{"kendall"}, 
 #'   from \code{\link[stats]{cor.test}} or \code{"limma"} for \code{\link{limma_cor}}.
-#' @param check.names Logical; should \code{rownames(pheno.tab)=colnames(object)} be checked?
-#' @param limma.cols If \code{method="limma"}, \code{cols} from \code{\link{limma_cor}} to include.
+#' @param correlation Numeric vector of inter-duplicate or inter-technical replicate correlations. Must be given if 
+#' \code{!is.null(block)}. Its length should be the same as the number of columns of \code{pheno.tab}.
 #' @param covariates If \code{method="limma"}, numeric vector or matrix of covariates to include in 
 #' \code{\link{limma_cor}} \code{design} matrix.
+#' @param check.names Logical; should \code{rownames(pheno.tab)=colnames(object)} be checked?
+#' @param limma.cols If \code{method="limma"}, \code{cols} from \code{\link{limma_cor}} to include.
 #' @inheritParams limma_contrasts
 #' @inheritParams limma_cor
 #' @return Data frame with several statistical columns corresponding to each phenotype and one row per feature.
@@ -25,7 +27,8 @@ multi_cor <- function(object, pheno.tab, method=c("pearson", "spearman", "kendal
                      limma.cols=c("AveExpr", "P.Value", "adj.P.Val", "logFC")){
   method <- match.arg(method)
   if (is.null(dim(pheno.tab))) stop("pheno.tab needs to have rows and columns.")
-  stopifnot(ncol(object)==nrow(pheno.tab), is.null(covariates) || limma::isNumeric(covariates), colMeans(is.na(pheno.tab)) < 1)
+  stopifnot(ncol(object)==nrow(pheno.tab), is.null(covariates) || limma::isNumeric(covariates), colMeans(is.na(pheno.tab)) < 1,
+            is.null(block) || length(correlation) == ncol(pheno.tab))
   if (check.names){
     stopifnot(rownames(pheno.tab)==colnames(object))
   }
@@ -35,22 +38,18 @@ multi_cor <- function(object, pheno.tab, method=c("pearson", "spearman", "kendal
     prefix.tmp <- ifelse(!is.null(prefix), paste(prefix, colnames(pheno.tab)[ind], sep="."), colnames(pheno.tab)[ind])
     if (method=="limma"){
       # na.omit() missing phenotypes in pheno.tab
-      ph.idx <- 1:nrow(pheno.tab)
-      if (any(is.na(pheno.tab[,ind]))){
-        ph.idx <- which(!is.na(pheno.tab[,ind]))
-        if (!is.null(block)) block <- block[ph.idx]
-      }
+      ph.idx <- which(!is.na(pheno.tab[,ind]))
 
-      # block is on samples, so should be modified in case of NAs, but NULL[ph.idx] is NULL
+      # block is on samples, so should be modified in case of NAs, & NULL[ph.idx] is still NULL
       if (is.null(covariates)){
-        cor.tmp <- data.matrix(limma_cor(object[, ph.idx], pheno.tab[ph.idx, ind], reorder.rows=FALSE, prefix=prefix.tmp, block = block,
-                                         correlation = correlation, cols=limma.cols))
+        cor.tmp <- data.matrix(limma_cor(object[, ph.idx], pheno.tab[ph.idx, ind], reorder.rows=FALSE, prefix=prefix.tmp, block = block[ph.idx],
+                                         correlation = correlation[ind], cols=limma.cols))
       } else {
         # model.matrix.lm, but not model.matrix, respects na.action
         # https://stackoverflow.com/questions/5616210/model-matrix-with-na-action-null
         des.tmp <- stats::model.matrix.lm(~1+pheno.tab[, ind]+covariates, na.action = stats::na.omit)
-        cor.tmp <- data.matrix(limma_cor(object[, ph.idx], design=des.tmp, reorder.rows=FALSE, prefix=prefix.tmp, block = block,
-                                         correlation = correlation, cols=limma.cols))
+        cor.tmp <- data.matrix(limma_cor(object[, ph.idx], design=des.tmp, reorder.rows=FALSE, prefix=prefix.tmp, block = block[ph.idx],
+                                         correlation = correlation[ind], cols=limma.cols))
       }
     } else {
       if (!is.null(covariates) || !is.null(block) || !is.null(correlation)){

diff --git a/man/multi_cor.Rd b/man/multi_cor.Rd
diff --git a/tests/testthat/test-multi_cor.R b/tests/testthat/test-multi_cor.R
@@ -22,6 +22,17 @@ test_that("matches limma", {
   res.mc3 <- multi_cor(M, cbind(a=pheno2, b=pheno.v), method="limma", reorder.rows = FALSE)
   expect_equal(res.mc3[rownames(res.mc2), "b.p"], res.mc2$b.p)
   expect_equal(res.mc3[rownames(res.lc2), "a.p"], res.lc2$p)
+
+  # with block
+  block.tmp <- c("a", letters[1:5])
+  res.lc3 <- limma_cor(M, phenotype = pheno.v, block = block.tmp, correlation = -0.5, reorder.rows = FALSE)
+  res.mc4 <- multi_cor(M, cbind(a=pheno2, b=pheno2, c=pheno.v, d=pheno.v), method="limma", block = block.tmp, 
+                       correlation = c(0, 0.9, -0.5, 0.5), reorder.rows = FALSE)
+  expect_equal(res.lc3$p, res.mc4$c.p)
+  expect_equal(res.lc3$slope, res.mc4$c.slope)
+  # block should matter for pheno.v but not for pheno2, since it only one sample per block
+  expect_equal(res.mc4$a.p, res.mc4$b.p)
+  expect_true(res.mc4$c.p[1] != res.mc4$d.p[1])
 })
 
 test_that("rows get reordered", {