version 0.1.2

amitfrish · Oct 21, 2018 · 60def18 · 60def18
1 parent fc84ee4
commit 60def18
Show file tree

Hide file tree

Showing 4 changed files with 73 additions and 22 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,7 +2,7 @@ Package: scBio
 Type: Package
 Title: Single Cell Genomics for Enhancing Cell Composition Inference
         from Bulk Genomics Data
-Version: 0.1.1
+Version: 0.1.2
 Author: Amit Frishberg [aut, cre], Naama Peshes-Yaloz [aut], Irit Gat-Viks [aut]
 Maintainer: Amit Frishberg <amfrishberg@gmail.com>
 Description: Cellular population mapping (CPM) a deconvolution algorithm in which single-cell genomics is required in only one or a few samples, where in other samples of the same tissue, only bulk genomics is measured and the underlying fine resolution cellular heterogeneity is inferred.
@@ -15,6 +15,6 @@ Depends: R (>= 2.10)
 Imports: sp, foreach, parallel, doSNOW, raster, fields, LiblineaR,
         limma
 NeedsCompilation: no
-Packaged: 2018-10-13 14:43:51 UTC; IritGNB5
+Packaged: 2018-10-21 11:38:40 UTC; IritGNB5
 Repository: CRAN
-Date/Publication: 2018-10-13 22:50:09 UTC
+Date/Publication: 2018-10-21 15:40:02 UTC
diff --git a/MD5 b/MD5
@@ -1,12 +1,12 @@
-76d96bccc280fcbce7faff18548044cf *DESCRIPTION
+edc01e28a90be031a363ba3025895769 *DESCRIPTION
 db145741783207b24a533c64dac574a9 *NAMESPACE
-007f52b3eafdc72fd47a7b8ca773af1a *R/scBio.R
+ff58b1faef6e519948dc6a1667f55812 *R/scBio.R
 f7a8c36644f3864097e2e68bd2c81269 *data/BulkFlu.rda
 6920ed4a966ac46acb7d1e838d904afa *data/SCCellSpace.rda
 df99aadcef5a5a5176bd5f7fd1d42ba4 *data/SCFlu.rda
 e0a67ea8cd52f7e565a5515a14ca3bf9 *data/SCLabels.rda
 4716ee887904a59726fe613eeb82fa55 *man/BulkFlu.Rd
-7aee0e4620122df2fb4a7d5da57b5366 *man/CPM.Rd
+1b31edc2773407a8fcfe90abac80abe1 *man/CPM.Rd
 801dd8f0b202d876468939fe4c1c2214 *man/SCCellSpace.Rd
 f2cb8e053802b00f3374cc65ca96ca8e *man/SCFlu.Rd
 77024789030ff90461dc1d1065d254c0 *man/SCLabels.Rd
diff --git a/R/scBio.R b/R/scBio.R
@@ -231,7 +231,7 @@ choseCellsForRuns = function(XY, refNames, modelSize, minSelection, neighborhood
 ########## CPM main part
 #' @keywords internal
 CPMMain = function(refference,refferenceNames, Y, chosenCellList, chosenCellNeigList ,numOfRuns, modelSize, neighborhoodSize,
-                     no_cores, genePercents){
+                     no_cores, genePercents, quantifyTypes, calculateCI){
   YReduced = Y[row.names(Y) %in% row.names(refference),]
 
   ##### Revome genes low in reference data  #####
@@ -304,7 +304,7 @@ CPMMain = function(refference,refferenceNames, Y, chosenCellList, chosenCellNeig
   close(pb)
 
   ##### Combining cell predictions #####
-  print("Finalizing...")
+  print("Combining CPM iterations")
   predictedCells = matrix(0, nrow = dim(YReduced)[2], ncol = dim(refferenceSmaller)[2])
   predictedCellsCounts = matrix(0, nrow = dim(YReduced)[2], ncol = dim(refferenceSmaller)[2])
 
@@ -317,13 +317,57 @@ CPMMain = function(refference,refferenceNames, Y, chosenCellList, chosenCellNeig
   predictedCellsFinal = predictedCells/predictedCellsCounts
 
   ##### Smoothing #####
+  print("Smoothing")
   allClusterMeansMatrix = t(do.call(rbind,lapply(1:length(refferenceNames),function(cell){
     rowMeans(predictedCellsFinal[,chosenCellNeigList[[cell]]])
   })))
   colnames(allClusterMeansMatrix) = colnames(refference)
   row.names(allClusterMeansMatrix) = colnames(Y)
 
-  allClusterMeansMatrix
+  cellTypeRes = NULL
+  seRes = NULL
+  confMatrix = NULL
+
+  #### Cell type prediction ####
+  if(quantifyTypes){
+    print("Calculating cell type quantities")
+    cellTypeRes = do.call(cbind,lapply(unique(refferenceNames),function(currCluster){
+      rowMeans(allClusterMeansMatrix[,currCluster==refferenceNames])
+    }))
+    colnames(cellTypeRes) = unique(refferenceNames)
+  }
+
+  #### Standard error prediction ####
+  if(calculateCI){
+    print("Calculating the confidence interval matrix")
+
+    resultOriginalSizeMatrixes = lapply(resultSmallMatrixes, function(resultSmallMatrix){
+      completeResultMatrix = matrix(NA, nrow = dim(resultSmallMatrix)[2], ncol = dim(refferenceSmaller)[2])
+      completeResultMatrix[,match(colnames(allClusterMeansMatrix)[as.numeric(as.matrix(row.names(resultSmallMatrix)))],colnames(refferenceSmaller))] = t(resultSmallMatrix)
+      completeResultMatrix
+    })
+
+    seRes <- do.call(rbind,lapply(colnames(YReduced), function(sample){
+      sampleMatrix = do.call(rbind, lapply(resultOriginalSizeMatrixes,function(currRes){
+        currRes[which(colnames(YReduced)==sample),]
+      }))
+      apply(sampleMatrix,2,function(x){
+        sd(x[!is.na(x)])/sqrt(length(which(!is.na(x))))
+      })
+    }))
+
+    seResNorm = t(do.call(rbind,lapply(1:length(refferenceNames),function(cell){
+      rowMeans(seRes[,chosenCellNeigList[[cell]]])
+    })))
+
+    confMatrix = matrix(paste(allClusterMeansMatrix-1.96*seResNorm,allClusterMeansMatrix+1.96*seResNorm,sep = " <-> "),ncol = dim(allClusterMeansMatrix)[2])
+
+    colnames(seRes) = colnames(confMatrix) = colnames(refference)
+    row.names(seRes) = row.names(confMatrix) = colnames(Y)
+  }
+
+  print("Done")
+  list(predictions = allClusterMeansMatrix, cellTypePredictions = cellTypeRes, sePredictions = seRes, confMatrix = confMatrix)
 }
 
 ########## CPM
@@ -334,15 +378,18 @@ CPMMain = function(refference,refferenceNames, Y, chosenCellList, chosenCellNeig
 #' @param SCData A matrix containing the single-cell RNA-seq data. Each row corresponds to a certain gene and each column to a certain cell.
 #' @param SCLabels A vector containing the labels of each of the cells.
 #' @param BulkData A matrix containing heterogenous RNA-seq data for one or more samples. Each row corresponds to a certain gene and each column to a certain sample.
-#' @param cellSpace The cell space corresponding to the single-cell data. It can be a vector for a 1-dim space or a matrix for a multidimensional space where each column represents a different dimension.
+#' @param cellSpace The cell state space corresponding to the single-cell RNA-seq data. It can be a vector for a 1-dim space or a matrix for a multidimensional space where each column represents a different dimension.
 #' @param no_cores A number for the amount of cores which will be used for the analysis. The defalt (NULL) is total number of cores minus 1.
 #' @param neighborhoodSize Cell neighborhood size which will be used for the analysis. The defalt is 10.
 #' @param modelSize The reference subset size. The defalt is 50.
-#' @param minSelection The minimum selection times allowed for each cell. Increasing this value might have a large effect on the algorithm's running time. The defalt is 5.
-#' @param genePercents Percentage of genes randomely selected in each deconvolution repeat. The defalt is 0.4.
+#' @param minSelection The minimum number of times in which each reference cell is selected. Increasing this value might have a large effect on the algorithm's running time. The default is 5.
+#' @param quantifyTypes A boolean parameter indicating whether the prediction of cell type quantities is needed. The default is FALSE.
+#' @param calculateCI A boolean parameter indicating whether the calculation of confidence intervals is needed. The default is FALSE.
 #' @return A list including:
 #' \item{predicted}{CPM predicted cell abundance matrix. Each row represnts a sample and each column a single cell}
-#' \item{numOfRuns}{The number of deconvolution repeats preformed by CPM }
+#' \item{cellTypePredictions}{CPM predicted cell-type abundance matrix. Each row represents a sample and each column a single cell-type. This is calculated if quantifyTypes = TRUE.}
+#' \item{confIntervals}{A matrix containing the confidence interval for each cell and sample. Each row represents a sample and each column a single cell. This is calculated if calculateCI = TRUE.}
+#' \item{numOfRuns}{The number of deconvolution repeats performed by CPM. }
 #' @examples
 #' data(SCLabels)
 #' data(SCFlu)
@@ -368,7 +415,8 @@ CPMMain = function(refference,refferenceNames, Y, chosenCellList, chosenCellNeig
 #' @importFrom "utils" "setTxtProgressBar"
 #' @importFrom "stats" "sd" "var"
 #' @importFrom "grDevices" "chull"
-CPM = function(SCData, SCLabels, BulkData, cellSpace, no_cores = NULL, neighborhoodSize = 10, modelSize = 50, minSelection = 5, genePercents = 0.4){
+CPM = function(SCData, SCLabels, BulkData, cellSpace, no_cores = NULL, neighborhoodSize = 10, modelSize = 50, minSelection = 5, quantifyTypes = F, calculateCI = F){
+  genePercents = 0.4
   if(!is.null(SCData) & !is.null(SCLabels) & !is.null(BulkData) & !is.null(cellSpace)){
     print("Selecting cells for each iteration")
   }
@@ -378,8 +426,8 @@ CPM = function(SCData, SCLabels, BulkData, cellSpace, no_cores = NULL, neighborh
   cellSelectionList = cellSelection$chosenCellList
   cellNeigSelectionList = cellSelection$chosenNeigList
   print("Running CPM, this may take a few minutes")
-  deconvolutionRes = CPMMain(SCData, SCLabels,BulkData, cellSelectionList, cellNeigSelectionList, numOfRunsToUse,modelSize, neighborhoodSize, no_cores, genePercents)
-  list(predicted = deconvolutionRes, numOfRuns = numOfRunsToUse)
+  deconvolutionRes = CPMMain(SCData, SCLabels,BulkData, cellSelectionList, cellNeigSelectionList, numOfRunsToUse,modelSize, neighborhoodSize, no_cores, genePercents, quantifyTypes, calculateCI)
+  list(predicted = deconvolutionRes$predictions, cellTypePredictions = deconvolutionRes$cellTypePredictions, confIntervals = deconvolutionRes$confMatrix, numOfRuns = numOfRunsToUse)
 }
 
 #' Gene expression profiles of flu and pbs sample.
@@ -413,4 +461,3 @@ CPM = function(SCData, SCLabels, BulkData, cellSpace, no_cores = NULL, neighborh
 #' @format A matrix with 349 rows (cells) and 2 columns (dimensions).
 #' @source \url{http://www.diamondse.info/}
 "SCCellSpace"
-
diff --git a/man/CPM.Rd b/man/CPM.Rd