Skip to content

Commit

Permalink
version 0.1.2
Browse files Browse the repository at this point in the history
  • Loading branch information
amitfrish authored and cran-robot committed Oct 21, 2018
1 parent fc84ee4 commit 60def18
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 22 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: scBio
Type: Package
Title: Single Cell Genomics for Enhancing Cell Composition Inference
from Bulk Genomics Data
Version: 0.1.1
Version: 0.1.2
Author: Amit Frishberg [aut, cre], Naama Peshes-Yaloz [aut], Irit Gat-Viks [aut]
Maintainer: Amit Frishberg <amfrishberg@gmail.com>
Description: Cellular population mapping (CPM) is a deconvolution algorithm in which single-cell genomics is required in only one or a few samples, whereas in other samples of the same tissue only bulk genomics is measured and the underlying fine-resolution cellular heterogeneity is inferred.
Expand All @@ -15,6 +15,6 @@ Depends: R (>= 2.10)
Imports: sp, foreach, parallel, doSNOW, raster, fields, LiblineaR,
limma
NeedsCompilation: no
Packaged: 2018-10-13 14:43:51 UTC; IritGNB5
Packaged: 2018-10-21 11:38:40 UTC; IritGNB5
Repository: CRAN
Date/Publication: 2018-10-13 22:50:09 UTC
Date/Publication: 2018-10-21 15:40:02 UTC
6 changes: 3 additions & 3 deletions MD5
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
76d96bccc280fcbce7faff18548044cf *DESCRIPTION
edc01e28a90be031a363ba3025895769 *DESCRIPTION
db145741783207b24a533c64dac574a9 *NAMESPACE
007f52b3eafdc72fd47a7b8ca773af1a *R/scBio.R
ff58b1faef6e519948dc6a1667f55812 *R/scBio.R
f7a8c36644f3864097e2e68bd2c81269 *data/BulkFlu.rda
6920ed4a966ac46acb7d1e838d904afa *data/SCCellSpace.rda
df99aadcef5a5a5176bd5f7fd1d42ba4 *data/SCFlu.rda
e0a67ea8cd52f7e565a5515a14ca3bf9 *data/SCLabels.rda
4716ee887904a59726fe613eeb82fa55 *man/BulkFlu.Rd
7aee0e4620122df2fb4a7d5da57b5366 *man/CPM.Rd
1b31edc2773407a8fcfe90abac80abe1 *man/CPM.Rd
801dd8f0b202d876468939fe4c1c2214 *man/SCCellSpace.Rd
f2cb8e053802b00f3374cc65ca96ca8e *man/SCFlu.Rd
77024789030ff90461dc1d1065d254c0 *man/SCLabels.Rd
69 changes: 58 additions & 11 deletions R/scBio.R
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ choseCellsForRuns = function(XY, refNames, modelSize, minSelection, neighborhood
########## CPM main part
#' @keywords internal
CPMMain = function(refference,refferenceNames, Y, chosenCellList, chosenCellNeigList ,numOfRuns, modelSize, neighborhoodSize,
no_cores, genePercents){
no_cores, genePercents, quantifyTypes, calculateCI){
YReduced = Y[row.names(Y) %in% row.names(refference),]

##### Revome genes low in reference data #####
Expand Down Expand Up @@ -304,7 +304,7 @@ CPMMain = function(refference,refferenceNames, Y, chosenCellList, chosenCellNeig
close(pb)

##### Combining cell predictions #####
print("Finalizing...")
print("Combining CPM iterations")
predictedCells = matrix(0, nrow = dim(YReduced)[2], ncol = dim(refferenceSmaller)[2])
predictedCellsCounts = matrix(0, nrow = dim(YReduced)[2], ncol = dim(refferenceSmaller)[2])

Expand All @@ -317,13 +317,57 @@ CPMMain = function(refference,refferenceNames, Y, chosenCellList, chosenCellNeig
predictedCellsFinal = predictedCells/predictedCellsCounts

##### Smoothing #####
print("Smoothing")
allClusterMeansMatrix = t(do.call(rbind,lapply(1:length(refferenceNames),function(cell){
rowMeans(predictedCellsFinal[,chosenCellNeigList[[cell]]])
})))
colnames(allClusterMeansMatrix) = colnames(refference)
row.names(allClusterMeansMatrix) = colnames(Y)

allClusterMeansMatrix
cellTypeRes = NULL
seRes = NULL
confMatrix = NULL

#### Cell type prediction ####
if(quantifyTypes){
print("Calculating cell type quantities")
cellTypeRes = do.call(cbind,lapply(unique(refferenceNames),function(currCluster){
rowMeans(allClusterMeansMatrix[,currCluster==refferenceNames])
}))
colnames(cellTypeRes) = unique(refferenceNames)
}

#### Standard error prediction ####
if(calculateCI){
print("Calculating the confidence interval matrix")

resultOriginalSizeMatrixes = lapply(resultSmallMatrixes, function(resultSmallMatrix){
completeResultMatrix = matrix(NA, nrow = dim(resultSmallMatrix)[2], ncol = dim(refferenceSmaller)[2])
completeResultMatrix[,match(colnames(allClusterMeansMatrix)[as.numeric(as.matrix(row.names(resultSmallMatrix)))],colnames(refferenceSmaller))] = t(resultSmallMatrix)
completeResultMatrix
})

seRes <- do.call(rbind,lapply(colnames(YReduced), function(sample){
sampleMatrix = do.call(rbind, lapply(resultOriginalSizeMatrixes,function(currRes){
currRes[which(colnames(YReduced)==sample),]
}))
apply(sampleMatrix,2,function(x){
sd(x[!is.na(x)])/sqrt(length(which(!is.na(x))))
})
}))

seResNorm = t(do.call(rbind,lapply(1:length(refferenceNames),function(cell){
rowMeans(seRes[,chosenCellNeigList[[cell]]])
})))

confMatrix = matrix(paste(allClusterMeansMatrix-1.96*seResNorm,allClusterMeansMatrix+1.96*seResNorm,sep = " <-> "),ncol = dim(allClusterMeansMatrix)[2])

colnames(seRes) = colnames(confMatrix) = colnames(refference)
row.names(seRes) = row.names(confMatrix) = colnames(Y)
}

print("Done")
list(predictions = allClusterMeansMatrix, cellTypePredictions = cellTypeRes, sePredictions = seRes, confMatrix = confMatrix)
}

########## CPM
Expand All @@ -334,15 +378,18 @@ CPMMain = function(refference,refferenceNames, Y, chosenCellList, chosenCellNeig
#' @param SCData A matrix containing the single-cell RNA-seq data. Each row corresponds to a certain gene and each column to a certain cell.
#' @param SCLabels A vector containing the labels of each of the cells.
#' @param BulkData A matrix containing heterogenous RNA-seq data for one or more samples. Each row corresponds to a certain gene and each column to a certain sample.
#' @param cellSpace The cell space corresponding to the single-cell data. It can be a vector for a 1-dim space or a matrix for a multidimensional space where each column represents a different dimension.
#' @param cellSpace The cell state space corresponding to the single-cell RNA-seq data. It can be a vector for a 1-dim space or a matrix for a multidimensional space where each column represents a different dimension.
#' @param no_cores The number of cores to use for the analysis. The default (NULL) is the total number of cores minus 1.
#' @param neighborhoodSize Cell neighborhood size which will be used for the analysis. The default is 10.
#' @param modelSize The reference subset size. The default is 50.
#' @param minSelection The minimum selection times allowed for each cell. Increasing this value might have a large effect on the algorithm's running time. The default is 5.
#' @param genePercents Percentage of genes randomly selected in each deconvolution repeat. The default is 0.4.
#' @param minSelection The minimum number of times each reference cell is selected. Increasing this value might have a large effect on the algorithm's running time. The default is 5.
#' @param quantifyTypes A boolean parameter indicating whether the prediction of cell type quantities is needed. The default is FALSE.
#' @param calculateCI A boolean parameter indicating whether the calculation of confidence intervals is needed. The default is FALSE.
#' @return A list including:
#' \item{predicted}{CPM predicted cell abundance matrix. Each row represents a sample and each column a single cell}
#' \item{numOfRuns}{The number of deconvolution repeats performed by CPM }
#' \item{cellTypePredictions}{CPM predicted cell-type abundance matrix. Each row represents a sample and each column a single cell-type. This is calculated if quantifyTypes = TRUE.}
#' \item{confIntervals}{A matrix containing the confidence interval for each cell and sample. Each row represents a sample and each column a single cell. This is calculated if calculateCI = TRUE.}
#' \item{numOfRuns}{The number of deconvolution repeats performed by CPM. }
#' @examples
#' data(SCLabels)
#' data(SCFlu)
Expand All @@ -368,7 +415,8 @@ CPMMain = function(refference,refferenceNames, Y, chosenCellList, chosenCellNeig
#' @importFrom "utils" "setTxtProgressBar"
#' @importFrom "stats" "sd" "var"
#' @importFrom "grDevices" "chull"
CPM = function(SCData, SCLabels, BulkData, cellSpace, no_cores = NULL, neighborhoodSize = 10, modelSize = 50, minSelection = 5, genePercents = 0.4){
CPM = function(SCData, SCLabels, BulkData, cellSpace, no_cores = NULL, neighborhoodSize = 10, modelSize = 50, minSelection = 5, quantifyTypes = F, calculateCI = F){
genePercents = 0.4
if(!is.null(SCData) & !is.null(SCLabels) & !is.null(BulkData) & !is.null(cellSpace)){
print("Selecting cells for each iteration")
}
Expand All @@ -378,8 +426,8 @@ CPM = function(SCData, SCLabels, BulkData, cellSpace, no_cores = NULL, neighborh
cellSelectionList = cellSelection$chosenCellList
cellNeigSelectionList = cellSelection$chosenNeigList
print("Running CPM, this may take a few minutes")
deconvolutionRes = CPMMain(SCData, SCLabels,BulkData, cellSelectionList, cellNeigSelectionList, numOfRunsToUse,modelSize, neighborhoodSize, no_cores, genePercents)
list(predicted = deconvolutionRes, numOfRuns = numOfRunsToUse)
deconvolutionRes = CPMMain(SCData, SCLabels,BulkData, cellSelectionList, cellNeigSelectionList, numOfRunsToUse,modelSize, neighborhoodSize, no_cores, genePercents, quantifyTypes, calculateCI)
list(predicted = deconvolutionRes$predictions, cellTypePredictions = deconvolutionRes$cellTypePredictions, confIntervals = deconvolutionRes$confMatrix, numOfRuns = numOfRunsToUse)
}

#' Gene expression profiles of flu and pbs sample.
Expand Down Expand Up @@ -413,4 +461,3 @@ CPM = function(SCData, SCLabels, BulkData, cellSpace, no_cores = NULL, neighborh
#' @format A matrix with 349 rows (cells) and 2 columns (dimensions).
#' @source \url{http://www.diamondse.info/}
"SCCellSpace"

14 changes: 9 additions & 5 deletions man/CPM.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 60def18

Please sign in to comment.