-
Notifications
You must be signed in to change notification settings - Fork 3
/
TM_Process_droplet.R
54 lines (41 loc) · 1.91 KB
/
TM_Process_droplet.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# R3.3
ann <- read.table("droplet_annotation.csv.gz", sep=",", header=TRUE)
ann[,1] <- paste(ann[,1], "-1", sep="")
meta <- read.table("droplet_metadata.csv.gz", sep=",", header=TRUE)
tissues <- levels(meta$tissue)
for ( t in tissues ) {
files <- Sys.glob(paste("droplet/",t,"*/", sep=""))
sparsemats <- list()
cellmats <- list()
for( f in files ) {
require("scater")
require("SingleCellExperiment")
require("Matrix")
cellbarcodes <- read.table(paste(f,"barcodes.tsv", sep=""))
genenames <- read.table(paste(f,"genes.tsv", sep=""))
molecules <- Matrix::readMM(paste(f,"matrix.mtx", sep=""))
tmp <- strsplit(f, "[-/]")[[1]]
rownames(molecules) <- genenames[,1]
colnames(molecules) <- paste(tmp[3], cellbarcodes[,1], sep="_")
mouse <- meta[meta$channel == tmp[3],2]
tissue <- meta[meta$channel == tmp[3],3]
subtissue <- meta[meta$channel == tmp[3],4]
sex <- meta[meta$channel == tmp[3],5]
celltype <- ann[,3]
ann <- ann[!is.na(celltype) & !celltype == "unknown",] # add 29 May 2018
molecules <- molecules[,colnames(molecules) %in% ann[,1]] # add 29 May 2018
ann_subset <- ann[match(colnames(molecules), ann[,1]),]
celltype <- ann_subset[,3]
cell_anns <- data.frame(mouse = rep(mouse, times=ncol(molecules)), cell_type1=celltype, tissue=rep(tissue, times=ncol(molecules)), subtissue = rep(subtissue, times=ncol(molecules)), sex=rep(sex, times=ncol(molecules)))
rownames(cell_anns) <- colnames(molecules);
sparsemats[[f]] <- molecules
cellmats[[f]] <- cell_anns
}
all_molecules <- do.call("cbind", sparsemats)
names(cellmats) <- rep(NULL, times=length(cellmats))
all_anns <- do.call("rbind", cellmats)
if (!identical(rownames(all_anns), colnames(all_molecules))) {print("Something is wrong")}
gene_data <- data.frame(feature_symbol=rownames(all_molecules));
sceset <- SingleCellExperiment(assays = list(counts = all_molecules), colData=all_anns, rowData=gene_data)
saveRDS(sceset, file=paste(t,"10X.rds", sep="_"))
}