-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmyRead10X.R
133 lines (133 loc) · 5.54 KB
/
myRead10X.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
myRead10X <- function (data.dir, gene.column = 2, cell.column = 1, unique.features = TRUE,
strip.suffix = FALSE,prefixed='') #优化版本的Read10X函数,可以用于处理下载到的带前缀的10X文件。如sample2barcodes.tsv.gz这样的文件名
{
full.data <- list()
has_dt <- requireNamespace("data.table", quietly = TRUE) &&
requireNamespace("R.utils", quietly = TRUE) && requireNamespace('Matrix', quietly = TRUE)
for (i in seq_along(along.with = data.dir)) {
run <- data.dir[i]
if (!dir.exists(paths = run)) {
stop("Directory provided does not exist")
}
barcode.loc <- file.path(run, paste0(prefixed,"barcodes.tsv"))
gene.loc <- file.path(run, paste0(prefixed,"genes.tsv"))
features.loc <- file.path(run, paste0(prefixed,"features.tsv.gz"))
matrix.loc <- file.path(run, paste0(prefixed,"matrix.mtx"))
pre_ver_3 <- file.exists(gene.loc)
if (!pre_ver_3) {
addgz <- function(s) {
return(paste0(s, ".gz"))
}
barcode.loc <- addgz(s = barcode.loc)
matrix.loc <- addgz(s = matrix.loc)
gene.loc <- addgz(s = gene.loc)
pre_ver_3 <- file.exists(gene.loc)#在此添加了对genes.tsv.gz的处理
}
if (!file.exists(barcode.loc)) {
stop("Barcode file missing. Expecting ", basename(path = barcode.loc))
}
if (!pre_ver_3 && !file.exists(features.loc)) {
stop("Gene name or features file missing. Expecting ",
basename(path = features.loc))
}
if (!file.exists(matrix.loc)) {
stop("Expression matrix file missing. Expecting ",
basename(path = matrix.loc))
}
data <- readMM(file = matrix.loc)
if (has_dt) {
cell.barcodes <- as.data.frame(data.table::fread(barcode.loc,
header = FALSE))
}
else {
cell.barcodes <- read.table(file = barcode.loc, header = FALSE,
sep = "\t", row.names = NULL)
}
if (ncol(x = cell.barcodes) > 1) {
cell.names <- cell.barcodes[, cell.column]
}
else {
cell.names <- readLines(con = barcode.loc)
}
if (all(grepl(pattern = "\\-1$", x = cell.names)) & strip.suffix) {
cell.names <- as.vector(x = as.character(x = sapply(X = cell.names,
FUN = ExtractField, field = 1, delim = "-")))
}
if (is.null(x = names(x = data.dir))) {
if (length(x = data.dir) < 2) {
colnames(x = data) <- cell.names
}
else {
colnames(x = data) <- paste0(i, "_", cell.names)
}
}
else {
colnames(x = data) <- paste0(names(x = data.dir)[i],
"_", cell.names)
}
if (has_dt) {
feature.names <- as.data.frame(data.table::fread(ifelse(test = pre_ver_3,
yes = gene.loc, no = features.loc), header = FALSE))
}
else {
feature.names <- read.delim(file = ifelse(test = pre_ver_3,
yes = gene.loc, no = features.loc), header = FALSE,
stringsAsFactors = FALSE)
}
if (any(is.na(x = feature.names[, gene.column]))) {
warning("Some features names are NA. Replacing NA names with ID from the opposite column requested",
call. = FALSE, immediate. = TRUE)
na.features <- which(x = is.na(x = feature.names[,
gene.column]))
replacement.column <- ifelse(test = gene.column ==
2, yes = 1, no = 2)
feature.names[na.features, gene.column] <- feature.names[na.features,
replacement.column]
}
if (unique.features) {
fcols = ncol(x = feature.names)
if (fcols < gene.column) {
stop(paste0("gene.column was set to ", gene.column,
" but feature.tsv.gz (or genes.tsv) only has ",
fcols, " columns.", " Try setting the gene.column argument to a value <= to ",
fcols, "."))
}
rownames(x = data) <- make.unique(names = feature.names[,
gene.column])
}
if (ncol(x = feature.names) > 2) {
data_types <- factor(x = feature.names$V3)
lvls <- levels(x = data_types)
if (length(x = lvls) > 1 && length(x = full.data) ==
0) {
message("10X data contains more than one type and is being returned as a list containing matrices of each type.")
}
expr_name <- "Gene Expression"
if (expr_name %in% lvls) {
lvls <- c(expr_name, lvls[-which(x = lvls ==
expr_name)])
}
data <- lapply(X = lvls, FUN = function(l) {
return(data[data_types == l, , drop = FALSE])
})
names(x = data) <- lvls
}
else {
data <- list(data)
}
full.data[[length(x = full.data) + 1]] <- data
}
list_of_data <- list()
for (j in 1:length(x = full.data[[1]])) {
list_of_data[[j]] <- do.call(cbind, lapply(X = full.data,
FUN = `[[`, j))
list_of_data[[j]] <- as.sparse(x = list_of_data[[j]])
}
names(x = list_of_data) <- names(x = full.data[[1]])
if (length(x = list_of_data) == 1) {
return(list_of_data[[1]])
}
else {
return(list_of_data)
}
}