-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathcreate_background.R
117 lines (116 loc) · 4.83 KB
/
create_background.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#' Create gene background
#'
#' Create a gene background as the union/intersect of
#' all orthologs between input species (\code{species1} and \code{species2}),
#' and the \code{output_species}.
#' This can be useful when generating random lists of background genes
#' to test against in analyses with data from multiple species
#' (e.g. enrichment of mouse cell-type markers gene sets in
#' human GWAS-derived gene sets).
#'
#' @param species1 First species.
#' @param species2 Second species.
#' @param output_species Species to convert all genes from
#' \code{species1} and \code{species2} to first.
#' \code{Default="human"}, but can be any species
#' supported by \pkg{orthogene}, including
#' \code{species1} or \code{species2}.
#' @param as_output_species Return background gene list as
#' \code{output_species} orthologs, instead of the
#' gene names of the original input species.
#' @param use_intersect When \code{species1} and \code{species2} are both
#' different from \code{output_species}, this argument will determine whether
#' to use the intersect (\code{TRUE}) or union (\code{FALSE}) of all genes
#' from \code{species1} and \code{species2}.
#' @param bg User supplied background list that will be returned to the
#' user after removing duplicate genes.
#' @param gene_map User-supplied \code{gene_map} data table from
#' \link[orthogene]{map_orthologs} or \link[orthogene]{map_genes}.
#' @inheritParams convert_orthologs
#'
#' @returns Background gene list.
#'
#' @export
#' @examples
#' bg <- orthogene::create_background(species1 = "mouse",
#' species2 = "rat",
#' output_species = "human")
create_background <- function(species1,
                              species2,
                              output_species = "human",
                              as_output_species = TRUE,
                              use_intersect = TRUE,
                              bg = NULL,
                              gene_map = NULL,
                              method = "homologene",
                              non121_strategy = "drop_both_species",
                              verbose = TRUE) {
  # devoptera::args2vars(create_background)
  # Build a background gene vector for a cross-species analysis.
  #
  # Resolution order:
  #   1. A user-supplied `bg` is de-duplicated and returned as-is.
  #   2. If `species1` and `species2` both equal `output_species`, every gene
  #      symbol reported by all_genes() for that species is returned.
  #   3. Otherwise each input species is mapped onto `output_species` with
  #      report_orthologs(), and the intersect (`use_intersect = TRUE`) or
  #      union of the two gene columns is returned.
  #
  # NOTE(review): the `gene_map` argument is kept for interface
  # compatibility, but nothing in this function reads it.

  #### User-supplied background ####
  # Handled once, up front: the result never depends on the species arguments.
  if (!is.null(bg)) {
    bg <- unique(bg)
    messager("Returning", formatC(length(bg), big.mark = ","),
             "unique genes from the user-supplied bg.", v = verbose)
    return(bg)
  }

  #### If all species are the same, just use all_genes ####
  if (all(c(species1, species2) == output_species)) {
    genome_map <- all_genes(species = output_species,
                            method = method,
                            verbose = verbose)
    # De-duplicate so the returned vector matches the "unique genes" count
    # reported in the message below.
    bg <- unique(genome_map$Gene.Symbol)
    messager("Returning", formatC(length(bg), big.mark = ","),
             "unique genes from entire", output_species, "genome.",
             v = verbose)
    return(bg)
  }

  #### Map each input species onto the output species ####
  messager("Generating gene background for",
           paste0(species1, " x ", species2, " ==>"),
           output_species,
           v = verbose)
  # Column to pull from each ortholog map: converted names or original names.
  gene_var <- if (as_output_species) "ortholog_gene" else "input_gene"
  # Both species are mapped with identical settings; only the target differs.
  map_to_output <- function(target) {
    report_orthologs(
      target_species = target,
      reference_species = output_species,
      method_all_genes = method,
      method_convert_orthologs = method,
      non121_strategy = non121_strategy,
      return_report = FALSE,
      verbose = verbose
    )
  }
  gene_map1 <- map_to_output(species1)
  # Avoid a second (identical) query when both inputs are the same species.
  gene_map2 <- if (species1 == species2) {
    gene_map1
  } else {
    map_to_output(species2)
  }

  #### Use intersect/union ####
  # intersect()/union() already drop duplicates, so no extra unique() needed.
  if (use_intersect) {
    bg <- intersect(gene_map1[[gene_var]],
                    gene_map2[[gene_var]])
    messager(formatC(length(bg), big.mark = ","),
             "intersect background genes used.", v = verbose)
  } else {
    bg <- union(gene_map1[[gene_var]],
                gene_map2[[gene_var]])
    messager(formatC(length(bg), big.mark = ","),
             "union background genes used.", v = verbose)
  }
  bg
}