-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathcreate_background.Rd
102 lines (93 loc) · 3.61 KB
/
create_background.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/create_background.R
\name{create_background}
\alias{create_background}
\title{Create gene background}
\usage{
create_background(
species1,
species2,
output_species = "human",
as_output_species = TRUE,
use_intersect = TRUE,
bg = NULL,
gene_map = NULL,
method = "homologene",
non121_strategy = "drop_both_species",
verbose = TRUE
)
}
\arguments{
\item{species1}{First species.}
\item{species2}{Second species.}
\item{output_species}{Species to convert all genes from
\code{species1} and \code{species2} to first.
\code{Default="human"}, but can be any species
supported by \pkg{orthogene}, including
\code{species1} or \code{species2}.}
\item{as_output_species}{Return background gene list as
\code{output_species} orthologs, instead of the
gene names of the original input species.}
\item{use_intersect}{When \code{species1} and \code{species2} are both
different from \code{output_species}, this argument will determine whether
to use the intersect (\code{TRUE}) or union (\code{FALSE}) of all genes
from \code{species1} and \code{species2}.}
\item{bg}{User-supplied background list that will be returned to the
user after removing duplicate genes.}
\item{gene_map}{User-supplied \code{gene_map} data table from
\link[orthogene]{map_orthologs} or \link[orthogene]{map_genes}.}
\item{method}{R package to use for gene mapping:
\itemize{
\item{\code{"gprofiler"} : Slower but more species and genes.}
\item{\code{"homologene"} : Faster but fewer species and genes.}
\item{\code{"babelgene"} : Faster but fewer species and genes.
Also gives consensus scores for each gene mapping based on
several different data sources.}
}}
\item{non121_strategy}{How to handle genes that don't have
1:1 mappings between \code{input_species}:\code{output_species}.
Options include:\cr
\describe{
\item{\code{"drop_both_species" or "dbs" or 1} : \cr}{
Drop genes that have duplicate
mappings in either the \code{input_species} or \code{output_species} \cr
(\emph{DEFAULT}).}
\item{\code{"drop_input_species" or "dis" or 2} : \cr}{
Only drop genes that have duplicate
mappings in the \code{input_species}.}
\item{\code{"drop_output_species" or "dos" or 3} : \cr}{
Only drop genes that have duplicate
mappings in the \code{output_species}.}
\item{\code{"keep_both_species" or "kbs" or 4} : \cr}{
Keep all genes regardless of whether
they have duplicate mappings in either species.}
\item{\code{"keep_popular" or "kp" or 5} : \cr}{
Return only the most "popular" interspecies ortholog mappings.
This procedure tends to yield a greater number of returned genes
but at the cost of many of them not being true biological 1:1 orthologs.}
\item{\code{"sum","mean","median","min" or "max"} : \cr}{
When \code{gene_df} is a matrix and \code{gene_output="rownames"},
these options will aggregate many-to-one gene mappings
(\code{input_species}-to-\code{output_species})
after dropping any duplicate genes in the \code{output_species}.
}
}}
\item{verbose}{Print messages.}
}
\value{
Background gene list.
}
\description{
Create a gene background as the union/intersect of
all orthologs between input species (\code{species1} and \code{species2}),
and the \code{output_species}.
This can be useful when generating random lists of background genes
to test against in analyses with data from multiple species
(e.g. enrichment of mouse cell-type marker gene sets in
human GWAS-derived gene sets).
}
\examples{
bg <- orthogene::create_background(species1 = "mouse",
species2 = "rat",
output_species = "human")
}