-
Notifications
You must be signed in to change notification settings - Fork 0
/
Brassicaceae.R
110 lines (92 loc) · 4.6 KB
/
Brassicaceae.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# This script is an example of how to use the GeneDiscoveR package to identify genes associated with a phenotype in the Brassicaceae family.
# For generate the input data for this example, you can use the following commands:
# Install OrthoFinder
# conda install -c bioconda orthofinder
# One execution of OrthoFinder with default parameters
# orthofinder -f OrthoFinder/ExampleData
# In the ExampleData directory, you locate the annotated coding sequences of your species for analysis.
# Import and install the GeneDiscoveR package
invisible(lapply(c("usethis", "devtools"), library, character.only = TRUE))
devtools::install_github("AtilioRausch/GeneDiscoveR")
library(GeneDiscoveR)
N0Dir <- system.file("extdata", "Brassicaceae", package = "GeneDiscoveR")
dataTSV <- system.file("extdata", "Brassicaceae", "table_traits_selfcomp.tsv", package = "GeneDiscoveR")
# Create a GeneDiscoveR object with Brassicaceae data from one execution of OrthoFinder
# In this case, because we are unique execution, we use the uniqueInflation parameter and GeneDiscoveR set automatically the active run.
GeneDiscoveRobject <- GeneDiscoveR(
N0sDir = N0Dir,
dataFile = dataTSV,
uniqueInflation = 1.5,
orthologsTool = "OrthoFinder"
)
# Select species by phenotype. You can perform this step with different phenotypes.
GeneDiscoveRobject <- select_species_by_phenotype(
GeneDiscoveRobject = GeneDiscoveRobject,
columnPhenotype = "Self-compatible",
columnID = "OrthofinderID",
type = "0"
)
GeneDiscoveRobject <- select_species_by_phenotype(
GeneDiscoveRobject = GeneDiscoveRobject,
columnPhenotype = "Self-compatible",
columnID = "OrthofinderID",
type = "1"
)
# Identify genes by phenotype. You can perform this step with different phenotypes.
GeneDiscoveRobject <- gene_identification_by_phenotype(
GeneDiscoveRobject = GeneDiscoveRobject,
formula = as.formula("1 ~ 0"),
statistic = "Fisher",
name = "Self-incompatible",
cores = 8
)
# Select genes by phenotype with p-value <= 0.05 and odds ratio >= 1
GeneDiscoveRobject <- select_genes_by_phenotype(GeneDiscoveRobject,
pvalue = 0.05,
oddsRatio = 1,
sign = ">",
name = "Self-incompatible"
)
# Set annotation file
# Import Arabidopsis thaliana annotation file from TAIR10
annotationFile <- system.file("extdata", "Brassicaceae", "TAIR10_functional_descriptions", package = "GeneDiscoveR")
GeneDiscoveRobject <- set_annotation_file(GeneDiscoveRobject, annotationFile = annotationFile)
indexFilteredGenes <- select_filtered_gene_index(GeneDiscoveRobject, name = "Self-incompatible", pvalue = 0.05, oddsRatio = 1, sign = ">")
# Map annotation to the filtered genes. indexFilteredGenes is the index of the filtered genes, if NULL, the annotation is mapped to the complete table
GeneDiscoveRobject <- map_annotation(
GeneDiscoveRobject = GeneDiscoveRobject,
indexFilteredGenes = indexFilteredGenes,
specieWithAnnotation = "Athaliana_447_Araport11.protein_primaryTranscriptOnly",
oneColumn = FALSE
)
# Map annotation to the complete table. indexFilteredGenes is NULLTools
GeneDiscoveRobject <- map_annotation(
GeneDiscoveRobject = GeneDiscoveRobject,
indexFilteredGenes = NULL,
specieWithAnnotation = "Athaliana_447_Araport11.protein_primaryTranscriptOnly",
oneColumn = FALSE
)
# plot volcano
plot_genediscover_volcano(GeneDiscoveRobject, name = "Self-incompatible")
# Export the filtered genes table with annotation
write_csv(get_filtered_genes_table(GeneDiscoveRobject, name = "Self-compatible", pvalue = 0.05, oddsRatio = 1, sign = ">"), "/home/atilio/Descargas/Results_Jun02/filtered_genes_table-2.csv")
# Export the complete table with identification
write_csv(get_complete_table(GeneDiscoveRobject), "/home/path/to/complete_table-2.csv")
# Show the filtered genes table
head(get_filtered_genes_table(GeneDiscoveRobject, name = "Self-incompatible", pvalue = 0.05, oddsRatio = 1, sign = ">"))
# OG of the gene AT5G44220.1
GeneID <- c("AT5G44220.1")
OrthoFinderID <- c("Athaliana_447_Araport11.protein_primaryTranscriptOnly")
AT5G44220_OG <- obtain_OG_from_gene(GeneDiscoveRobject, GeneID, OrthoFinderID)
AT5G44220_OG # Identify the Orthologous Group (OG) of the gene
AT5G44220_OG <- AT5G44220_OG %>% mutate(HOG = "OG0000495")
# Plot the volcano plot with the TPS genes -----------------------------------------------------------
title <- "Fisher's Exact Test for Self-incompatibility"
plot_genediscover_detector_volcano(GeneDiscoveRobject,
annotationTable = AT5G44220_OG, title = title,
name = "Self-incompatible", type = "HOG",
categories = "OG0000495"
)
# Run the GeneDiscoveR web app
run_genediscover_web_app()
# End of the script