-
Notifications
You must be signed in to change notification settings - Fork 0
/
mediator.py
239 lines (168 loc) · 7.16 KB
/
mediator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
from functions import *
from settings import *
import pandas as pd
import os
import json
# Resolve the database and documentation paths against the current working
# directory, so the result depends on where the process is started.
# os.path.join returns the second argument unchanged when it is already
# absolute, so absolute configured paths are kept as they are.
# NOTE(review): GENE_TABLE_PATH, DISEASE_TABLE_PATH and DOCS_PATH are presumably
# provided by the star import from settings -- confirm.
GENE_TABLE_PATH = os.path.join(os.getcwd(), GENE_TABLE_PATH)
DISEASE_TABLE_PATH = os.path.join(os.getcwd(), DISEASE_TABLE_PATH)
DOCS_PATH = os.path.join(os.getcwd(), DOCS_PATH)
# Instantiate the classes from functions.py once, at import time. These
# module-level objects are shared by every function in this module.
geneTable = GeneTable(GENE_TABLE_PATH)
diseaseTable = DiseaseTable(DISEASE_TABLE_PATH)
# Testing is built from both table paths; it backs the correlation and
# "related to" queries in this module.
test = Testing(GENE_TABLE_PATH, DISEASE_TABLE_PATH)
def getInfoGenes():
    """Return a dictionary containing information of geneTable.

    The dictionary has the keys 'nrows' and 'ncols' (first and second entry
    of ``geneTable.get_dimensions()``), 'labels', and 'head' / 'tail' (the
    head and tail rows converted to nested lists).

    :return: Info about geneTable
    :rtype: dict
    """
    # Query the dimensions once instead of once per key.
    dimensions = geneTable.get_dimensions()
    return {
        'nrows': dimensions[0],
        'ncols': dimensions[1],
        'labels': geneTable.get_labels(),
        'head': geneTable.get_head().values.tolist(),
        'tail': geneTable.get_tail().values.tolist(),
    }
def getInfoDiseases():
    """Return a dictionary containing information of diseaseTable.

    The dictionary has the keys 'nrows' and 'ncols' (first and second entry
    of ``diseaseTable.get_dimensions()``), 'labels', and 'head' / 'tail' (the
    head and tail rows converted to nested lists).

    :return: Info about diseaseTable
    :rtype: dict
    """
    # Query the dimensions once instead of once per key.
    dimensions = diseaseTable.get_dimensions()
    return {
        'nrows': dimensions[0],
        'ncols': dimensions[1],
        'labels': diseaseTable.get_labels(),
        'head': diseaseTable.get_head().values.tolist(),
        'tail': diseaseTable.get_tail().values.tolist(),
    }
def getInfo():
    """Collect the summary information of both datasets.

    :return: The gene summary followed by the disease summary
    :rtype: tuple(dict, dict)
    """
    genes_info = getInfoGenes()
    diseases_info = getInfoDiseases()
    return genes_info, diseases_info
def getDiseaseTableList(start=0, end=None, step=1):
    """Return rows of the Disease Table as a list, selected like a slice.

    The arguments are used exactly as in ``diseaseTable[start:end:step]``.

    :param start: First index of the slice
    :param end: Index at which the slice stops; None means "up to the end"
    :param step: Step of the slice
    :return: The selected rows
    :rtype: list
    """
    selection = diseaseTable[slice(start, end, step)]
    return selection.values.tolist()
def getGeneTableList(start=0, end=None, step=1):
    """Return rows of the Gene Table as a list, selected like a slice.

    The arguments are used exactly as in ``geneTable[start:end:step]``.

    :param start: First index of the slice
    :param end: Index at which the slice stops; None means "up to the end"
    :param step: Step of the slice
    :return: The selected rows
    :rtype: list
    """
    selection = geneTable[slice(start, end, step)]
    return selection.values.tolist()
def getDistinctGenes():
    """Return the result of ``geneTable.distinct()`` as a dictionary.

    :return: A dictionary with the column labels ('labels'), the rows as
        nested lists ('rows') and the number of rows ('length')
    :rtype: dict
    """
    distinct = geneTable.distinct()
    labels = distinct.columns.values.tolist()
    rows = distinct.values.tolist()
    return {'labels': labels, 'rows': rows, 'length': distinct.shape[0]}
def getDistinctDiseases():
    """Return the result of ``diseaseTable.distinct()`` as a dictionary.

    :return: A dictionary with the column labels ('labels'), the rows as
        nested lists ('rows') and the number of rows ('length')
    :rtype: dict
    """
    distinct = diseaseTable.distinct()
    labels = distinct.columns.values.tolist()
    rows = distinct.values.tolist()
    return {'labels': labels, 'rows': rows, 'length': distinct.shape[0]}
def getGeneEvidences(gene):
    """Return the evidence table of a gene as a dictionary.

    The input is converted to ``int`` when possible (a gene id); otherwise it
    is passed on as a string (a gene symbol). The table comes from
    ``geneTable.evidence`` and holds the sentences that relate COVID-19 with
    the gene.

    :param gene: the geneID or gene symbol input
    :type gene: str
    :returns: A dictionary with the column labels ('labels'), the rows as
        nested lists ('rows') and the number of rows ('length')
    :rtype: dict
    """
    try:
        lookup_key = int(gene)
    except ValueError:
        # Not numeric: treat the input as a gene symbol.
        lookup_key = str(gene)
    evidence = geneTable.evidence(lookup_key)
    labels = evidence.columns.values.tolist()
    rows = evidence.values.tolist()
    return {'labels': labels, 'rows': rows, 'length': evidence.shape[0]}
def getDiseaseEvidences(disease):
    """Return the evidence table of a disease as a dictionary.

    The input is passed unchanged to ``diseaseTable.evidence``.
    NOTE(review): presumably the table holds the sentences that relate
    COVID-19 with the disease, mirroring getGeneEvidences -- confirm.

    :param disease: the disease identifier accepted by
        ``diseaseTable.evidence``
    :type disease: str
    :returns: A dictionary with the column labels ('labels'), the rows as
        nested lists ('rows') and the number of rows ('length')
    :rtype: dict
    """
    evidence = diseaseTable.evidence(disease)
    labels = evidence.columns.values.tolist()
    rows = evidence.values.tolist()
    return {'labels': labels, 'rows': rows, 'length': evidence.shape[0]}
def getCorrelation(num_rows, min_occurrences):
    """Return a dict with the correlations between genes and diseases.

    The correlations come from ``test.correlation_gene_disease()`` and are
    kept in the order that call returns them (expected to be sorted by the
    highest number of occurrences). The result can be restricted to a minimum
    number of occurrences and to a maximum number of rows.

    :param num_rows: Maximum number of correlations to return; 0 means
        "return all of them". The value is applied as ``iloc[:num_rows]``,
        so a value larger than the number of available rows returns all rows.
    :type num_rows: int
    :param min_occurrences: Keep only the correlations whose 'occurrences'
        value is greater than or equal to this number; 0 disables the filter.
    :type min_occurrences: int
    :return: A dictionary with the column labels ('labels'), the selected rows
        as nested lists ('rows'), their count ('length') and the requested
        'min_occurrences'
    :rtype: dict
    """
    # get the dataframe of the correlations
    corr = test.correlation_gene_disease()
    # Filtering and slicing rows do not change the columns.
    labels = corr.columns.values.tolist()

    # 0 is the "no filter" value, so the 'occurrences' column is only touched
    # when a minimum was actually requested.
    if min_occurrences != 0:
        corr = corr.loc[corr['occurrences'] >= min_occurrences]

    # 0 means "all rows". A positional slice never raises for an end index
    # past the last row, so no bounds check or exception handling is needed.
    if num_rows != 0:
        corr = corr.iloc[:num_rows]

    # Convert only the rows that are actually returned.
    rows = corr.values.tolist()
    return {
        'labels': labels,
        'rows': rows,
        'length': len(rows),
        'min_occurrences': min_occurrences,
    }
def getDiseasesRelatedToGene(gene):
    """Return the result of ``test.find_diseases_related_to_gene`` as a dict.

    :param gene: The gene passed unchanged to the lookup
    :return: A dictionary with the column labels ('labels'), the rows as
        nested lists ('rows') and the number of rows ('length')
    :rtype: dict
    """
    related = test.find_diseases_related_to_gene(gene)
    labels = related.columns.values.tolist()
    rows = related.values.tolist()
    return {'labels': labels, 'rows': rows, 'length': related.shape[0]}
def getGenesRelatedToDisease(disease):
    """Return the result of ``test.find_genes_related_to_disease`` as a dict.

    :param disease: The disease passed unchanged to the lookup
    :return: A dictionary with the column labels ('labels'), the rows as
        nested lists ('rows') and the number of rows ('length')
    :rtype: dict
    """
    related = test.find_genes_related_to_disease(disease)
    labels = related.columns.values.tolist()
    rows = related.values.tolist()
    return {'labels': labels, 'rows': rows, 'length': related.shape[0]}
def getDocumentation(path, name_file=''):
    """Read the documentation from a .json file and return its content.

    You can either input the whole path, or the folder and the name of the file.

    :param path: The path to the file or the path to the folder. When it ends
        with '.json' it is used as the file path and "name_file" is ignored.
    :type path: str
    :param name_file: The name of the file. It can be either with extension or without.
        It's optional if you input the path to the file in "path"
    :type name_file: str
    :return: The documentation
    :rtype: dict
    :raises OSError: If the resolved file cannot be opened
    :raises json.JSONDecodeError: If the file does not contain valid JSON
    """
    if path.endswith('.json'):
        docs_path = path
    else:
        # Accept the file name with or without its extension.
        if not name_file.endswith('.json'):
            name_file += '.json'
        docs_path = os.path.join(path, name_file)
    # Read explicitly as UTF-8 so the result does not depend on the platform's
    # default encoding.
    with open(docs_path, encoding='utf-8') as f:
        return json.load(f)