-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_cleaning.py
86 lines (69 loc) · 2.07 KB
/
data_cleaning.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
from datetime import datetime, date
# Current local date, captured once at import time. Not referenced by the
# live code visible in this file.
now = date.today()

# Sample entity-recognition payload passed to clean_dict() when the module
# is run as a script. Each entity carries the matched text, its category,
# and the offset/length/confidence values as given.
dicc = {
    "entities": [
        {
            "text": "me duele la cabeza",
            "category": "dolor-cabeza",
            "offset": 0,
            "length": 18,
            "confidenceScore": 1,
        },
        {
            "text": "me duele la panza",
            "category": "dolor-panza",
            "offset": 21,
            "length": 17,
            "confidenceScore": 1,
        },
    ],
}
# for i in entities:
# d= entities["entities"][0]["text"]
# f= entities["entities"][1]["text"]
# #print(d)
# #print(f)
# sin_1 = entities["entities"][0]["text"]
# cat_1 = entities["entities"][0]["category"]
# sin_2 = entities["entities"][1]["text"]
# cat_2 = entities["entities"][1]["category"]
def clean_dict(dic):
    """Collect the text and category of every entity in *dic*.

    Args:
        dic: Mapping with an ``"entities"`` key holding an iterable of
            entity mappings; each entity must provide ``"text"`` and
            ``"category"``.

    Returns:
        A dict with two parallel lists in input order: ``"symptoms"``
        (each entity's ``"text"``) and ``"category"`` (each entity's
        ``"category"``). Both lists are empty when there are no entities.

    Raises:
        KeyError: If ``"entities"`` is missing, or an entity lacks
            ``"text"`` or ``"category"``.
    """
    # Accumulate into lists: assigning the keys directly inside the loop
    # would overwrite earlier entities and keep only the last one.
    cleaned = {"symptoms": [], "category": []}
    for entity in dic["entities"]:
        cleaned["symptoms"].append(entity["text"])
        cleaned["category"].append(entity["category"])
    return cleaned
# my_dict = {'sintoma': [],
# 'categoria': [],
# 'fecha': []
# }
# def add_to_dic():
# my_dict['sintoma'].append(sin_1)
# my_dict['sintoma'].append(sin_2)
# my_dict['categoria'].append(cat_1)
# my_dict['categoria'].append(cat_2)
# my_dict['fecha'].append(now)
# my_dict['fecha'].append(now)
# add_to_dic()
# new_date = datetime(2019, 2, 28,)
# print(new_date)
# full_dates = '1/1/17'
# objDate = datetime.strptime(full_dates, '%m/%d/%y')
# print(objDate)
# for full_date in full_dates:
#print(my_dict)
# Script entry point: run the cleaner on the sample payload defined above.
# The call's return value is not used here.
if __name__ == "__main__":
    clean_dict(dicc)