-
Notifications
You must be signed in to change notification settings - Fork 1
/
ncboann.py
92 lines (70 loc) · 2.62 KB
/
ncboann.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import urllib2
import requests
import httplib2
import json
import os
from pprint import pprint
import ann_utils as utils
import codecs
# Base URL of the annotation REST service; endpoint paths such as
# "/annotator" are appended to it by the functions in this module.
REST_URL = "http://data.bioontology.org"
# Honghan's api key
# NOTE(review): this credential is committed in source; consider loading it
# from configuration or the environment instead.
API_KEY = "5db4a03d-144f-4903-9933-aaf326dd7786"
# Comma-separated ontology names used by annotate() when the caller does not
# supply any.
ontologies = 'BTO,DRON,NDDF'
# create the url for http get
def construct_url(text, ontologies):
    """Build the HTTP GET URL for an annotator request.

    Args:
        text: the text to annotate; it is percent-quoted with
            ``urllib2.quote`` before being placed in the URL.
        ontologies: either one comma-separated string of ontology names
            (the form of the module-level ``ontologies`` constant) or an
            iterable of ontology name strings.

    Returns:
        ``REST_URL`` + "/annotator" followed by a ``text`` and an
        ``ontologies`` query parameter. The ``ontologies`` parameter is
        always present and is empty when no ontology is given.
    """
    # A plain string used to be iterated character by character, which turned
    # 'BTO,DRON' into 'B,T,O,,,D,R,O,N'. Strings (byte or unicode) are now
    # passed through unchanged; any other iterable is comma-joined.
    if isinstance(ontologies, (str, type(u""))):
        onto_param = ontologies
    else:
        onto_param = ",".join(ontologies)
    query = "/annotator?text=" + urllib2.quote(text)
    return REST_URL + query + "&ontologies=" + onto_param
# create the data object (dictionary) for http post
def construct_postobj(text, ontos):
    """Assemble the dictionary sent as the body of an annotator POST.

    Args:
        text: the text to annotate.
        ontos: the ontology selection, stored unchanged under 'ontologies'.

    Returns:
        A dict with the keys 'text', 'ontologies' and 'apikey'; the last one
        carries the module-level ``API_KEY``.
    """
    postobj = dict(text=text, ontologies=ontos)
    postobj['apikey'] = API_KEY
    return postobj
# post to get annotation
def post_json(postobj):
    """POST ``postobj`` to the annotator endpoint and decode the reply.

    Args:
        postobj: the object handed to ``requests.post`` as its ``data``
            argument (see construct_postobj).

    Returns:
        The response body parsed with ``json.loads``.
    """
    endpoint = REST_URL + "/annotator"
    reply = requests.post(endpoint, data=postobj)
    return json.loads(reply.content)
# httpget to get annotation
def get_json(url):
    """Fetch ``url`` with HTTP GET and return the decoded JSON body.

    The request carries an ``Authorization: apikey token=<API_KEY>`` header.

    Args:
        url: the full request URL (see construct_url).

    Returns:
        The response body parsed with ``json.loads``.
    """
    opener = urllib2.build_opener()
    opener.addheaders = [('Authorization', 'apikey token=' + API_KEY)]
    response = opener.open(url)
    # Close the response explicitly so the connection is released even when
    # reading or JSON decoding fails; try/finally is used because the
    # response object is not relied on to be a context manager.
    try:
        return json.loads(response.read())
    finally:
        response.close()
# annotate a text
def annotate(text, ontos=None):
    """Annotate ``text`` through the POST endpoint.

    Args:
        text: the text to annotate.
        ontos: the ontology selection; when it is None the module-level
            ``ontologies`` default is used instead.

    Returns:
        Whatever post_json returns for the constructed request object.
    """
    selected = ontologies if ontos is None else ontos
    payload = construct_postobj(text, selected)
    return post_json(payload)
# test
def test():
    """Annotate a fixed sample sentence and print the result as JSON."""
    sample_text = "12 While oral corticosteroids are the cornerstone of management of acute, moderate or severe asthma,6 several reports have recently shaken the belief that they are equally effective for all patients with asthma, showing that children with viral-induced wheezing21 and smoking adults22 are corticosteroid-resistant."
    # Goes through the POST path with the default ontologies.
    print(json.dumps(annotate(sample_text)))
    # HTTP GET alternative:
    # annotations = get_json(construct_url(sample_text, ontologies))
def match_concepts(text, concepts):
    """Find the first occurrence of each concept in ``text``, ignoring case.

    Args:
        text: the text to search.
        concepts: iterable of concept strings.

    Returns:
        A list of ``[concept, position]`` pairs in the order of ``concepts``.
        ``concept`` is the string exactly as given and ``position`` is the
        index of its first occurrence in the lowercased text. Concepts that
        do not occur, and empty concepts, are left out.
    """
    haystack = text.lower()
    matches = []
    for concept in concepts:
        # find('') is always 0, so an empty concept would "match" every text.
        if not concept:
            continue
        # The text is lowercased, so the concept must be lowercased too;
        # otherwise any concept containing a capital letter can never match.
        position = haystack.find(concept.lower())
        if position >= 0:
            matches.append([concept, position])
    return matches
def file_match_concepts(ann_file, concepts):
anns = utils.load_json_data(ann_file)
for ann in anns:
ret = match_concepts(ann['text'], concepts)
if len(ret) > 0:
print ret, ann['sid']
def load_brain_regions(f):
    """Read the second tab-separated column of a UTF-8 text file.

    Args:
        f: path of the TSV file.

    Returns:
        A list with the second column of every line that has one, in file
        order. Lines without a tab (including blank lines) are skipped
        instead of raising IndexError.
    """
    concepts = []
    with codecs.open(f, encoding='utf-8') as rf:
        for line in rf:
            # Strip the line terminator first so a two-column row does not
            # keep '\n' (or '\r' from CRLF files) in the concept name.
            columns = line.rstrip('\r\n').split('\t')
            if len(columns) > 1:
                concepts.append(columns[1])
    return concepts
if __name__ == "__main__":
    # Script entry point: load the concept list, then print its matches in
    # one annotated paper.
    regions_file = './resources/brain-regions-without-fma.tsv'
    annotation_file = './anns_v2/Chechko et al., (2014) - Neural correlates of unsuccessful memory performance in MCI_annotated_ann.json'
    concepts = load_brain_regions(regions_file)
    file_match_concepts(annotation_file, concepts)