# evaluation_evanil.py
# Evaluate BioSyn, NILINKER or StringMatcher models on the EvaNIL dataset.
import argparse
import os
import json
from src.REEL.utils import stringMatcher
from src.NILINKER.predict_nilinker import load_model
from src.utils.kbs import KnowledgeBase
from tqdm import tqdm
def calculate_metrics(tp, fp, fn):
    """Compute precision, recall and F1-score from raw counts.

    Args:
        tp: Number of true positives.
        fp: Number of false positives.
        fn: Number of false negatives.

    Returns:
        A tuple ``(precision, recall, f1_score)``. Any metric whose
        denominator is zero is reported as ``0.0`` instead of raising
        ``ZeroDivisionError``.
    """
    predicted = tp + fp
    relevant = tp + fn

    # Guard every division: an evaluation run can legitimately end with no
    # predictions at all (tp + fp == 0) or no correct ones (tp == 0).
    precision = tp / predicted if predicted else 0.0
    recall = tp / relevant if relevant else 0.0

    denominator = precision + recall
    f1_score = 2 * (precision * recall) / denominator if denominator else 0.0

    return precision, recall, f1_score
def filter_top_pred(predictions, true_kb_id):
    """Return the first predicted identifier that differs from ``true_kb_id``.

    The predictions are scanned in the order given. Every entry whose first
    element equals ``true_kb_id`` is skipped, so if the top prediction is the
    true identifier it is replaced by the next one that is not.

    Args:
        predictions: Iterable of indexable entries whose element ``[0]`` is
            the predicted identifier (presumably ordered best-first --
            confirm against the model that produces them). ``None`` is
            treated like an empty iterable.
        true_kb_id: Identifier that must not be returned.

    Returns:
        The first ``pred[0]`` different from ``true_kb_id``, or ``None`` when
        there is no such prediction.
    """
    if predictions is None:
        return None

    return next(
        (pred[0] for pred in predictions if pred[0] != true_kb_id),
        None,
    )
def import_input(filepath):
    """Import mentions from a JSON file.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The deserialized JSON content (a dict for the EvaNIL test files,
        which the evaluation loop iterates by document).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Read as UTF-8 explicitly rather than relying on the platform default
    # encoding; the ``with`` block closes the file, no manual close() needed.
    with open(filepath, 'r', encoding='utf-8') as in_file:
        return json.load(in_file)
def _str_to_bool(value):
    """Parse a command-line token into a boolean.

    ``type=bool`` must not be used with argparse: ``bool('False')`` is
    ``True`` because any non-empty string is truthy.
    """
    if isinstance(value, bool):
        return value

    token = value.strip().lower()

    if token in ('true', 't', 'yes', 'y', '1'):
        return True

    if token in ('false', 'f', 'no', 'n', '0', ''):
        return False

    raise argparse.ArgumentTypeError(
        'expected a boolean value, got {!r}'.format(value))


if __name__ == '__main__':
    # Evaluate BioSyn, NILINKER or StringMatcher models on the EvaNIL dataset
    parser = argparse.ArgumentParser()
    parser.add_argument("-partition", type=str, required=True)
    # Restricting the choices makes an unknown model an explicit error
    # instead of a silent no-op.
    parser.add_argument("-model", type=str, required=True,
                        choices=['nilinker', 'stringMatcher', 'biosyn'])
    # Accepts "--refined", "--refined True" and "--refined False".
    parser.add_argument("--refined", type=_str_to_bool, nargs='?',
                        const=True, default=False)
    # Only consumed by the biosyn branch, which reads args.subset.
    parser.add_argument("-subset", type=str, default=None)
    args = parser.parse_args()

    if args.model in ('nilinker', 'stringMatcher'):
        # Load selected model
        if args.model == 'nilinker':
            nilinker = load_model(args.partition, top_k=10)

        else:
            obo_list = ['hp', 'chebi', 'medic', 'go_bp']
            tsv_list = ['ctd_chem', 'ctd_anat']
            kb_data = KnowledgeBase(args.partition)

            if args.partition in obo_list:
                kb_data.load_obo(kb_data.kb)

            elif args.partition in tsv_list:
                kb_data.load_tsv(kb_data.kb)

            named_to_id = kb_data.name_to_id

        # Import mentions of the test set
        test_filename = 'test_refined.json' if args.refined else 'test.json'
        test_filepath = 'data/evanil/{}/{}'.format(
            args.partition, test_filename)
        gold_standard = import_input(test_filepath)

        # Iterate over each mention and apply model
        tp = 0
        fp = 0
        fn = 0

        # One progress tick per document.
        for mentions in tqdm(gold_standard.values(),
                             total=len(gold_standard)):

            for mention, annotation in mentions.items():
                true_kb_id = annotation[0]
                ancestor_id = annotation[1]

                if args.model == 'nilinker':
                    predictions = nilinker.prediction(mention)

                else:
                    predictions = stringMatcher(mention, named_to_id, 10)

                top_pred_id = filter_top_pred(predictions, true_kb_id)

                if top_pred_id == ancestor_id:
                    tp += 1

                elif top_pred_id is None or top_pred_id == '':
                    # No usable prediction was produced for this mention.
                    fn += 1

                else:
                    fp += 1

        # Output metrics
        print("tp:", tp, "\nfp:", fp, "\nfn:", fn)
        precision, recall, f1_score = calculate_metrics(tp, fp, fn)
        print("Results\nPrecision: {}\nRecall: {}\nF1-score: {}".format(
            precision, recall, f1_score))

    else:
        # args.model == 'biosyn' (guaranteed by ``choices`` above).
        if args.subset is None:
            parser.error('-subset is required when -model is biosyn')

        # NOTE(review): the original template had three placeholders for four
        # format arguments and left "--partition" without a value; a "{}" was
        # added after "--partition" so each argument lands on its own option.
        # Confirm against the command-line interface of inference.py.
        comm = (
            'python inference.py -dataset evanil --partition {} -subset {} '
            '-model_name_or_path tmp/biosyn-biobert-{} '
            '--dictionary_path datasets/evanil/{}/test_dictionary.txt '
            '--use_cuda --show_predictions'
        ).format(args.partition, args.subset, args.partition, args.partition)
        os.system(comm)