-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsemeval.py
75 lines (59 loc) · 2.44 KB
/
semeval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
"""
This module implements the functions needed to load the SemEval-2012 Task 2
dataset (https://sites.google.com/site/semeval2012task2/) and to evaluate
results on it.
Danushka Bollegala
23/12/2014
"""
import string
import subprocess
import os
class SemEval:
    """
    Loader and evaluator for the SemEval-2012 Task 2 dataset.

    All file locations are derived from the ``pkg_dir`` given to the
    constructor:

    * ``<pkg_dir>/../benchmarks/semeval`` holds the dataset files,
    * ``<pkg_dir>/semeval.sh`` is the scoring script,
    * ``<pkg_dir>/../work/semeval-tmp`` is where the score report is read from.

    After construction, ``self.data`` is a list with one dict per relation,
    holding the keys ``"filename"``, ``"category"``, ``"sub-category"``,
    ``"paradigms"`` and ``"wpairs"``.
    """

    def __init__(self, pkg_dir):
        """
        Remember ``pkg_dir`` as ``self.data_path`` and load the dataset.

        Raises whatever ``open`` raises if a dataset file is missing.
        """
        self.data_path = pkg_dir
        self.load_dataset()

    def load_dataset(self):
        """
        Load the word-pairs for each relation.

        Reads ``subcategories-paradigms.txt`` (one comma-separated relation
        per line) and then, for every relation, the matching
        ``Phase1Answers/Phase1Answers-<filename>.txt`` file. The result is
        stored in ``self.data``.
        """
        semeval_dir = "%s/../benchmarks/semeval" % self.data_path

        # Load sub-categories and paradigms.
        self.data = []
        with open("%s/subcategories-paradigms.txt" % semeval_dir) as subcat_file:
            for line in subcat_file:
                if not line.strip():
                    # A blank (e.g. trailing) line carries no relation and
                    # would otherwise fail on the field lookups below.
                    continue
                p = [x.strip() for x in line.strip().split(',')]
                self.data.append({
                    # The first two fields concatenated name the answers file.
                    "filename": "%s%s" % (p[0], p[1]),
                    "category": p[2],
                    "sub-category": p[3],
                    # Remaining fields are colon-separated example pairs.
                    "paradigms": [tuple(x.split(':')) for x in p[4:]],
                })

        # Load word pairs for each relation.
        for relation in self.data:
            wpair_path = "%s/Phase1Answers/Phase1Answers-%s.txt" % (
                semeval_dir, relation["filename"])
            with open(wpair_path) as wpair_file:
                # Each line is a (possibly double-quoted) colon-separated pair.
                relation["wpairs"] = [
                    tuple(x.strip()
                          for x in line.strip().replace('"', '').split(':'))
                    for line in wpair_file
                ]

    def get_accuracy(self, fname, file_id):
        """
        Evaluate the result.

        Runs ``<data_path>/semeval.sh`` with the arguments ``fname``,
        ``file_id`` and ``data_path``, then reads
        ``<data_path>/../work/semeval-tmp/MaxDiffFinal-<file_id>.txt`` and
        returns the number on its ``Overall Accuracy:`` line as a float
        (the text between the colon and the ``%`` sign).

        Raises:
            ValueError: if the report has no ``Overall Accuracy:`` line.
            IOError/OSError: if the report file cannot be opened.
        """
        # Pass the arguments as a list rather than one interpolated shell
        # string, so paths containing spaces or shell metacharacters reach
        # the script intact. The script's stdout is discarded; its exit
        # status is not checked -- a failed run shows up as a missing or
        # incomplete report below.
        script = "%s/semeval.sh" % self.data_path
        with open(os.devnull, "w") as devnull:
            subprocess.call(
                ["sh", script, str(fname), str(file_id), str(self.data_path)],
                stdout=devnull)

        acc = None
        report_path = "%s/../work/semeval-tmp/MaxDiffFinal-%s.txt" % (
            self.data_path, file_id)
        with open(report_path) as report:
            for line in report:
                if line.startswith("Overall Accuracy:"):
                    # If the line occurs more than once, the last one wins.
                    acc = float(line.strip().split(':')[1].split('%')[0])

        if acc is None:
            raise ValueError(
                "Could not read accuracy from file = %s" % fname)
        return acc
def process():
    """
    Smoke test: load the dataset and print the first relation's word pairs.

    ``SemEval`` itself appends ``/../benchmarks/semeval`` to the directory it
    is given, so the current directory is passed here; the dataset is then
    looked up in ``../benchmarks/semeval`` relative to where the script runs.
    """
    S = SemEval(".")
    print(S.data[0]["wpairs"])


# Run the smoke test only when executed as a script, not when imported.
if __name__ == "__main__":
    process()