-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathretrieval_efficiency.py
70 lines (62 loc) · 1.73 KB
/
retrieval_efficiency.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
## Evaluation script: scores a retrieval run against relevance judgments.
##
## Both inputs are whitespace-separated text files, read by column position:
##   judgments file: column 0 = query number, column 2 = document name,
##                   column 3 = relevance (relevant when > 0)
##   results file:   column 0 = query number, column 2 = document name
## NOTE(review): this looks like the TREC qrels / run layout -- confirm.
given_file = "qrels_test.txt"
my_result = "rs11.txt"


def load_relevant_docs(path):
    """Read the judgments file at *path*.

    Returns a ``(qnums, relevant_docs)`` pair: ``qnums`` is the set of every
    query number that appears in the file, and ``relevant_docs`` maps a query
    number to the set of document names judged relevant (column 3 > 0).
    A query with no relevant document is in ``qnums`` but has no dict entry.
    """
    qnums = set()
    relevant_docs = {}
    with open(path, 'r') as gf:
        for line in gf:
            words = line.split()
            if not words:
                # Blank line (e.g. a trailing newline at end of file).
                continue
            qnum = int(words[0])
            qnums.add(qnum)
            if int(words[3]) > 0:
                relevant_docs.setdefault(qnum, set()).add(words[2])
    return qnums, relevant_docs


def load_retrieved_docs(path):
    """Read the results file at *path*.

    Returns a dict mapping a query number to the set of document names
    listed for that query.
    """
    my_docs = {}
    with open(path, 'r') as mr:
        for line in mr:
            words = line.split()
            if not words:
                continue
            my_docs.setdefault(int(words[0]), set()).add(words[2])
    return my_docs


def score_query(relevant, retrieved):
    """Return ``(precision, recall, f1)`` for one query.

    *relevant* and *retrieved* are sets of document names.
    precision = |relevant & retrieved| / |retrieved|
    recall    = |relevant & retrieved| / |relevant|
    f1        = harmonic mean of precision and recall
    Any metric whose denominator would be zero is reported as 0.0 instead
    of raising ZeroDivisionError.
    """
    hits = len(relevant & retrieved)
    precision = hits / len(retrieved) if retrieved else 0.0
    recall = hits / len(relevant) if relevant else 0.0
    if precision + recall > 0:
        f1 = (2 * precision * recall) / (precision + recall)
    else:
        f1 = 0.0
    return precision, recall, f1


def mean(values):
    """Return the arithmetic mean of *values*, or 0.0 when it is empty."""
    return sum(values) / len(values) if values else 0.0


def main():
    """Score every judged query and print per-query F1 plus the averages."""
    qnums, relevant_docs = load_relevant_docs(given_file)
    my_docs = load_retrieved_docs(my_result)

    precisions = []
    recalls = []
    f1s = []
    for qnum in sorted(qnums):
        # .get() keeps queries that have no relevant documents, or that are
        # missing from the results file, from raising KeyError; such queries
        # score 0.0 and still count towards the averages.
        precision, recall, f1 = score_query(
            relevant_docs.get(qnum, set()),
            my_docs.get(qnum, set()),
        )
        precisions.append(precision)
        recalls.append(recall)
        f1s.append(f1)
        print("F1 score for doc = " + str(qnum) + " is " + str(f1) )

    # Average over the number of queries actually scored.
    print("Average F1 score = " + str(mean(f1s)))
    print("Average Precision = " + str(mean(precisions)))
    print("Average Recall = " + str(mean(recalls)))


if __name__ == "__main__":
    main()