-
Notifications
You must be signed in to change notification settings - Fork 0
/
SecondaryScoring.py
77 lines (49 loc) · 1.84 KB
/
SecondaryScoring.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import math
import re
def total_scoring(data):
    """Rank the aggregated keys by their summed score, highest first.

    ``data`` is handed unchanged to ``compute_total_scores``, which yields a
    mapping of key -> ``[count, total_score]``.

    Returns a list of ``[key, total_score]`` pairs in descending order of
    total score. Keys with equal totals keep the order in which
    ``compute_total_scores`` produced them (the sort is stable).
    """
    ranked = sorted(
        compute_total_scores(data).items(),
        key=lambda item: item[1][1],
        reverse=True,
    )
    return [[word, entry[1]] for word, entry in ranked]
def log10_scoring(data):
    """Rank the aggregated keys by ``1 + log10(total_score)``, highest first.

    ``data`` is handed unchanged to ``compute_total_scores``; each key's
    summed score is then log-scaled.

    Returns a list of ``[key, 1 + log10(total_score)]`` pairs in descending
    order of the scaled value.

    NOTE(review): ``math.log10`` raises ``ValueError`` for totals <= 0, so
    this presumably assumes strictly positive scores -- confirm with callers.
    """
    scaled = []
    for word, entry in compute_total_scores(data).items():
        scaled.append([word, 1 + math.log10(entry[1])])

    # list.sort is stable, so ties keep their aggregation order.
    scaled.sort(key=lambda pair: pair[1], reverse=True)
    return scaled
def count_scoring(data):
    """Rank the aggregated keys by how many results contributed to each.

    ``data`` is handed unchanged to ``compute_total_scores``, which yields a
    mapping of key -> ``[count, total_score]``.

    Returns a list of ``[key, count]`` pairs in descending order of count.
    Keys with equal counts keep their aggregation order (stable sort).
    """
    def occurrences(item):
        # item is (key, [count, total_score]); rank on the count slot.
        return item[1][0]

    by_count = sorted(
        compute_total_scores(data).items(), key=occurrences, reverse=True
    )
    return [[word, entry[0]] for word, entry in by_count]
def highest_relevancy(data):
    """Return the keys whose summed score is strictly above the average.

    ``data`` is handed unchanged to ``compute_total_scores``. The average is
    taken over the per-key summed scores (one value per distinct key, not
    per input result).

    Returns a list of ``[key, total_score]`` pairs, in descending order of
    total score, containing only keys whose total exceeds the average.
    Returns an empty list when ``data`` produces no keys at all.

    Side effect: prints the computed average to stdout (skipped when there
    is nothing to average).
    """
    totals = compute_total_scores(data)
    if not totals:
        # No keys means no average; the division below would otherwise
        # raise ZeroDivisionError.
        return []

    ranked = sorted(totals.items(), key=lambda item: item[1][1], reverse=True)
    average = sum(entry[1] for _, entry in ranked) / len(ranked)

    above_average = [
        [word, entry[1]] for word, entry in ranked if entry[1] > average
    ]
    print("Average is {0}".format(average))
    return above_average
def compute_total_scores(data):
    """Aggregate result count and summed score per key extracted from ids.

    Each item of ``data`` must support ``item['score']`` and ``item['id']``.
    The id is matched from its start against optional digits, an underscore,
    and then a run of letters, digits and underscores; that run is the
    grouping key (called ``PF_word`` in the original code). Anything after
    the run is ignored.

    Returns a dict mapping each key to a two-element list
    ``[count, total_score]``, in first-seen order of the keys.

    Raises:
        IndexError: if an id does not match the expected pattern. The
            exception type is kept from the previous implementation, which
            failed on an empty ``findall`` result; the message now names
            the offending id.
    """
    id_pattern = re.compile(r'^[0-9]*_([A-Za-z0-9_]*)')
    total_scores = {}
    for result in data:
        score = result['score']
        result_id = result['id']

        match = id_pattern.match(result_id)
        if match is None:
            raise IndexError(
                "result id {0!r} does not match '<digits>_<word>'".format(result_id)
            )
        word = match.group(1)

        entry = total_scores.get(word)
        if entry is None:
            total_scores[word] = [1, score]
        else:
            entry[0] += 1
            entry[1] += score
    return total_scores