forked from jw995/AIML-recommanding-system-project
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Ranker.py
71 lines (54 loc) · 2.55 KB
/
Ranker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Ranker
import logging
import numpy as np
# rank the items from each recommendation module
# highly influenced by business strategy and varies from system to system
from DatabaseInterface import DatabaseInterface
class Ranker(object):
    """Merge the candidate lists of the recommendation modules into one list.

    The ranker pools the "online", "offline" and "popular" candidates it is
    given, drops the items the user already has in the history table, and
    returns a random selection of at most ``numberToServe`` distinct items.
    """

    # Runs once, when the class body is executed (i.e. at import time).
    logging.basicConfig(level=logging.INFO)

    def __init__(self, numberToServe, database):
        """Store the serving size and load the history table.

        Args:
            numberToServe: maximum number of items returned by ``rerank``.
            database: object whose ``extract(DatabaseInterface.HISTORY_KEY)``
                returns the history table (who rated what). The table is
                indexed with ``.loc`` on the "user_id" and "item_id" columns,
                so it is presumably a pandas DataFrame -- confirm against
                DatabaseInterface.
        """
        self.numberToServe = numberToServe
        self.userHistoryDB = database.extract(DatabaseInterface.HISTORY_KEY)  # who rated what
        self.log = logging.getLogger(__name__)

    def _getUsedItems(self, userId):
        """Return the set of item ids found in the history table for ``userId``.

        A ``userId`` of -1 denotes an anonymous user and yields an empty set.
        """
        if userId == -1:
            return set()
        history = self.userHistoryDB
        ownRows = history.loc[:, "user_id"] == userId
        return set(history.loc[ownRows, "item_id"])

    @staticmethod
    def _sampleWithoutReplacement(items, count):
        """Return up to ``count`` distinct entries of ``items`` in random order.

        Unlike ``np.random.choice(..., replace=False)`` this never raises when
        ``items`` is empty or shorter than ``count``; it just returns fewer.
        """
        if count <= 0:
            return []
        picks = np.random.permutation(len(items))[:count]
        return [items[i] for i in picks]

    def rerank(self, recommendationsTuple):
        """Combine the per-module recommendations into the list to serve.

        Strategy:
          * pool the first ``numberToServe`` items of "online", "offline" and
            "popular" (whichever are present),
          * drop duplicates, so an item proposed by several modules is served
            at most once,
          * serve a random selection of items the user has not seen yet,
          * only if there are too few unseen items, pad the tail with
            already-seen candidates rather than failing.

        Args:
            recommendationsTuple: ``(userId, recommendations)``. ``userId`` is
                -1 for an anonymous user. ``recommendations`` is a dictionary
                of lists ``{RecType: Items}`` where RecType can be "online",
                "offline" or "popular".

        Returns:
            A numpy array of at most ``numberToServe`` distinct items; it is
            shorter (possibly empty) when fewer distinct candidates exist.
        """
        userId = recommendationsTuple[0]
        recommendations = recommendationsTuple[1]
        usedItems = self._getUsedItems(userId)
        self.log.info("Recommendations received in Ranker: %s", recommendations)
        self.log.info("Recommendation types received in Ranker: %s", recommendations.keys())

        results = []
        if "online" in recommendations:  # online exists as long as user has been active
            results.extend(recommendations["online"][:self.numberToServe])  # should only have one
        if "offline" in recommendations:  # offline exists only for registered users (CF or LR recs)
            results.extend(recommendations["offline"][:self.numberToServe])
        if "popular" in recommendations:  # most popular should always exist
            # if there are no personalized recs, the rest is filled by most popular
            results.extend(recommendations["popular"][:self.numberToServe])
        else:
            self.log.error("recommendations do not contain popular items")

        # De-duplicate while keeping first-seen order, then split the
        # candidates into not-yet-visited and already-visited items.
        alreadyListed = set()
        unseen = []
        seen = []
        for item in results:
            if item in alreadyListed:
                continue
            alreadyListed.add(item)
            if item in usedItems:
                seen.append(item)
            else:
                unseen.append(item)

        served = self._sampleWithoutReplacement(unseen, self.numberToServe)
        shortfall = self.numberToServe - len(served)
        if shortfall > 0:
            # The user may have watched most of the candidates: fall back to
            # already-seen items, but only for the slots that are still empty.
            served.extend(self._sampleWithoutReplacement(seen, shortfall))
        return np.array(served)
if __name__ == "__main__":
    # Manual smoke test: build a Ranker on top of the database and print the
    # sorted item ids recorded in the history table for user 1.
    # DatabaseInterface is already imported at the top of this module.
    db = DatabaseInterface("DATA")
    db.startEngine()
    ranker = Ranker(numberToServe=10, database=db)
    # Parenthesised single-argument form parses on both Python 2 and Python 3.
    print(sorted(ranker._getUsedItems(1)))