maciejkula · maciejkula · Oct 14, 2017 · Sep 7, 2017 · Sep 7, 2017 · Sep 7, 2017
diff --git a/.DS_Store b/.DS_Store
diff --git a/spotlight/evaluation.py b/spotlight/evaluation.py
@@ -102,6 +102,83 @@ def sequence_mrr_score(model, test, exclude_preceding=False):
     return np.array(mrrs)
 
 
+def _get_precision_recall(predictions, targets, k):
+    """
+    Compute precision and recall of the first k predictions.
+
+    Parameters
+    ----------
+
+    predictions: sequence of item ids
+        Item ids ordered from most to least recommended.
+    targets: sequence of item ids
+        The items that count as hits.
+    k: int
+        Number of leading predictions to consider.
+
+    Returns
+    -------
+
+    (precision, recall): tuple of floats
+        Hits divided by the number of predictions actually
+        considered, and hits divided by the number of targets.
+        Either value is 0.0 when its denominator is zero.
+    """
+
+    top_k = predictions[:k]
+    num_hit = len(set(top_k).intersection(targets))
+
+    # An empty cut-off (k=0, no predictions) or an empty target list
+    # would otherwise raise ZeroDivisionError; score those as 0.0.
+    precision = float(num_hit) / len(top_k) if len(top_k) else 0.0
+    recall = float(num_hit) / len(targets) if len(targets) else 0.0
+
+    return precision, recall
+
+
+def precision_recall_score(model, test, train=None, k=10):
+    """
+    Compute Precision@k and Recall@k scores. One score
+    is given for every user with interactions in the test
+    set, representing the Precision@k and Recall@k of all their
+    test items.
+
+    Parameters
+    ----------
+
+    model: fitted instance of a recommender model
+        The model to evaluate.
+    test: :class:`spotlight.interactions.Interactions`
+        Test interactions.
+    train: :class:`spotlight.interactions.Interactions`, optional
+        Train interactions. If supplied, scores of known
+        interactions will not affect the computed metrics.
+    k: int or array of int, optional
+        The cut-off(s): the maximum number of predicted items
+        considered for each score.
+
+    Returns
+    -------
+
+    (Precision@k, Recall@k): tuple of numpy arrays
+        One entry per user with at least one test interaction.
+        If k is a scalar, each array has shape (num_users,). If k
+        is an array, each array has shape (num_users, len(k)), where
+        each row corresponds to a user and each column corresponds
+        to a value of k.
+    """
+
+    test = test.tocsr()
+
+    if train is not None:
+        train = train.tocsr()
+
+    # Remember the caller's form of k: the k axis is dropped from
+    # the result only when a single scalar cut-off was requested.
+    scalar_k = np.isscalar(k)
+    if scalar_k:
+        k = np.array([k])
+
+    precision = []
+    recall = []
+
+    for user_id, row in enumerate(test):
+
+        # Users without test interactions get no score.
+        if not len(row.indices):
+            continue
+
+        # Negate so that the ascending argsort below orders items
+        # from highest to lowest predicted score.
+        predictions = -model.predict(user_id)
+
+        if train is not None:
+            rated = train[user_id].indices
+            # FLOAT_MAX is defined elsewhere in this module; presumably
+            # large enough to push known items to the end of the ranking.
+            predictions[rated] = FLOAT_MAX
+
+        predictions = predictions.argsort()
+
+        targets = row.indices
+
+        user_precision, user_recall = zip(*[
+            _get_precision_recall(predictions, targets, x)
+            for x in k
+        ])
+
+        precision.append(user_precision)
+        recall.append(user_recall)
+
+    # Force a (num_users, len(k)) layout. A blanket squeeze() would
+    # also collapse the user axis when only one user is scored, and
+    # the k axis when k is a one-element array.
+    precision = np.array(precision).reshape(-1, len(k))
+    recall = np.array(recall).reshape(-1, len(k))
+
+    if scalar_k:
+        precision = precision[:, 0]
+        recall = recall[:, 0]
+
+    return precision, recall
+
+
 def rmse_score(model, test):
     """
     Compute RMSE score for test interactions.

diff --git a/tests/factorization/test_implicit.py b/tests/factorization/test_implicit.py
@@ -11,7 +11,6 @@
 from spotlight.factorization.representations import BilinearNet
 from spotlight.layers import BloomEmbedding
 
-
 RANDOM_STATE = np.random.RandomState(42)
 CUDA = bool(os.environ.get('SPOTLIGHT_CUDA', False))
 

diff --git a/tests/test_evaluation_metrics.py b/tests/test_evaluation_metrics.py
@@ -0,0 +1,56 @@
+import os
+
+import numpy as np
+
+import pytest
+
+from spotlight.evaluation import precision_recall_score
+from spotlight.cross_validation import random_train_test_split
+from spotlight.datasets import movielens
+from spotlight.factorization.implicit import ImplicitFactorizationModel
+
+# Seeded generator shared by the split and the model in this module.
+RANDOM_STATE = np.random.RandomState(seed=42)
+# Truthy whenever SPOTLIGHT_CUDA is set to a non-empty value; passed
+# to the model as use_cuda.
+CUDA = bool(os.getenv('SPOTLIGHT_CUDA', False))
+
+
+@pytest.fixture(scope='module')
+def data():
+    """
+    Module-scoped fixture returning ``(train, test, model)``.
+
+    Loads the MovieLens '100K' interactions, splits them with
+    ``random_train_test_split`` and fits an
+    ``ImplicitFactorizationModel`` (loss='bpr', n_iter=1) on the
+    train part.
+    """
+
+    dataset = movielens.get_movielens_dataset('100K')
+
+    split = random_train_test_split(dataset, random_state=RANDOM_STATE)
+    train, test = split
+
+    hyperparameters = dict(
+        loss='bpr',
+        n_iter=1,
+        batch_size=1024,
+        learning_rate=1e-2,
+        l2=1e-6,
+        random_state=RANDOM_STATE,
+        use_cuda=CUDA,
+    )
+    model = ImplicitFactorizationModel(**hyperparameters)
+    model.fit(train)
+
+    return train, test, model
+
+
+@pytest.mark.parametrize('k', [
+    1,
+    [1, 1],
+    [1, 1, 1]
+])
+def test_precision_recall(data, k):
+    """
+    Check the shapes returned by precision_recall_score for a
+    scalar k and for lists of k.
+    """
+
+    # Evaluate on the exact split the fixture's model was fitted on.
+    # Splitting again here would pass the shared, already-used
+    # RANDOM_STATE a second time, which is not guaranteed to
+    # reproduce the fixture's partition, and would reload the
+    # dataset for every parametrized case.
+    (train, test, model) = data
+
+    precision, recall = precision_recall_score(model, test, train, k=k)
+
+    assert precision.shape == recall.shape
+
+    if not isinstance(k, list):
+        assert len(precision.shape) == 1
+    else:
+        assert len(precision.shape) == 2
+        assert precision.shape[1] == len(k)