Feature/add metrics #132

Open
wants to merge 3 commits into base: dev
2 changes: 2 additions & 0 deletions matchzoo/metrics/__init__.py
@@ -1,4 +1,5 @@
from .precision import Precision
from .recall import Recall
from .average_precision import AveragePrecision
from .discounted_cumulative_gain import DiscountedCumulativeGain
from .mean_reciprocal_rank import MeanReciprocalRank
@@ -7,6 +8,7 @@
NormalizedDiscountedCumulativeGain

from .accuracy import Accuracy
from .f1 import F1
from .cross_entropy import CrossEntropy


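For a quick smoke test of the new re-exports, a minimal sketch (it assumes this branch of MatchZoo is importable; the expected reprs follow the `ALIAS` conventions used in the files below):

```python
# Hypothetical smoke test for the new exports; assumes MatchZoo is installed
# from this branch and the re-exports match the hunk above.
from matchzoo import metrics

print(repr(metrics.Recall(k=2)))  # expected: recall@2(0.0)
print(repr(metrics.F1()))         # expected: f1(0.0)
```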
52 changes: 52 additions & 0 deletions matchzoo/metrics/f1.py
@@ -0,0 +1,52 @@
"""F1 metric for Classification."""
import numpy as np

from matchzoo.engine.base_metric import ClassificationMetric


class F1(ClassificationMetric):
"""F1 metric."""

ALIAS = ['f1']

def __init__(self, threshold: float = 0.):
"""
:class:`F1` constructor.

:param threshold: The threshold above which a label or prediction counts as positive.
"""
self._threshold = threshold

def __repr__(self) -> str:
""":return: Formatted string representation of the metric."""
return f"{self.ALIAS}({self._threshold})"

def __call__(self, y_true: np.array, y_pred: np.array) -> float:
"""
Calculate f1.

Example:
>>> import numpy as np
>>> y_true = np.array([1, 1, 0, 0])
>>> y_pred = np.array([[0.2, 0.8], [0.6, 0.4], [0.7, 0.3], [0.3, 0.7]])
>>> F1()(y_true, y_pred)
0.5

:param y_true: The ground truth label of each document.
:param y_pred: The predicted scores of each document.
:return: F1.
"""
y_pred = np.argmax(y_pred, axis=1)

tp, fp, fn = 0., 0., 0.
for label, score in zip(y_true, y_pred):
if label > self._threshold and score > self._threshold:
tp += 1.
elif label <= self._threshold and score > self._threshold:
fp += 1.
elif label > self._threshold and score <= self._threshold:
fn += 1.
recall = tp / (tp + fn)
precision = tp / (tp + fp)
f1 = 2 * recall * precision / (recall + precision)
return f1
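As a sanity check on the F1 computation, the doctest value can be reproduced with scikit-learn (an assumption here, used only for comparison); the metric takes the argmax of the per-class scores before counting tp/fp/fn, so the equivalent call uses the same hard labels:

```python
import numpy as np
from sklearn.metrics import f1_score  # assumption: scikit-learn is available

y_true = np.array([1, 1, 0, 0])
y_pred = np.array([[0.2, 0.8], [0.6, 0.4], [0.7, 0.3], [0.3, 0.7]])

# F1.__call__ argmaxes the class scores before counting tp/fp/fn,
# so the comparison uses the same hard labels.
hard_pred = np.argmax(y_pred, axis=1)  # -> array([1, 0, 0, 1])
print(f1_score(y_true, hard_pred))     # 0.5, matching F1()(y_true, y_pred)
```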
15 changes: 10 additions & 5 deletions matchzoo/metrics/mean_average_precision.py
@@ -11,17 +11,21 @@ class MeanAveragePrecision(RankingMetric):

ALIAS = ['mean_average_precision', 'map']

def __init__(self, threshold: float = 0.):
def __init__(self, k: int = 1, threshold: float = 0.):
"""
:class:`MeanAveragePrecision` constructor.

:param k: Number of results to consider.
:param threshold: The threshold of relevance degree.
"""
if k <= 0:
raise ValueError(f"k must be greater than 0. {k} received.")
self._k = k
self._threshold = threshold

def __repr__(self):
""":return: Formatted string representation of the metric."""
return f"{self.ALIAS[0]}({self._threshold})"
return f"{self.ALIAS[0]}@{self._k}({self._threshold})"

def __call__(self, y_true: np.array, y_pred: np.array) -> float:
"""
Expand All @@ -30,20 +34,21 @@ def __call__(self, y_true: np.array, y_pred: np.array) -> float:
Example:
>>> y_true = [0, 1, 0, 0]
>>> y_pred = [0.1, 0.6, 0.2, 0.3]
>>> MeanAveragePrecision()(y_true, y_pred)
>>> MeanAveragePrecision(k=4)(y_true, y_pred)
1.0

:param y_true: The ground truth label of each document.
:param y_pred: The predicted scores of each document.
:return: Mean average precision.
:return: Mean average precision @ k.
"""
result = 0.
pos = 0
coupled_pair = sort_and_couple(y_true, y_pred)
for idx, (label, score) in enumerate(coupled_pair):
if label > self._threshold:
pos += 1.
result += pos / (idx + 1.)
if idx < self._k:
result += pos / (idx + 1.)
if pos == 0:
return 0.
else:
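To make the new `k` cut-off concrete, a standalone sketch of the same loop on the doctest data (plain Python, no MatchZoo imports; variable names are ours):

```python
# Standalone re-run of the MAP@k loop above on the doctest data
# (illustration only).
y_true = [0, 1, 0, 0]
y_pred = [0.1, 0.6, 0.2, 0.3]

coupled = sorted(zip(y_true, y_pred), key=lambda x: x[1], reverse=True)
# coupled -> [(1, 0.6), (0, 0.3), (0, 0.2), (0, 0.1)]

k, threshold = 4, 0.
result, pos = 0., 0
for idx, (label, _) in enumerate(coupled):
    if label > threshold:
        pos += 1
        if idx < k:                  # only ranks inside the top k contribute
            result += pos / (idx + 1.)
print(result / pos if pos else 0.)   # 1.0, as in the doctest
```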
14 changes: 10 additions & 4 deletions matchzoo/metrics/mean_reciprocal_rank.py
@@ -11,17 +11,21 @@ class MeanReciprocalRank(RankingMetric):

ALIAS = ['mean_reciprocal_rank', 'mrr']

def __init__(self, threshold: float = 0.):
def __init__(self, k: int = 1, threshold: float = 0.):
"""
:class:`MeanReciprocalRank` constructor.

:param k: Number of results to consider.
:param threshold: The label threshold of relevance degree.
"""
if k <= 0:
raise ValueError(f"k must be greater than 0. {k} received.")
self._k = k
self._threshold = threshold

def __repr__(self) -> str:
""":return: Formatted string representation of the metric."""
return f'{self.ALIAS[0]}({self._threshold})'
return f"{self.ALIAS[0]}@{self._k}({self._threshold})"

def __call__(self, y_true: np.array, y_pred: np.array) -> float:
"""
Expand All @@ -31,15 +35,17 @@ def __call__(self, y_true: np.array, y_pred: np.array) -> float:
>>> import numpy as np
>>> y_pred = np.asarray([0.2, 0.3, 0.7, 1.0])
>>> y_true = np.asarray([1, 0, 0, 0])
>>> MeanReciprocalRank()(y_true, y_pred)
>>> MeanReciprocalRank(k=4)(y_true, y_pred)
0.25

:param y_true: The ground truth label of each document.
:param y_pred: The predicted scores of each document.
:return: Mean reciprocal rank.
:return: Mean reciprocal rank @ k.
"""
coupled_pair = sort_and_couple(y_true, y_pred)
for idx, (label, pred) in enumerate(coupled_pair):
if idx >= self._k:
break
if label > self._threshold:
return 1. / (idx + 1)
return 0.
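The same style of standalone sketch for MRR@k (the helper name `mrr_at_k` is ours, for illustration); lowering `k` below the rank of the first relevant document drives the score to zero:

```python
# Standalone re-run of the MRR@k loop above on the doctest data.
y_true = [1, 0, 0, 0]
y_pred = [0.2, 0.3, 0.7, 1.0]

coupled = sorted(zip(y_true, y_pred), key=lambda x: x[1], reverse=True)
# coupled -> [(0, 1.0), (0, 0.7), (0, 0.3), (1, 0.2)]

def mrr_at_k(pairs, k, threshold=0.):
    for idx, (label, _) in enumerate(pairs):
        if idx >= k:          # stop once the cut-off is reached
            break
        if label > threshold:
            return 1. / (idx + 1)
    return 0.

print(mrr_at_k(coupled, k=4))  # 0.25, as in the doctest
print(mrr_at_k(coupled, k=3))  # 0.0, the relevant document sits outside the top 3
```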
8 changes: 3 additions & 5 deletions matchzoo/metrics/precision.py
Expand Up @@ -18,6 +18,8 @@ def __init__(self, k: int = 1, threshold: float = 0.):
:param k: Number of results to consider.
:param threshold: the label threshold of relevance degree.
"""
if k <= 0:
raise ValueError(f"k must be greater than 0. {k} received.")
self._k = k
self._threshold = threshold

Expand All @@ -43,12 +45,8 @@ def __call__(self, y_true: np.array, y_pred: np.array) -> float:

:param y_true: The ground truth label of each document.
:param y_pred: The predicted scores of each document.
:return: Precision @ k
:raises: ValueError: len(r) must be >= k.
:return: Precision @ k.
"""
if self._k <= 0:
raise ValueError(f"k must be greater than 0."
f"{self._k} received.")
coupled_pair = sort_and_couple(y_true, y_pred)
precision = 0.0
for idx, (label, score) in enumerate(coupled_pair):
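The net effect of moving the check is that an invalid `k` now fails at construction time rather than on the first call; a minimal sketch, assuming this branch is importable:

```python
# Illustration of the relocated check: an invalid k now raises when the
# metric is constructed, not when it is first evaluated.
from matchzoo import metrics

try:
    metrics.Precision(k=0)
except ValueError as err:
    print(err)  # k must be greater than 0. 0 received.
```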
61 changes: 61 additions & 0 deletions matchzoo/metrics/recall.py
@@ -0,0 +1,61 @@
"""Recall for ranking."""
import numpy as np

from matchzoo.engine.base_metric import sort_and_couple, RankingMetric


class Recall(RankingMetric):
"""Recall metric."""

ALIAS = ['recall']

def __init__(self, k: int = 1, threshold: float = 0.):
"""
:class:`Recall` constructor.

:param k: Number of results to consider.
:param threshold: the label threshold of relevance degree.
"""
if k <= 0:
raise ValueError(f"k must be greater than 0. {k} received.")
self._k = k
self._threshold = threshold

def __repr__(self) -> str:
""":return: Formatted string representation of the metric."""
return f"{self.ALIAS[0]}@{self._k}({self._threshold})"

def __call__(self, y_true: np.array, y_pred: np.array) -> float:
"""
Calculate recall@k.

Example:
>>> y_true = [0, 0, 0, 1]
>>> y_pred = [0.2, 0.4, 0.3, 0.1]
>>> Recall(k=1)(y_true, y_pred)
0.0
>>> Recall(k=2)(y_true, y_pred)
0.0
>>> Recall(k=4)(y_true, y_pred)
1.0
>>> Recall(k=5)(y_true, y_pred)
1.0

:param y_true: The ground truth label of each document.
:param y_pred: The predicted scores of each document.
:return: Recall @ k.
"""
result = 0.
pos = 0
coupled_pair = sort_and_couple(y_true, y_pred)
for idx, (label, score) in enumerate(coupled_pair):
if label > self._threshold:
pos += 1.
if idx < self._k:
result += 1.
if pos == 0:
return 0.
else:
return result / pos
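A standalone sketch of the Recall@k loop on the doctest data; note that `pos` counts every relevant document (the denominator), while only relevant documents ranked inside the top `k` add to the numerator:

```python
# Standalone re-run of the Recall@k loop above on the doctest data.
y_true = [0, 0, 0, 1]
y_pred = [0.2, 0.4, 0.3, 0.1]

coupled = sorted(zip(y_true, y_pred), key=lambda x: x[1], reverse=True)
# coupled -> [(0, 0.4), (0, 0.3), (0, 0.2), (1, 0.1)]

k, threshold = 4, 0.
hits, pos = 0., 0
for idx, (label, _) in enumerate(coupled):
    if label > threshold:
        pos += 1             # every relevant doc counts in the denominator
        if idx < k:
            hits += 1.       # only relevant docs inside the top k count as hits
print(hits / pos if pos else 0.)  # 1.0; with k=2 the single relevant doc is missed -> 0.0
```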
22 changes: 18 additions & 4 deletions tests/test_metrics.py
@@ -11,10 +11,10 @@ def test_sort_and_couple():
assert (c == np.array([(1, 0.4), (2, 0.2), (0, 0.1)])).all()


def test_mean_reciprocal_rank():
def test_mean_reciprocal_rank_at_k():
label = [0, 1, 2]
score = [0.1, 0.4, 0.2]
assert metrics.MeanReciprocalRank()(label, score) == 1
assert metrics.MeanReciprocalRank(k=1)(label, score) == 1


def test_precision_at_k():
Expand All @@ -25,16 +25,24 @@ def test_precision_at_k():
assert round(metrics.Precision(k=3)(label, score), 2) == 0.67


def test_recall_at_k():
label = [0, 1, 2]
score = [0.1, 0.4, 0.2]
assert metrics.Recall(k=1)(label, score) == 0.5
assert metrics.Recall(k=3)(label, score) == 1.


def test_average_precision():
label = [0, 1, 2]
score = [0.1, 0.4, 0.2]
assert round(metrics.AveragePrecision()(label, score), 2) == 0.89


def test_mean_average_precision():
def test_mean_average_precision_at_k():
label = [0, 1, 2]
score = [0.1, 0.4, 0.2]
assert metrics.MeanAveragePrecision()(label, score) == 1.
assert metrics.MeanAveragePrecision(k=1)(label, score) == 0.5
assert metrics.MeanAveragePrecision(k=3)(label, score) == 1.


def test_dcg_at_k():
Expand All @@ -61,6 +69,12 @@ def test_accuracy():
assert metrics.Accuracy()(label, score) == 1


def test_f1():
label = np.array([1, 1, 0, 0])
score = np.array([[0.2, 0.8], [0.6, 0.4], [0.7, 0.3], [0.3, 0.7]])
assert metrics.F1()(label, score) == 0.5


def test_cross_entropy():
label = [0, 1]
score = [[0.25, 0.25], [0.01, 0.90]]
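The docstring examples above double as doctests; a minimal sketch for running them locally, assuming the repository root is on `PYTHONPATH`:

```python
# Minimal sketch for exercising the new doctests locally
# (assumes the new modules are importable from the repository root).
import doctest

import matchzoo.metrics.f1
import matchzoo.metrics.recall

for mod in (matchzoo.metrics.f1, matchzoo.metrics.recall):
    result = doctest.testmod(mod)
    print(f"{mod.__name__}: {result.attempted} examples, {result.failed} failures")
```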