From 7a53e9d3d480e19a53c76cb830983efabf3f9128 Mon Sep 17 00:00:00 2001 From: caiyinqiong <1198593462@qq.com> Date: Tue, 3 Mar 2020 11:33:21 +0800 Subject: [PATCH 1/3] add_metrics --- matchzoo/metrics/__init__.py | 2 + matchzoo/metrics/f1.py | 54 +++++++++++++++++++ matchzoo/metrics/mean_average_precision.py | 17 ++++-- matchzoo/metrics/mean_reciprocal_rank.py | 16 ++++-- matchzoo/metrics/precision.py | 4 +- matchzoo/metrics/recall.py | 63 ++++++++++++++++++++++ tests/test_metrics.py | 22 ++++++-- 7 files changed, 163 insertions(+), 15 deletions(-) create mode 100644 matchzoo/metrics/f1.py create mode 100644 matchzoo/metrics/recall.py diff --git a/matchzoo/metrics/__init__.py b/matchzoo/metrics/__init__.py index cf1a4d1..e9ec560 100644 --- a/matchzoo/metrics/__init__.py +++ b/matchzoo/metrics/__init__.py @@ -1,4 +1,5 @@ from .precision import Precision +from .recall import Recall from .average_precision import AveragePrecision from .discounted_cumulative_gain import DiscountedCumulativeGain from .mean_reciprocal_rank import MeanReciprocalRank @@ -7,6 +8,7 @@ NormalizedDiscountedCumulativeGain from .accuracy import Accuracy +from .f1 import F1 from .cross_entropy import CrossEntropy diff --git a/matchzoo/metrics/f1.py b/matchzoo/metrics/f1.py new file mode 100644 index 0000000..8c7d5ac --- /dev/null +++ b/matchzoo/metrics/f1.py @@ -0,0 +1,54 @@ +"""F1 metric for Classification.""" +import numpy as np + +from matchzoo.engine.base_metric import ClassificationMetric + + +class F1(ClassificationMetric): + """F1 metric.""" + + ALIAS = ['f1'] + + def __init__(self, threshold: float = 0.): + """ + :class:`F1` constructor. + + :param threshold: The threshold of relevance degree. + """ + self._threshold = threshold + + def __repr__(self) -> str: + """:return: Formated string representation of the metric.""" + return f"{self.ALIAS}({self._threshold})" + + def __call__(self, y_true: np.array, y_pred: np.array) -> float: + """ + Calculate f1. + + Example: + >>> import numpy as np + >>> y_true = np.array([1, 1, 0, 0]) + >>> y_pred = np.array([[0.2, 0.8], [0.6, 0.4], [0.7, 0.3], [0.3, 0.7]]) + >>> F1()(y_true, y_pred) + 0.5 + + :param y_true: The ground true label of each document. + :param y_pred: The predicted scores of each document. + :return: F1. + """ + y_pred = np.argmax(y_pred, axis=1) + + tp = 0. + fp = 0. + fn = 0. + for label, score in zip(y_true, y_pred): + if label > self._threshold and score > self._threshold: + tp += 1. + elif label <= self._threshold and score > self._threshold: + fp += 1. + elif label > self._threshold and score <= self._threshold: + fn += 1. + recall = tp / (tp + fn) + precision = tp / (tp + fp) + f1 = 2 * recall * precision / (recall + precision) + return f1 diff --git a/matchzoo/metrics/mean_average_precision.py b/matchzoo/metrics/mean_average_precision.py index 5640dba..cb647d9 100644 --- a/matchzoo/metrics/mean_average_precision.py +++ b/matchzoo/metrics/mean_average_precision.py @@ -11,17 +11,19 @@ class MeanAveragePrecision(RankingMetric): ALIAS = ['mean_average_precision', 'map'] - def __init__(self, threshold: float = 0.): + def __init__(self, k: int = 1, threshold: float = 0.): """ :class:`MeanAveragePrecision` constructor. + :param k: Number of results to consider. :param threshold: The threshold of relevance degree. 
""" + self._k = k self._threshold = threshold def __repr__(self): """:return: Formated string representation of the metric.""" - return f"{self.ALIAS[0]}({self._threshold})" + return f"{self.ALIAS[0]}@{self._k}({self._threshold})" def __call__(self, y_true: np.array, y_pred: np.array) -> float: """ @@ -30,20 +32,25 @@ def __call__(self, y_true: np.array, y_pred: np.array) -> float: Example: >>> y_true = [0, 1, 0, 0] >>> y_pred = [0.1, 0.6, 0.2, 0.3] - >>> MeanAveragePrecision()(y_true, y_pred) + >>> MeanAveragePrecision(k=4)(y_true, y_pred) 1.0 :param y_true: The ground true label of each document. :param y_pred: The predicted scores of each document. - :return: Mean average precision. + :return: Mean average precision @ k. + :raises: ValueError: k must be greater than 0. """ + if self._k <= 0: + raise ValueError(f"k must be greater than 0." + f"{self._k} received.") result = 0. pos = 0 coupled_pair = sort_and_couple(y_true, y_pred) for idx, (label, score) in enumerate(coupled_pair): if label > self._threshold: pos += 1. - result += pos / (idx + 1.) + if idx < self._k: + result += pos / (idx + 1.) if pos == 0: return 0. else: diff --git a/matchzoo/metrics/mean_reciprocal_rank.py b/matchzoo/metrics/mean_reciprocal_rank.py index dddf9b1..b339d4f 100644 --- a/matchzoo/metrics/mean_reciprocal_rank.py +++ b/matchzoo/metrics/mean_reciprocal_rank.py @@ -11,17 +11,19 @@ class MeanReciprocalRank(RankingMetric): ALIAS = ['mean_reciprocal_rank', 'mrr'] - def __init__(self, threshold: float = 0.): + def __init__(self, k: int = 1, threshold: float = 0.): """ :class:`MeanReciprocalRankMetric`. + :param k: Number of results to consider. :param threshold: The label threshold of relevance degree. """ + self._k = k self._threshold = threshold def __repr__(self) -> str: """:return: Formated string representation of the metric.""" - return f'{self.ALIAS[0]}({self._threshold})' + return f"{self.ALIAS[0]}@{self._k}({self._threshold})" def __call__(self, y_true: np.array, y_pred: np.array) -> float: """ @@ -31,15 +33,21 @@ def __call__(self, y_true: np.array, y_pred: np.array) -> float: >>> import numpy as np >>> y_pred = np.asarray([0.2, 0.3, 0.7, 1.0]) >>> y_true = np.asarray([1, 0, 0, 0]) - >>> MeanReciprocalRank()(y_true, y_pred) + >>> MeanReciprocalRank(k=4)(y_true, y_pred) 0.25 :param y_true: The ground true label of each document. :param y_pred: The predicted scores of each document. - :return: Mean reciprocal rank. + :return: Mean reciprocal rank @ k. + :raises: ValueError: k must be greater than 0. """ + if self._k <= 0: + raise ValueError(f"k must be greater than 0." + f"{self._k} received.") coupled_pair = sort_and_couple(y_true, y_pred) for idx, (label, pred) in enumerate(coupled_pair): + if idx >= self._k: + break if label > self._threshold: return 1. / (idx + 1) return 0. diff --git a/matchzoo/metrics/precision.py b/matchzoo/metrics/precision.py index 72cfd74..cb59da5 100644 --- a/matchzoo/metrics/precision.py +++ b/matchzoo/metrics/precision.py @@ -43,8 +43,8 @@ def __call__(self, y_true: np.array, y_pred: np.array) -> float: :param y_true: The ground true label of each document. :param y_pred: The predicted scores of each document. - :return: Precision @ k - :raises: ValueError: len(r) must be >= k. + :return: Precision @ k. + :raises: ValueError: k must be greater than 0. """ if self._k <= 0: raise ValueError(f"k must be greater than 0." 
diff --git a/matchzoo/metrics/recall.py b/matchzoo/metrics/recall.py new file mode 100644 index 0000000..007c691 --- /dev/null +++ b/matchzoo/metrics/recall.py @@ -0,0 +1,63 @@ +"""Recall for ranking.""" +import numpy as np + +from matchzoo.engine.base_metric import ( + BaseMetric, sort_and_couple, RankingMetric +) + + +class Recall(RankingMetric): + """Recall metric.""" + + ALIAS = 'recall' + + def __init__(self, k: int = 1, threshold: float = 0.): + """ + :class:`RecallMetric` constructor. + + :param k: Number of results to consider. + :param threshold: the label threshold of relevance degree. + """ + self._k = k + self._threshold = threshold + + def __repr__(self) -> str: + """:return: Formated string representation of the metric.""" + return f"{self.ALIAS}@{self._k}({self._threshold})" + + def __call__(self, y_true: np.array, y_pred: np.array) -> float: + """ + Calculate recall@k. + + Example: + >>> y_true = [0, 0, 0, 1] + >>> y_pred = [0.2, 0.4, 0.3, 0.1] + >>> Recall(k=1)(y_true, y_pred) + 0.0 + >>> Recall(k=2)(y_true, y_pred) + 0.0 + >>> Recall(k=4)(y_true, y_pred) + 1.0 + >>> Recall(k=5)(y_true, y_pred) + 1.0 + + :param y_true: The ground true label of each document. + :param y_pred: The predicted scores of each document. + :return: Recall @ k. + :raises: ValueError: k must be greater than 0. + """ + if self._k <= 0: + raise ValueError(f"k must be greater than 0." + f"{self._k} received.") + result = 0. + pos = 0. + coupled_pair = sort_and_couple(y_true, y_pred) + for idx, (label, score) in enumerate(coupled_pair): + if label > self._threshold: + pos += 1. + if idx < self._k: + result += 1. + if pos == 0: + return 0. + else: + return result / pos diff --git a/tests/test_metrics.py b/tests/test_metrics.py index ce8e78c..8f9f378 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -11,10 +11,10 @@ def test_sort_and_couple(): assert (c == np.array([(1, 0.4), (2, 0.2), (0, 0.1)])).all() -def test_mean_reciprocal_rank(): +def test_mean_reciprocal_rank_at_k(): label = [0, 1, 2] score = [0.1, 0.4, 0.2] - assert metrics.MeanReciprocalRank()(label, score) == 1 + assert metrics.MeanReciprocalRank(k=1)(label, score) == 1 def test_precision_at_k(): @@ -25,16 +25,24 @@ def test_precision_at_k(): assert round(metrics.Precision(k=3)(label, score), 2) == 0.67 +def test_recall_at_k(): + label = [0, 1, 2] + score = [0.1, 0.4, 0.2] + assert metrics.Recall(k=1)(label, score) == 0.5 + assert metrics.Recall(k=3)(label, score) == 1. + + def test_average_precision(): label = [0, 1, 2] score = [0.1, 0.4, 0.2] assert round(metrics.AveragePrecision()(label, score), 2) == 0.89 -def test_mean_average_precision(): +def test_mean_average_precision_at_k(): label = [0, 1, 2] score = [0.1, 0.4, 0.2] - assert metrics.MeanAveragePrecision()(label, score) == 1. + assert metrics.MeanAveragePrecision(k=1)(label, score) == 1. + assert metrics.MeanAveragePrecision(k=3)(label, score) == 1. 
 def test_dcg_at_k():
@@ -61,6 +69,12 @@ def test_accuracy():
     assert metrics.Accuracy()(label, score) == 1
 
 
+def test_f1():
+    label = np.array([1, 1, 0, 0])
+    score = np.array([[0.2, 0.8], [0.6, 0.4], [0.7, 0.3], [0.3, 0.7]])
+    assert metrics.F1()(label, score) == 0.5
+
+
 def test_cross_entropy():
     label = [0, 1]
     score = [[0.25, 0.25], [0.01, 0.90]]

From 78c6afc6652b2f23e2eba875730ceb7465f92943 Mon Sep 17 00:00:00 2001
From: caiyinqiong <1198593462@qq.com>
Date: Tue, 3 Mar 2020 12:17:10 +0800
Subject: [PATCH 2/3] fix test

---
 tests/test_metrics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_metrics.py b/tests/test_metrics.py
index 8f9f378..853a063 100644
--- a/tests/test_metrics.py
+++ b/tests/test_metrics.py
@@ -41,7 +41,7 @@ def test_average_precision():
 def test_mean_average_precision_at_k():
     label = [0, 1, 2]
     score = [0.1, 0.4, 0.2]
-    assert metrics.MeanAveragePrecision(k=1)(label, score) == 1.
+    assert metrics.MeanAveragePrecision(k=1)(label, score) == 0.5
     assert metrics.MeanAveragePrecision(k=3)(label, score) == 1.
 

From 72db9a9199e428588de0ca6dc2e140f25f29c3e1 Mon Sep 17 00:00:00 2001
From: caiyinqiong <1198593462@qq.com>
Date: Tue, 3 Mar 2020 21:58:15 +0800
Subject: [PATCH 3/3] fix raise ValueError

---
 matchzoo/metrics/f1.py                     | 4 +---
 matchzoo/metrics/mean_average_precision.py | 6 ++----
 matchzoo/metrics/mean_reciprocal_rank.py   | 6 ++----
 matchzoo/metrics/precision.py              | 6 ++----
 matchzoo/metrics/recall.py                 | 8 +++-----
 5 files changed, 10 insertions(+), 20 deletions(-)

diff --git a/matchzoo/metrics/f1.py b/matchzoo/metrics/f1.py
index 8c7d5ac..8470b77 100644
--- a/matchzoo/metrics/f1.py
+++ b/matchzoo/metrics/f1.py
@@ -38,9 +38,7 @@ def __call__(self, y_true: np.array, y_pred: np.array) -> float:
         """
         y_pred = np.argmax(y_pred, axis=1)
 
-        tp = 0.
-        fp = 0.
-        fn = 0.
+        tp, fp, fn = 0., 0., 0.
         for label, score in zip(y_true, y_pred):
             if label > self._threshold and score > self._threshold:
                 tp += 1.
diff --git a/matchzoo/metrics/mean_average_precision.py b/matchzoo/metrics/mean_average_precision.py
index cb647d9..82d0ecd 100644
--- a/matchzoo/metrics/mean_average_precision.py
+++ b/matchzoo/metrics/mean_average_precision.py
@@ -18,6 +18,8 @@ def __init__(self, k: int = 1, threshold: float = 0.):
         :param k: Number of results to consider.
         :param threshold: The threshold of relevance degree.
         """
+        if k <= 0:
+            raise ValueError(f"k must be greater than 0. {k} received.")
         self._k = k
         self._threshold = threshold
 
@@ -38,11 +40,7 @@ def __call__(self, y_true: np.array, y_pred: np.array) -> float:
         :param y_true: The ground true label of each document.
         :param y_pred: The predicted scores of each document.
         :return: Mean average precision @ k.
-        :raises: ValueError: k must be greater than 0.
         """
-        if self._k <= 0:
-            raise ValueError(f"k must be greater than 0."
-                             f"{self._k} received.")
         result = 0.
         pos = 0
         coupled_pair = sort_and_couple(y_true, y_pred)
diff --git a/matchzoo/metrics/mean_reciprocal_rank.py b/matchzoo/metrics/mean_reciprocal_rank.py
index b339d4f..5e77adb 100644
--- a/matchzoo/metrics/mean_reciprocal_rank.py
+++ b/matchzoo/metrics/mean_reciprocal_rank.py
@@ -18,6 +18,8 @@ def __init__(self, k: int = 1, threshold: float = 0.):
         :param k: Number of results to consider.
         :param threshold: The label threshold of relevance degree.
         """
+        if k <= 0:
+            raise ValueError(f"k must be greater than 0. {k} received.")
         self._k = k
         self._threshold = threshold
 
@@ -39,11 +41,7 @@ def __call__(self, y_true: np.array, y_pred: np.array) -> float:
         :param y_true: The ground true label of each document.
         :param y_pred: The predicted scores of each document.
         :return: Mean reciprocal rank @ k.
-        :raises: ValueError: k must be greater than 0.
         """
-        if self._k <= 0:
-            raise ValueError(f"k must be greater than 0."
-                             f"{self._k} received.")
         coupled_pair = sort_and_couple(y_true, y_pred)
         for idx, (label, pred) in enumerate(coupled_pair):
             if idx >= self._k:
diff --git a/matchzoo/metrics/precision.py b/matchzoo/metrics/precision.py
index cb59da5..d5829b3 100644
--- a/matchzoo/metrics/precision.py
+++ b/matchzoo/metrics/precision.py
@@ -18,6 +18,8 @@ def __init__(self, k: int = 1, threshold: float = 0.):
         :param k: Number of results to consider.
         :param threshold: the label threshold of relevance degree.
         """
+        if k <= 0:
+            raise ValueError(f"k must be greater than 0. {k} received.")
         self._k = k
         self._threshold = threshold
 
@@ -44,11 +46,7 @@ def __call__(self, y_true: np.array, y_pred: np.array) -> float:
         :param y_true: The ground true label of each document.
         :param y_pred: The predicted scores of each document.
         :return: Precision @ k.
-        :raises: ValueError: k must be greater than 0.
         """
-        if self._k <= 0:
-            raise ValueError(f"k must be greater than 0."
-                             f"{self._k} received.")
         coupled_pair = sort_and_couple(y_true, y_pred)
         precision = 0.0
         for idx, (label, score) in enumerate(coupled_pair):
diff --git a/matchzoo/metrics/recall.py b/matchzoo/metrics/recall.py
index 007c691..758e2d5 100644
--- a/matchzoo/metrics/recall.py
+++ b/matchzoo/metrics/recall.py
@@ -18,6 +18,8 @@ def __init__(self, k: int = 1, threshold: float = 0.):
         :param k: Number of results to consider.
         :param threshold: the label threshold of relevance degree.
         """
+        if k <= 0:
+            raise ValueError(f"k must be greater than 0. {k} received.")
         self._k = k
         self._threshold = threshold
 
@@ -44,13 +46,9 @@ def __call__(self, y_true: np.array, y_pred: np.array) -> float:
         :param y_true: The ground true label of each document.
        :param y_pred: The predicted scores of each document.
         :return: Recall @ k.
-        :raises: ValueError: k must be greater than 0.
         """
-        if self._k <= 0:
-            raise ValueError(f"k must be greater than 0."
-                             f"{self._k} received.")
         result = 0.
-        pos = 0.
+        pos = 0
         coupled_pair = sort_and_couple(y_true, y_pred)
         for idx, (label, score) in enumerate(coupled_pair):
             if label > self._threshold:
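
Usage sketch (illustrative, not part of the patch series above): assuming the patched matchzoo package from these three commits is installed, the new and extended metrics are called as plain callables on (y_true, y_pred). The expected values below are taken directly from the doctests and unit tests in the diffs.

import numpy as np
from matchzoo import metrics

# Ranking metrics (Recall@k, MRR@k, MAP@k): predictions are sorted internally by
# sort_and_couple, and only the top-k ranked documents are considered.
y_true = [0, 0, 0, 1]
y_pred = [0.2, 0.4, 0.3, 0.1]
print(metrics.Recall(k=1)(y_true, y_pred))  # 0.0 -- the only relevant doc is ranked last
print(metrics.Recall(k=4)(y_true, y_pred))  # 1.0

print(metrics.MeanReciprocalRank(k=4)(np.asarray([1, 0, 0, 0]),
                                      np.asarray([0.2, 0.3, 0.7, 1.0])))  # 0.25
print(metrics.MeanAveragePrecision(k=4)([0, 1, 0, 0], [0.1, 0.6, 0.2, 0.3]))  # 1.0

# Classification metric: F1 argmaxes the per-class scores before thresholding.
y_true_cls = np.array([1, 1, 0, 0])
y_pred_cls = np.array([[0.2, 0.8], [0.6, 0.4], [0.7, 0.3], [0.3, 0.7]])
print(metrics.F1()(y_true_cls, y_pred_cls))  # 0.5

# After PATCH 3/3, the k <= 0 check happens at construction time.
try:
    metrics.Recall(k=0)
except ValueError as err:
    print(err)  # k must be greater than 0. 0 received.

Note that PATCH 3/3 moves the k validation from __call__ into __init__, so an invalid k now fails fast when the metric object is created rather than on every evaluation call.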