"""F1 metric for Classification."""
import numpy as np

from matchzoo.engine.base_metric import ClassificationMetric


class F1(ClassificationMetric):
    """F1 metric: harmonic mean of precision and recall."""

    ALIAS = ['f1']

    def __init__(self, threshold: float = 0.):
        """
        :class:`F1` constructor.

        :param threshold: The threshold of relevance degree. Both the true
            label and the predicted class index count as positive only when
            strictly greater than this value.
        """
        self._threshold = threshold

    def __repr__(self) -> str:
        """:return: Formatted string representation of the metric."""
        # Use the primary alias (not the whole list) so the output reads
        # ``f1(0.0)``, matching how the sibling metrics format themselves.
        return f"{self.ALIAS[0]}({self._threshold})"

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """
        Calculate f1.

        Example:
            >>> import numpy as np
            >>> y_true = np.array([1, 1, 0, 0])
            >>> y_pred = np.array([[0.2, 0.8], [0.6, 0.4], [0.7, 0.3], [0.3, 0.7]])
            >>> F1()(y_true, y_pred)
            0.5
            >>> F1()(np.array([0, 0]), np.array([[0.9, 0.1], [0.8, 0.2]]))
            0.0

        :param y_true: The ground truth label of each document.
        :param y_pred: The predicted scores of each document; the arg-max
            along axis 1 is taken as the predicted class index.
        :return: F1, or ``0.0`` when there is no true positive (precision
            and/or recall would otherwise be undefined).
        """
        predicted_classes = np.argmax(y_pred, axis=1)

        tp, fp, fn = 0, 0, 0
        for label, predicted in zip(y_true, predicted_classes):
            label_positive = label > self._threshold
            predicted_positive = predicted > self._threshold
            if label_positive and predicted_positive:
                tp += 1
            elif predicted_positive:
                fp += 1
            elif label_positive:
                fn += 1

        # Without a true positive, recall and precision are each 0 or 0/0,
        # and their harmonic mean is 0/0 as well. Report 0.0 instead of
        # raising ZeroDivisionError.
        if tp == 0:
            return 0.

        recall = tp / (tp + fn)
        precision = tp / (tp + fp)
        return 2 * recall * precision / (recall + precision)
else: diff --git a/matchzoo/metrics/mean_reciprocal_rank.py b/matchzoo/metrics/mean_reciprocal_rank.py index dddf9b1..5e77adb 100644 --- a/matchzoo/metrics/mean_reciprocal_rank.py +++ b/matchzoo/metrics/mean_reciprocal_rank.py @@ -11,17 +11,21 @@ class MeanReciprocalRank(RankingMetric): ALIAS = ['mean_reciprocal_rank', 'mrr'] - def __init__(self, threshold: float = 0.): + def __init__(self, k: int = 1, threshold: float = 0.): """ :class:`MeanReciprocalRankMetric`. + :param k: Number of results to consider. :param threshold: The label threshold of relevance degree. """ + if k <= 0: + raise ValueError(f"k must be greater than 0. {k} received.") + self._k = k self._threshold = threshold def __repr__(self) -> str: """:return: Formated string representation of the metric.""" - return f'{self.ALIAS[0]}({self._threshold})' + return f"{self.ALIAS[0]}@{self._k}({self._threshold})" def __call__(self, y_true: np.array, y_pred: np.array) -> float: """ @@ -31,15 +35,17 @@ def __call__(self, y_true: np.array, y_pred: np.array) -> float: >>> import numpy as np >>> y_pred = np.asarray([0.2, 0.3, 0.7, 1.0]) >>> y_true = np.asarray([1, 0, 0, 0]) - >>> MeanReciprocalRank()(y_true, y_pred) + >>> MeanReciprocalRank(k=4)(y_true, y_pred) 0.25 :param y_true: The ground true label of each document. :param y_pred: The predicted scores of each document. - :return: Mean reciprocal rank. + :return: Mean reciprocal rank @ k. """ coupled_pair = sort_and_couple(y_true, y_pred) for idx, (label, pred) in enumerate(coupled_pair): + if idx >= self._k: + break if label > self._threshold: return 1. / (idx + 1) return 0. diff --git a/matchzoo/metrics/precision.py b/matchzoo/metrics/precision.py index 72cfd74..d5829b3 100644 --- a/matchzoo/metrics/precision.py +++ b/matchzoo/metrics/precision.py @@ -18,6 +18,8 @@ def __init__(self, k: int = 1, threshold: float = 0.): :param k: Number of results to consider. :param threshold: the label threshold of relevance degree. 
"""Recall for ranking."""
import numpy as np

from matchzoo.engine.base_metric import (
    BaseMetric, sort_and_couple, RankingMetric
)


class Recall(RankingMetric):
    """Recall metric: share of relevant documents found in the top k."""

    # A list, like the other metrics' ALIAS attributes, so that code which
    # indexes ``ALIAS[0]`` or iterates over aliases sees whole names.
    ALIAS = ['recall']

    def __init__(self, k: int = 1, threshold: float = 0.):
        """
        :class:`Recall` constructor.

        :param k: Number of results to consider.
        :param threshold: The label threshold of relevance degree.
        :raises ValueError: If ``k`` is not greater than 0.
        """
        if k <= 0:
            raise ValueError(f"k must be greater than 0. {k} received.")
        self._k = k
        self._threshold = threshold

    def __repr__(self) -> str:
        """:return: Formatted string representation of the metric."""
        return f"{self.ALIAS[0]}@{self._k}({self._threshold})"

    def __call__(self, y_true: np.array, y_pred: np.array) -> float:
        """
        Calculate recall@k.

        Example:
            >>> y_true = [0, 0, 0, 1]
            >>> y_pred = [0.2, 0.4, 0.3, 0.1]
            >>> Recall(k=1)(y_true, y_pred)
            0.0
            >>> Recall(k=2)(y_true, y_pred)
            0.0
            >>> Recall(k=4)(y_true, y_pred)
            1.0
            >>> Recall(k=5)(y_true, y_pred)
            1.0

        :param y_true: The ground truth label of each document.
        :param y_pred: The predicted scores of each document.
        :return: Recall @ k, or ``0.0`` when no document is relevant.
        """
        # NOTE(review): assumes sort_and_couple yields (label, score) pairs
        # ordered by descending score, as the doctest above implies.
        coupled_pair = sort_and_couple(y_true, y_pred)
        relevant_total = 0
        relevant_in_top_k = 0
        for idx, (label, _) in enumerate(coupled_pair):
            if label > self._threshold:
                relevant_total += 1
                if idx < self._k:
                    relevant_in_top_k += 1
        # No relevant documents at all: recall is undefined, report 0.0.
        if relevant_total == 0:
            return 0.
        return relevant_in_top_k / relevant_total