From 7a53e9d3d480e19a53c76cb830983efabf3f9128 Mon Sep 17 00:00:00 2001 From: caiyinqiong <1198593462@qq.com> Date: Tue, 3 Mar 2020 11:33:21 +0800 Subject: [PATCH 1/3] add_metrics --- matchzoo/metrics/__init__.py | 2 + matchzoo/metrics/f1.py | 54 +++++++++++++++++++ matchzoo/metrics/mean_average_precision.py | 17 ++++-- matchzoo/metrics/mean_reciprocal_rank.py | 16 ++++-- matchzoo/metrics/precision.py | 4 +- matchzoo/metrics/recall.py | 63 ++++++++++++++++++++++ tests/test_metrics.py | 22 ++++++-- 7 files changed, 163 insertions(+), 15 deletions(-) create mode 100644 matchzoo/metrics/f1.py create mode 100644 matchzoo/metrics/recall.py diff --git a/matchzoo/metrics/__init__.py b/matchzoo/metrics/__init__.py index cf1a4d1..e9ec560 100644 --- a/matchzoo/metrics/__init__.py +++ b/matchzoo/metrics/__init__.py @@ -1,4 +1,5 @@ from .precision import Precision +from .recall import Recall from .average_precision import AveragePrecision from .discounted_cumulative_gain import DiscountedCumulativeGain from .mean_reciprocal_rank import MeanReciprocalRank @@ -7,6 +8,7 @@ NormalizedDiscountedCumulativeGain from .accuracy import Accuracy +from .f1 import F1 from .cross_entropy import CrossEntropy diff --git a/matchzoo/metrics/f1.py b/matchzoo/metrics/f1.py new file mode 100644 index 0000000..8c7d5ac --- /dev/null +++ b/matchzoo/metrics/f1.py @@ -0,0 +1,54 @@ +"""F1 metric for Classification.""" +import numpy as np + +from matchzoo.engine.base_metric import ClassificationMetric + + +class F1(ClassificationMetric): + """F1 metric.""" + + ALIAS = ['f1'] + + def __init__(self, threshold: float = 0.): + """ + :class:`F1` constructor. + + :param threshold: The threshold of relevance degree. + """ + self._threshold = threshold + + def __repr__(self) -> str: + """:return: Formated string representation of the metric.""" + return f"{self.ALIAS}({self._threshold})" + + def __call__(self, y_true: np.array, y_pred: np.array) -> float: + """ + Calculate f1. + + Example: + >>> import numpy as np + >>> y_true = np.array([1, 1, 0, 0]) + >>> y_pred = np.array([[0.2, 0.8], [0.6, 0.4], [0.7, 0.3], [0.3, 0.7]]) + >>> F1()(y_true, y_pred) + 0.5 + + :param y_true: The ground true label of each document. + :param y_pred: The predicted scores of each document. + :return: F1. + """ + y_pred = np.argmax(y_pred, axis=1) + + tp = 0. + fp = 0. + fn = 0. + for label, score in zip(y_true, y_pred): + if label > self._threshold and score > self._threshold: + tp += 1. + elif label <= self._threshold and score > self._threshold: + fp += 1. + elif label > self._threshold and score <= self._threshold: + fn += 1. + recall = tp / (tp + fn) + precision = tp / (tp + fp) + f1 = 2 * recall * precision / (recall + precision) + return f1 diff --git a/matchzoo/metrics/mean_average_precision.py b/matchzoo/metrics/mean_average_precision.py index 5640dba..cb647d9 100644 --- a/matchzoo/metrics/mean_average_precision.py +++ b/matchzoo/metrics/mean_average_precision.py @@ -11,17 +11,19 @@ class MeanAveragePrecision(RankingMetric): ALIAS = ['mean_average_precision', 'map'] - def __init__(self, threshold: float = 0.): + def __init__(self, k: int = 1, threshold: float = 0.): """ :class:`MeanAveragePrecision` constructor. + :param k: Number of results to consider. :param threshold: The threshold of relevance degree. 
""" + self._k = k self._threshold = threshold def __repr__(self): """:return: Formated string representation of the metric.""" - return f"{self.ALIAS[0]}({self._threshold})" + return f"{self.ALIAS[0]}@{self._k}({self._threshold})" def __call__(self, y_true: np.array, y_pred: np.array) -> float: """ @@ -30,20 +32,25 @@ def __call__(self, y_true: np.array, y_pred: np.array) -> float: Example: >>> y_true = [0, 1, 0, 0] >>> y_pred = [0.1, 0.6, 0.2, 0.3] - >>> MeanAveragePrecision()(y_true, y_pred) + >>> MeanAveragePrecision(k=4)(y_true, y_pred) 1.0 :param y_true: The ground true label of each document. :param y_pred: The predicted scores of each document. - :return: Mean average precision. + :return: Mean average precision @ k. + :raises: ValueError: k must be greater than 0. """ + if self._k <= 0: + raise ValueError(f"k must be greater than 0." + f"{self._k} received.") result = 0. pos = 0 coupled_pair = sort_and_couple(y_true, y_pred) for idx, (label, score) in enumerate(coupled_pair): if label > self._threshold: pos += 1. - result += pos / (idx + 1.) + if idx < self._k: + result += pos / (idx + 1.) if pos == 0: return 0. else: diff --git a/matchzoo/metrics/mean_reciprocal_rank.py b/matchzoo/metrics/mean_reciprocal_rank.py index dddf9b1..b339d4f 100644 --- a/matchzoo/metrics/mean_reciprocal_rank.py +++ b/matchzoo/metrics/mean_reciprocal_rank.py @@ -11,17 +11,19 @@ class MeanReciprocalRank(RankingMetric): ALIAS = ['mean_reciprocal_rank', 'mrr'] - def __init__(self, threshold: float = 0.): + def __init__(self, k: int = 1, threshold: float = 0.): """ :class:`MeanReciprocalRankMetric`. + :param k: Number of results to consider. :param threshold: The label threshold of relevance degree. """ + self._k = k self._threshold = threshold def __repr__(self) -> str: """:return: Formated string representation of the metric.""" - return f'{self.ALIAS[0]}({self._threshold})' + return f"{self.ALIAS[0]}@{self._k}({self._threshold})" def __call__(self, y_true: np.array, y_pred: np.array) -> float: """ @@ -31,15 +33,21 @@ def __call__(self, y_true: np.array, y_pred: np.array) -> float: >>> import numpy as np >>> y_pred = np.asarray([0.2, 0.3, 0.7, 1.0]) >>> y_true = np.asarray([1, 0, 0, 0]) - >>> MeanReciprocalRank()(y_true, y_pred) + >>> MeanReciprocalRank(k=4)(y_true, y_pred) 0.25 :param y_true: The ground true label of each document. :param y_pred: The predicted scores of each document. - :return: Mean reciprocal rank. + :return: Mean reciprocal rank @ k. + :raises: ValueError: k must be greater than 0. """ + if self._k <= 0: + raise ValueError(f"k must be greater than 0." + f"{self._k} received.") coupled_pair = sort_and_couple(y_true, y_pred) for idx, (label, pred) in enumerate(coupled_pair): + if idx >= self._k: + break if label > self._threshold: return 1. / (idx + 1) return 0. diff --git a/matchzoo/metrics/precision.py b/matchzoo/metrics/precision.py index 72cfd74..cb59da5 100644 --- a/matchzoo/metrics/precision.py +++ b/matchzoo/metrics/precision.py @@ -43,8 +43,8 @@ def __call__(self, y_true: np.array, y_pred: np.array) -> float: :param y_true: The ground true label of each document. :param y_pred: The predicted scores of each document. - :return: Precision @ k - :raises: ValueError: len(r) must be >= k. + :return: Precision @ k. + :raises: ValueError: k must be greater than 0. """ if self._k <= 0: raise ValueError(f"k must be greater than 0." 
diff --git a/matchzoo/metrics/recall.py b/matchzoo/metrics/recall.py new file mode 100644 index 0000000..007c691 --- /dev/null +++ b/matchzoo/metrics/recall.py @@ -0,0 +1,63 @@ +"""Recall for ranking.""" +import numpy as np + +from matchzoo.engine.base_metric import ( + BaseMetric, sort_and_couple, RankingMetric +) + + +class Recall(RankingMetric): + """Recall metric.""" + + ALIAS = 'recall' + + def __init__(self, k: int = 1, threshold: float = 0.): + """ + :class:`RecallMetric` constructor. + + :param k: Number of results to consider. + :param threshold: the label threshold of relevance degree. + """ + self._k = k + self._threshold = threshold + + def __repr__(self) -> str: + """:return: Formated string representation of the metric.""" + return f"{self.ALIAS}@{self._k}({self._threshold})" + + def __call__(self, y_true: np.array, y_pred: np.array) -> float: + """ + Calculate recall@k. + + Example: + >>> y_true = [0, 0, 0, 1] + >>> y_pred = [0.2, 0.4, 0.3, 0.1] + >>> Recall(k=1)(y_true, y_pred) + 0.0 + >>> Recall(k=2)(y_true, y_pred) + 0.0 + >>> Recall(k=4)(y_true, y_pred) + 1.0 + >>> Recall(k=5)(y_true, y_pred) + 1.0 + + :param y_true: The ground true label of each document. + :param y_pred: The predicted scores of each document. + :return: Recall @ k. + :raises: ValueError: k must be greater than 0. + """ + if self._k <= 0: + raise ValueError(f"k must be greater than 0." + f"{self._k} received.") + result = 0. + pos = 0. + coupled_pair = sort_and_couple(y_true, y_pred) + for idx, (label, score) in enumerate(coupled_pair): + if label > self._threshold: + pos += 1. + if idx < self._k: + result += 1. + if pos == 0: + return 0. + else: + return result / pos diff --git a/tests/test_metrics.py b/tests/test_metrics.py index ce8e78c..8f9f378 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -11,10 +11,10 @@ def test_sort_and_couple(): assert (c == np.array([(1, 0.4), (2, 0.2), (0, 0.1)])).all() -def test_mean_reciprocal_rank(): +def test_mean_reciprocal_rank_at_k(): label = [0, 1, 2] score = [0.1, 0.4, 0.2] - assert metrics.MeanReciprocalRank()(label, score) == 1 + assert metrics.MeanReciprocalRank(k=1)(label, score) == 1 def test_precision_at_k(): @@ -25,16 +25,24 @@ def test_precision_at_k(): assert round(metrics.Precision(k=3)(label, score), 2) == 0.67 +def test_recall_at_k(): + label = [0, 1, 2] + score = [0.1, 0.4, 0.2] + assert metrics.Recall(k=1)(label, score) == 0.5 + assert metrics.Recall(k=3)(label, score) == 1. + + def test_average_precision(): label = [0, 1, 2] score = [0.1, 0.4, 0.2] assert round(metrics.AveragePrecision()(label, score), 2) == 0.89 -def test_mean_average_precision(): +def test_mean_average_precision_at_k(): label = [0, 1, 2] score = [0.1, 0.4, 0.2] - assert metrics.MeanAveragePrecision()(label, score) == 1. + assert metrics.MeanAveragePrecision(k=1)(label, score) == 1. + assert metrics.MeanAveragePrecision(k=3)(label, score) == 1. 
 def test_dcg_at_k():
@@ -61,6 +69,12 @@ def test_accuracy():
     assert metrics.Accuracy()(label, score) == 1
 
 
+def test_f1():
+    label = np.array([1, 1, 0, 0])
+    score = np.array([[0.2, 0.8], [0.6, 0.4], [0.7, 0.3], [0.3, 0.7]])
+    assert metrics.F1()(label, score) == 0.5
+
+
 def test_cross_entropy():
     label = [0, 1]
     score = [[0.25, 0.25], [0.01, 0.90]]

From 78c6afc6652b2f23e2eba875730ceb7465f92943 Mon Sep 17 00:00:00 2001
From: caiyinqiong <1198593462@qq.com>
Date: Tue, 3 Mar 2020 12:17:10 +0800
Subject: [PATCH 2/3] fix test

---
 tests/test_metrics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_metrics.py b/tests/test_metrics.py
index 8f9f378..853a063 100644
--- a/tests/test_metrics.py
+++ b/tests/test_metrics.py
@@ -41,7 +41,7 @@ def test_average_precision():
 def test_mean_average_precision_at_k():
     label = [0, 1, 2]
     score = [0.1, 0.4, 0.2]
-    assert metrics.MeanAveragePrecision(k=1)(label, score) == 1.
+    assert metrics.MeanAveragePrecision(k=1)(label, score) == 0.5
     assert metrics.MeanAveragePrecision(k=3)(label, score) == 1.
 

From 72db9a9199e428588de0ca6dc2e140f25f29c3e1 Mon Sep 17 00:00:00 2001
From: caiyinqiong <1198593462@qq.com>
Date: Tue, 3 Mar 2020 21:58:15 +0800
Subject: [PATCH 3/3] fix raise ValueError

---
 matchzoo/metrics/f1.py                     | 4 +---
 matchzoo/metrics/mean_average_precision.py | 6 ++----
 matchzoo/metrics/mean_reciprocal_rank.py   | 6 ++----
 matchzoo/metrics/precision.py              | 6 ++----
 matchzoo/metrics/recall.py                 | 8 +++-----
 5 files changed, 10 insertions(+), 20 deletions(-)

diff --git a/matchzoo/metrics/f1.py b/matchzoo/metrics/f1.py
index 8c7d5ac..8470b77 100644
--- a/matchzoo/metrics/f1.py
+++ b/matchzoo/metrics/f1.py
@@ -38,9 +38,7 @@ def __call__(self, y_true: np.array, y_pred: np.array) -> float:
         """
         y_pred = np.argmax(y_pred, axis=1)
 
-        tp = 0.
-        fp = 0.
-        fn = 0.
+        tp, fp, fn = 0., 0., 0.
         for label, score in zip(y_true, y_pred):
             if label > self._threshold and score > self._threshold:
                 tp += 1.
diff --git a/matchzoo/metrics/mean_average_precision.py b/matchzoo/metrics/mean_average_precision.py
index cb647d9..82d0ecd 100644
--- a/matchzoo/metrics/mean_average_precision.py
+++ b/matchzoo/metrics/mean_average_precision.py
@@ -18,6 +18,8 @@ def __init__(self, k: int = 1, threshold: float = 0.):
         :param k: Number of results to consider.
         :param threshold: The threshold of relevance degree.
         """
+        if k <= 0:
+            raise ValueError(f"k must be greater than 0. {k} received.")
         self._k = k
         self._threshold = threshold
 
@@ -38,11 +40,7 @@ def __call__(self, y_true: np.array, y_pred: np.array) -> float:
         :param y_true: The ground true label of each document.
         :param y_pred: The predicted scores of each document.
         :return: Mean average precision @ k.
-        :raises: ValueError: k must be greater than 0.
         """
-        if self._k <= 0:
-            raise ValueError(f"k must be greater than 0."
-                             f"{self._k} received.")
         result = 0.
         pos = 0
         coupled_pair = sort_and_couple(y_true, y_pred)
diff --git a/matchzoo/metrics/mean_reciprocal_rank.py b/matchzoo/metrics/mean_reciprocal_rank.py
index b339d4f..5e77adb 100644
--- a/matchzoo/metrics/mean_reciprocal_rank.py
+++ b/matchzoo/metrics/mean_reciprocal_rank.py
@@ -18,6 +18,8 @@ def __init__(self, k: int = 1, threshold: float = 0.):
         :param k: Number of results to consider.
         :param threshold: The label threshold of relevance degree.
         """
+        if k <= 0:
+            raise ValueError(f"k must be greater than 0. {k} received.")
         self._k = k
         self._threshold = threshold
 
@@ -39,11 +41,7 @@ def __call__(self, y_true: np.array, y_pred: np.array) -> float:
         :param y_true: The ground true label of each document.
         :param y_pred: The predicted scores of each document.
         :return: Mean reciprocal rank @ k.
-        :raises: ValueError: k must be greater than 0.
         """
-        if self._k <= 0:
-            raise ValueError(f"k must be greater than 0."
-                             f"{self._k} received.")
         coupled_pair = sort_and_couple(y_true, y_pred)
         for idx, (label, pred) in enumerate(coupled_pair):
             if idx >= self._k:
diff --git a/matchzoo/metrics/precision.py b/matchzoo/metrics/precision.py
index cb59da5..d5829b3 100644
--- a/matchzoo/metrics/precision.py
+++ b/matchzoo/metrics/precision.py
@@ -18,6 +18,8 @@ def __init__(self, k: int = 1, threshold: float = 0.):
         :param k: Number of results to consider.
         :param threshold: the label threshold of relevance degree.
         """
+        if k <= 0:
+            raise ValueError(f"k must be greater than 0. {k} received.")
         self._k = k
         self._threshold = threshold
 
@@ -44,11 +46,7 @@ def __call__(self, y_true: np.array, y_pred: np.array) -> float:
         :param y_true: The ground true label of each document.
         :param y_pred: The predicted scores of each document.
         :return: Precision @ k.
-        :raises: ValueError: k must be greater than 0.
         """
-        if self._k <= 0:
-            raise ValueError(f"k must be greater than 0."
-                             f"{self._k} received.")
         coupled_pair = sort_and_couple(y_true, y_pred)
         precision = 0.0
         for idx, (label, score) in enumerate(coupled_pair):
diff --git a/matchzoo/metrics/recall.py b/matchzoo/metrics/recall.py
index 007c691..758e2d5 100644
--- a/matchzoo/metrics/recall.py
+++ b/matchzoo/metrics/recall.py
@@ -18,6 +18,8 @@ def __init__(self, k: int = 1, threshold: float = 0.):
         :param k: Number of results to consider.
         :param threshold: the label threshold of relevance degree.
         """
+        if k <= 0:
+            raise ValueError(f"k must be greater than 0. {k} received.")
         self._k = k
         self._threshold = threshold
 
@@ -44,13 +46,9 @@ def __call__(self, y_true: np.array, y_pred: np.array) -> float:
         :param y_true: The ground true label of each document.
        :param y_pred: The predicted scores of each document.
         :return: Recall @ k.
-        :raises: ValueError: k must be greater than 0.
         """
-        if self._k <= 0:
-            raise ValueError(f"k must be greater than 0."
-                             f"{self._k} received.")
         result = 0.
-        pos = 0.
+        pos = 0
         coupled_pair = sort_and_couple(y_true, y_pred)
         for idx, (label, score) in enumerate(coupled_pair):
             if label > self._threshold:
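
Usage sketch (illustrative, not part of the patch series above): assuming the patched matchzoo package from these three commits is installed, the new and extended metrics are called as plain callables on (y_true, y_pred). The expected values below are taken directly from the doctests and unit tests in the diffs.

import numpy as np
from matchzoo import metrics

# Ranking metrics (Recall@k, MRR@k, MAP@k): predictions are sorted internally by
# sort_and_couple, and only the top-k ranked documents are considered.
y_true = [0, 0, 0, 1]
y_pred = [0.2, 0.4, 0.3, 0.1]
print(metrics.Recall(k=1)(y_true, y_pred))  # 0.0 -- the only relevant doc is ranked last
print(metrics.Recall(k=4)(y_true, y_pred))  # 1.0

print(metrics.MeanReciprocalRank(k=4)(np.asarray([1, 0, 0, 0]),
                                      np.asarray([0.2, 0.3, 0.7, 1.0])))  # 0.25
print(metrics.MeanAveragePrecision(k=4)([0, 1, 0, 0], [0.1, 0.6, 0.2, 0.3]))  # 1.0

# Classification metric: F1 argmaxes the per-class scores before thresholding.
y_true_cls = np.array([1, 1, 0, 0])
y_pred_cls = np.array([[0.2, 0.8], [0.6, 0.4], [0.7, 0.3], [0.3, 0.7]])
print(metrics.F1()(y_true_cls, y_pred_cls))  # 0.5

# After PATCH 3/3, the k <= 0 check happens at construction time.
try:
    metrics.Recall(k=0)
except ValueError as err:
    print(err)  # k must be greater than 0. 0 received.

Note that PATCH 3/3 moves the k validation from __call__ into __init__, so an invalid k now fails fast when the metric object is created rather than on every evaluation call.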