Feature/add metrics #132

Open
wants to merge 3 commits into base: dev
2 changes: 2 additions & 0 deletions matchzoo/metrics/__init__.py
@@ -1,4 +1,5 @@
from .precision import Precision
from .recall import Recall
from .average_precision import AveragePrecision
from .discounted_cumulative_gain import DiscountedCumulativeGain
from .mean_reciprocal_rank import MeanReciprocalRank
@@ -7,6 +8,7 @@
NormalizedDiscountedCumulativeGain

from .accuracy import Accuracy
from .f1 import F1
from .cross_entropy import CrossEntropy


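For a quick smoke test of the new re-exports, a minimal sketch (it assumes this branch of MatchZoo is importable; the expected reprs follow the `ALIAS` conventions used in the files below):

```python
# Hypothetical smoke test for the new exports; assumes MatchZoo is installed
# from this branch and the re-exports match the hunk above.
from matchzoo import metrics

print(repr(metrics.Recall(k=2)))  # expected: recall@2(0.0)
print(repr(metrics.F1()))         # expected: f1(0.0)
```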
52 changes: 52 additions & 0 deletions matchzoo/metrics/f1.py
@@ -0,0 +1,52 @@
"""F1 metric for Classification."""
import numpy as np

from matchzoo.engine.base_metric import ClassificationMetric


class F1(ClassificationMetric):
"""F1 metric."""

ALIAS = ['f1']

def __init__(self, threshold: float = 0.):
"""
:class:`F1` constructor.

:param threshold: The threshold above which a label or prediction counts as positive.
"""
self._threshold = threshold

def __repr__(self) -> str:
""":return: Formatted string representation of the metric."""
return f"{self.ALIAS}({self._threshold})"

def __call__(self, y_true: np.array, y_pred: np.array) -> float:
"""
Calculate f1.

Example:
>>> import numpy as np
>>> y_true = np.array([1, 1, 0, 0])
>>> y_pred = np.array([[0.2, 0.8], [0.6, 0.4], [0.7, 0.3], [0.3, 0.7]])
>>> F1()(y_true, y_pred)
0.5

:param y_true: The ground truth label of each document.
:param y_pred: The predicted scores of each document.
:return: F1.
"""
y_pred = np.argmax(y_pred, axis=1)

tp, fp, fn = 0., 0., 0.
for label, score in zip(y_true, y_pred):
if label > self._threshold and score > self._threshold:
tp += 1.
elif label <= self._threshold and score > self._threshold:
fp += 1.
elif label > self._threshold and score <= self._threshold:
fn += 1.
recall = tp / (tp + fn)
precision = tp / (tp + fp)
f1 = 2 * recall * precision / (recall + precision)
return f1
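As a sanity check on the F1 computation, the doctest value can be reproduced with scikit-learn (an assumption here, used only for comparison); the metric takes the argmax of the per-class scores before counting tp/fp/fn, so the equivalent call uses the same hard labels:

```python
import numpy as np
from sklearn.metrics import f1_score  # assumption: scikit-learn is available

y_true = np.array([1, 1, 0, 0])
y_pred = np.array([[0.2, 0.8], [0.6, 0.4], [0.7, 0.3], [0.3, 0.7]])

# F1.__call__ argmaxes the class scores before counting tp/fp/fn,
# so the comparison uses the same hard labels.
hard_pred = np.argmax(y_pred, axis=1)  # -> array([1, 0, 0, 1])
print(f1_score(y_true, hard_pred))     # 0.5, matching F1()(y_true, y_pred)
```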
15 changes: 10 additions & 5 deletions matchzoo/metrics/mean_average_precision.py
@@ -11,17 +11,21 @@ class MeanAveragePrecision(RankingMetric):

ALIAS = ['mean_average_precision', 'map']

def __init__(self, threshold: float = 0.):
def __init__(self, k: int = 1, threshold: float = 0.):
"""
:class:`MeanAveragePrecision` constructor.

:param k: Number of results to consider.
:param threshold: The threshold of relevance degree.
"""
if k <= 0:
raise ValueError(f"k must be greater than 0. {k} received.")
self._k = k
self._threshold = threshold

def __repr__(self):
""":return: Formatted string representation of the metric."""
return f"{self.ALIAS[0]}({self._threshold})"
return f"{self.ALIAS[0]}@{self._k}({self._threshold})"

def __call__(self, y_true: np.array, y_pred: np.array) -> float:
"""
Expand All @@ -30,20 +34,21 @@ def __call__(self, y_true: np.array, y_pred: np.array) -> float:
Example:
>>> y_true = [0, 1, 0, 0]
>>> y_pred = [0.1, 0.6, 0.2, 0.3]
>>> MeanAveragePrecision()(y_true, y_pred)
>>> MeanAveragePrecision(k=4)(y_true, y_pred)
1.0

:param y_true: The ground truth label of each document.
:param y_pred: The predicted scores of each document.
:return: Mean average precision.
:return: Mean average precision @ k.
"""
result = 0.
pos = 0
coupled_pair = sort_and_couple(y_true, y_pred)
for idx, (label, score) in enumerate(coupled_pair):
if label > self._threshold:
pos += 1.
result += pos / (idx + 1.)
if idx < self._k:
result += pos / (idx + 1.)
if pos == 0:
return 0.
else:
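To make the new `k` cut-off concrete, a standalone sketch of the same loop on the doctest data (plain Python, no MatchZoo imports; variable names are ours):

```python
# Standalone re-run of the MAP@k loop above on the doctest data
# (illustration only).
y_true = [0, 1, 0, 0]
y_pred = [0.1, 0.6, 0.2, 0.3]

coupled = sorted(zip(y_true, y_pred), key=lambda x: x[1], reverse=True)
# coupled -> [(1, 0.6), (0, 0.3), (0, 0.2), (0, 0.1)]

k, threshold = 4, 0.
result, pos = 0., 0
for idx, (label, _) in enumerate(coupled):
    if label > threshold:
        pos += 1
        if idx < k:                  # only ranks inside the top k contribute
            result += pos / (idx + 1.)
print(result / pos if pos else 0.)   # 1.0, as in the doctest
```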
14 changes: 10 additions & 4 deletions matchzoo/metrics/mean_reciprocal_rank.py
@@ -11,17 +11,21 @@ class MeanReciprocalRank(RankingMetric):

ALIAS = ['mean_reciprocal_rank', 'mrr']

def __init__(self, threshold: float = 0.):
def __init__(self, k: int = 1, threshold: float = 0.):
"""
:class:`MeanReciprocalRank` constructor.

:param k: Number of results to consider.
:param threshold: The label threshold of relevance degree.
"""
if k <= 0:
raise ValueError(f"k must be greater than 0. {k} received.")
self._k = k
self._threshold = threshold

def __repr__(self) -> str:
""":return: Formatted string representation of the metric."""
return f'{self.ALIAS[0]}({self._threshold})'
return f"{self.ALIAS[0]}@{self._k}({self._threshold})"

def __call__(self, y_true: np.array, y_pred: np.array) -> float:
"""
Expand All @@ -31,15 +35,17 @@ def __call__(self, y_true: np.array, y_pred: np.array) -> float:
>>> import numpy as np
>>> y_pred = np.asarray([0.2, 0.3, 0.7, 1.0])
>>> y_true = np.asarray([1, 0, 0, 0])
>>> MeanReciprocalRank()(y_true, y_pred)
>>> MeanReciprocalRank(k=4)(y_true, y_pred)
0.25

:param y_true: The ground truth label of each document.
:param y_pred: The predicted scores of each document.
:return: Mean reciprocal rank.
:return: Mean reciprocal rank @ k.
"""
coupled_pair = sort_and_couple(y_true, y_pred)
for idx, (label, pred) in enumerate(coupled_pair):
if idx >= self._k:
break
if label > self._threshold:
return 1. / (idx + 1)
return 0.
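The same style of standalone sketch for MRR@k (the helper name `mrr_at_k` is ours, for illustration); lowering `k` below the rank of the first relevant document drives the score to zero:

```python
# Standalone re-run of the MRR@k loop above on the doctest data.
y_true = [1, 0, 0, 0]
y_pred = [0.2, 0.3, 0.7, 1.0]

coupled = sorted(zip(y_true, y_pred), key=lambda x: x[1], reverse=True)
# coupled -> [(0, 1.0), (0, 0.7), (0, 0.3), (1, 0.2)]

def mrr_at_k(pairs, k, threshold=0.):
    for idx, (label, _) in enumerate(pairs):
        if idx >= k:          # stop once the cut-off is reached
            break
        if label > threshold:
            return 1. / (idx + 1)
    return 0.

print(mrr_at_k(coupled, k=4))  # 0.25, as in the doctest
print(mrr_at_k(coupled, k=3))  # 0.0, the relevant document sits outside the top 3
```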
8 changes: 3 additions & 5 deletions matchzoo/metrics/precision.py
Expand Up @@ -18,6 +18,8 @@ def __init__(self, k: int = 1, threshold: float = 0.):
:param k: Number of results to consider.
:param threshold: the label threshold of relevance degree.
"""
if k <= 0:
raise ValueError(f"k must be greater than 0. {k} received.")
self._k = k
self._threshold = threshold

Expand All @@ -43,12 +45,8 @@ def __call__(self, y_true: np.array, y_pred: np.array) -> float:

:param y_true: The ground truth label of each document.
:param y_pred: The predicted scores of each document.
:return: Precision @ k
:raises: ValueError: len(r) must be >= k.
:return: Precision @ k.
"""
if self._k <= 0:
raise ValueError(f"k must be greater than 0."
f"{self._k} received.")
coupled_pair = sort_and_couple(y_true, y_pred)
precision = 0.0
for idx, (label, score) in enumerate(coupled_pair):
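The net effect of moving the check is that an invalid `k` now fails at construction time rather than on the first call; a minimal sketch, assuming this branch is importable:

```python
# Illustration of the relocated check: an invalid k now raises when the
# metric is constructed, not when it is first evaluated.
from matchzoo import metrics

try:
    metrics.Precision(k=0)
except ValueError as err:
    print(err)  # k must be greater than 0. 0 received.
```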
61 changes: 61 additions & 0 deletions matchzoo/metrics/recall.py
@@ -0,0 +1,61 @@
"""Recall for ranking."""
import numpy as np

from matchzoo.engine.base_metric import sort_and_couple, RankingMetric


class Recall(RankingMetric):
"""Recall metric."""

ALIAS = ['recall']

def __init__(self, k: int = 1, threshold: float = 0.):
"""
:class:`Recall` constructor.

:param k: Number of results to consider.
:param threshold: the label threshold of relevance degree.
"""
if k <= 0:
raise ValueError(f"k must be greater than 0. {k} received.")
self._k = k
self._threshold = threshold

def __repr__(self) -> str:
""":return: Formatted string representation of the metric."""
return f"{self.ALIAS[0]}@{self._k}({self._threshold})"

def __call__(self, y_true: np.array, y_pred: np.array) -> float:
"""
Calculate recall@k.

Example:
>>> y_true = [0, 0, 0, 1]
>>> y_pred = [0.2, 0.4, 0.3, 0.1]
>>> Recall(k=1)(y_true, y_pred)
0.0
>>> Recall(k=2)(y_true, y_pred)
0.0
>>> Recall(k=4)(y_true, y_pred)
1.0
>>> Recall(k=5)(y_true, y_pred)
1.0

:param y_true: The ground truth label of each document.
:param y_pred: The predicted scores of each document.
:return: Recall @ k.
"""
result = 0.
pos = 0
coupled_pair = sort_and_couple(y_true, y_pred)
for idx, (label, score) in enumerate(coupled_pair):
if label > self._threshold:
pos += 1.
if idx < self._k:
result += 1.
if pos == 0:
return 0.
else:
return result / pos
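A standalone sketch of the Recall@k loop on the doctest data; note that `pos` counts every relevant document (the denominator), while only relevant documents ranked inside the top `k` add to the numerator:

```python
# Standalone re-run of the Recall@k loop above on the doctest data.
y_true = [0, 0, 0, 1]
y_pred = [0.2, 0.4, 0.3, 0.1]

coupled = sorted(zip(y_true, y_pred), key=lambda x: x[1], reverse=True)
# coupled -> [(0, 0.4), (0, 0.3), (0, 0.2), (1, 0.1)]

k, threshold = 4, 0.
hits, pos = 0., 0
for idx, (label, _) in enumerate(coupled):
    if label > threshold:
        pos += 1             # every relevant doc counts in the denominator
        if idx < k:
            hits += 1.       # only relevant docs inside the top k count as hits
print(hits / pos if pos else 0.)  # 1.0; with k=2 the single relevant doc is missed -> 0.0
```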
22 changes: 18 additions & 4 deletions tests/test_metrics.py
@@ -11,10 +11,10 @@ def test_sort_and_couple():
assert (c == np.array([(1, 0.4), (2, 0.2), (0, 0.1)])).all()


def test_mean_reciprocal_rank():
def test_mean_reciprocal_rank_at_k():
label = [0, 1, 2]
score = [0.1, 0.4, 0.2]
assert metrics.MeanReciprocalRank()(label, score) == 1
assert metrics.MeanReciprocalRank(k=1)(label, score) == 1


def test_precision_at_k():
Expand All @@ -25,16 +25,24 @@ def test_precision_at_k():
assert round(metrics.Precision(k=3)(label, score), 2) == 0.67


def test_recall_at_k():
label = [0, 1, 2]
score = [0.1, 0.4, 0.2]
assert metrics.Recall(k=1)(label, score) == 0.5
assert metrics.Recall(k=3)(label, score) == 1.


def test_average_precision():
label = [0, 1, 2]
score = [0.1, 0.4, 0.2]
assert round(metrics.AveragePrecision()(label, score), 2) == 0.89


def test_mean_average_precision():
def test_mean_average_precision_at_k():
label = [0, 1, 2]
score = [0.1, 0.4, 0.2]
assert metrics.MeanAveragePrecision()(label, score) == 1.
assert metrics.MeanAveragePrecision(k=1)(label, score) == 0.5
assert metrics.MeanAveragePrecision(k=3)(label, score) == 1.


def test_dcg_at_k():
Expand All @@ -61,6 +69,12 @@ def test_accuracy():
assert metrics.Accuracy()(label, score) == 1


def test_f1():
label = np.array([1, 1, 0, 0])
score = np.array([[0.2, 0.8], [0.6, 0.4], [0.7, 0.3], [0.3, 0.7]])
assert metrics.F1()(label, score) == 0.5


def test_cross_entropy():
label = [0, 1]
score = [[0.25, 0.25], [0.01, 0.90]]
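The docstring examples above double as doctests; a minimal sketch for running them locally, assuming the repository root is on `PYTHONPATH`:

```python
# Minimal sketch for exercising the new doctests locally
# (assumes the new modules are importable from the repository root).
import doctest

import matchzoo.metrics.f1
import matchzoo.metrics.recall

for mod in (matchzoo.metrics.f1, matchzoo.metrics.recall):
    result = doctest.testmod(mod)
    print(f"{mod.__name__}: {result.attempted} examples, {result.failed} failures")
```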