
Merge pull request #32 from jegorus/metrics
Metrics: F1Beta, MCC
sharthZ23 authored Mar 30, 2023
2 parents 39629df + 4911a9a commit 390c616
Showing 3 changed files with 125 additions and 3 deletions.
6 changes: 5 additions & 1 deletion rectools/metrics/__init__.py
@@ -23,7 +23,9 @@
-------
`metrics.Precision`
`metrics.Recall`
`metrics.F1Beta`
`metrics.Accuracy`
`metrics.MCC`
`metrics.MAP`
`metrics.NDCG`
`metrics.MRR`
@@ -39,7 +41,7 @@
`metrics.SparsePairwiseHammingDistanceCalculator`
"""

from .classification import Accuracy, Precision, Recall
from .classification import MCC, Accuracy, F1Beta, Precision, Recall
from .distances import (
    PairwiseDistanceCalculator,
    PairwiseHammingDistanceCalculator,
@@ -54,7 +56,9 @@
__all__ = (
    "Precision",
    "Recall",
    "F1Beta",
    "Accuracy",
    "MCC",
    "MAP",
    "NDCG",
    "MRR",
73 changes: 73 additions & 0 deletions rectools/metrics/classification.py
@@ -18,6 +18,7 @@
from collections import defaultdict

import attr
import numpy as np
import pandas as pd

from rectools import Columns
@@ -424,3 +425,75 @@ def make_confusions(reco: pd.DataFrame, interactions: pd.DataFrame, k: int) -> p
    merged = merge_reco(reco, interactions)
    confusion_df = calc_confusions(merged, k)
    return confusion_df


@attr.s
class F1Beta(SimpleClassificationMetric):
    """
    F-beta score for the first ``k`` recommendations.
    See more: https://en.wikipedia.org/wiki/F-score

    The metric is computed as ``(1 + beta_sqr) * p@k * r@k / (beta_sqr * p@k + r@k)``, where
        - ``beta_sqr`` is ``beta ** 2``;
        - ``p@k`` (precision@k) is ``tp / k``;
        - ``r@k`` (recall@k) is ``tp / liked``;
        - ``tp`` is the number of relevant recommendations
          among the first ``k`` items of the recommendation list;
        - ``liked`` is the number of items the user has interacted with
          (bought, liked) in the period after the recommendations were given.

    Parameters
    ----------
    k : int
        Number of items at the top of the recommendation list that will be used to calculate the metric.
    beta : float
        Weight of recall. Default: ``beta = 1.0``.
    """

    beta: float = attr.ib(default=1.0)

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame) -> pd.Series:
        beta_sqr = self.beta**2
        p_k = confusion_df[TP] / self.k
        r_k = confusion_df[TP] / confusion_df[LIKED]

        f1 = (1 + beta_sqr) * p_k * r_k / (beta_sqr * p_k + r_k)
        # When tp == 0, both p@k and r@k are 0 and the formula gives 0/0; define the metric as 0.
        f1.loc[(p_k == 0.0) & (r_k == 0.0)] = 0.0
        return f1


@attr.s
class MCC(ClassificationMetric):
    """
    Matthews correlation coefficient: the correlation between the actual and the predicted classification.
    The minimum value is -1 (perfect negative correlation), the maximum value is 1 (perfect positive
    correlation), and 0 means no correlation.
    See more: https://en.wikipedia.org/wiki/Phi_coefficient

    MCC is computed as ``(tp * tn - fp * fn) / sqrt((tp + fp)(tp + fn)(tn + fp)(tn + fn))``, where
        - ``tp`` is the number of relevant recommendations
          among the first ``k`` items of the recommendation list;
        - ``tn`` is the number of items the user has not interacted with
          (bought, liked) in the period after the recommendations were given
          and that are not among the top ``k`` items of the recommendation list;
        - ``fp`` is the number of non-relevant recommendations among the first ``k`` items of the recommendation list;
        - ``fn`` is the number of items the user has interacted with but that are not among the top ``k`` recommendations.

    Parameters
    ----------
    k : int
        Number of items at the top of the recommendation list that will be used to calculate the metric.
    """

    def _calc_per_user_from_confusion_df(self, confusion_df: pd.DataFrame, catalog: Catalog) -> pd.Series:
        tp_ = confusion_df[TP]
        tn_ = confusion_df[TN]
        fp_ = confusion_df[FP]
        fn_ = confusion_df[FN]
        mcc_numerator = tp_ * tn_ - fp_ * fn_
        mcc_denominator = np.sqrt((tp_ + fp_) * (tp_ + fn_) * (tn_ + fp_) * (tn_ + fn_))
        mcc = mcc_numerator / mcc_denominator
        mcc.loc[mcc_denominator == 0.0] = 0.0  # if the denominator is 0, the numerator is 0 as well, so define MCC as 0
        return mcc
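
For readers trying out this change, here is a minimal usage sketch (not part of the diff). The toy `reco`/`interactions` frames and the `catalog` list are made up for illustration; column names come from `rectools.Columns`, the call signatures follow the tests below, and it is assumed (as in those tests) that interactions only need user and item columns. `catalog` is the full set of item ids and is only required by `MCC`, which needs it to count true negatives.

```python
import pandas as pd

from rectools import Columns
from rectools.metrics import MCC, F1Beta

# Hypothetical toy data: top-2 recommendations for two users and their later interactions.
reco = pd.DataFrame(
    {
        Columns.User: [1, 1, 2, 2],
        Columns.Item: [10, 11, 10, 12],
        Columns.Rank: [1, 2, 1, 2],
    }
)
interactions = pd.DataFrame(
    {
        Columns.User: [1, 1, 2],
        Columns.Item: [10, 14, 12],
    }
)
catalog = [10, 11, 12, 13, 14]  # full item catalog, needed by MCC to count true negatives

f1 = F1Beta(k=2, beta=1.0)
print(f1.calc_per_user(reco, interactions))  # per-user F1@2
print(f1.calc(reco, interactions))           # mean over users

mcc = MCC(k=2)
print(mcc.calc_per_user(reco, interactions, catalog))  # per-user MCC@2
print(mcc.calc(reco, interactions, catalog))
```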
49 changes: 47 additions & 2 deletions tests/metrics/test_classification.py
@@ -19,7 +19,7 @@
import pytest

from rectools import Columns
from rectools.metrics import Accuracy, Precision, Recall
from rectools.metrics import MCC, Accuracy, F1Beta, Precision, Recall
from rectools.metrics.base import MetricAtK
from rectools.metrics.classification import ClassificationMetric, calc_classification_metrics

@@ -46,7 +46,7 @@ def setup(self) -> None:

    def test_calc(self) -> None:
        expected_metric_per_user = pd.Series(
            [0.5, 0.5, 0, 0],
            [0.5, 0.5, 0.0, 0.0],
            index=pd.Series([1, 3, 4, 5], name=Columns.User),
        )
        pd.testing.assert_series_equal(self.metric.calc_per_user(RECO, INTERACTIONS), expected_metric_per_user)
Expand Down Expand Up @@ -110,3 +110,48 @@ def test_raises_when_no_catalog_set_when_needed(self) -> None:
        metric = ClassificationMetric(k=1)
        with pytest.raises(ValueError):
            calc_classification_metrics({"m": metric}, pd.DataFrame(columns=[Columns.User, Columns.Item, Columns.Rank]))


class TestF1Beta:
    def setup(self) -> None:
        self.metric = F1Beta(k=2, beta=2 ** (1 / 2))

    def test_calc(self) -> None:
        expected_metric_per_user = pd.Series(
            [0.375, 0.75, 0, 0],
            index=pd.Series([1, 3, 4, 5], name=Columns.User),
        )
        pd.testing.assert_series_equal(self.metric.calc_per_user(RECO, INTERACTIONS), expected_metric_per_user)
        assert self.metric.calc(RECO, INTERACTIONS) == expected_metric_per_user.mean()

    def test_when_no_interactions(self) -> None:
        expected_metric_per_user = pd.Series(index=pd.Series(name=Columns.User, dtype=int), dtype=np.float64)
        pd.testing.assert_series_equal(self.metric.calc_per_user(RECO, EMPTY_INTERACTIONS), expected_metric_per_user)
        assert np.isnan(self.metric.calc(RECO, EMPTY_INTERACTIONS))


class TestMCC:
    def setup(self) -> None:
        self.metric = MCC(k=2)

    def test_calc(self) -> None:

        # tp = pd.Series([1, 1, 0, 0])
        # tn = pd.Series([6, 8, 7, 7])
        # fp = pd.Series([1, 1, 2, 2])
        # fn = pd.Series([2, 0, 1, 1])

        expected_metric_per_user = pd.Series(
            [1 / (21 ** (1 / 2)), 2 / 3, -1 / 6, -1 / 6],
            index=pd.Series([1, 3, 4, 5], name=Columns.User),
        )
        pd.testing.assert_series_equal(self.metric.calc_per_user(RECO, INTERACTIONS, CATALOG), expected_metric_per_user)
        assert self.metric.calc(RECO, INTERACTIONS, CATALOG) == expected_metric_per_user.mean()

    def test_when_no_interactions(self) -> None:
        expected_metric_per_user = pd.Series(index=pd.Series(name=Columns.User, dtype=int), dtype=np.float64)
        pd.testing.assert_series_equal(
            self.metric.calc_per_user(RECO, EMPTY_INTERACTIONS, CATALOG),
            expected_metric_per_user,
        )
        assert np.isnan(self.metric.calc(RECO, EMPTY_INTERACTIONS, CATALOG))
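
As a sanity check on the expected values above: plugging the commented confusion counts for user 1 (``tp=1, tn=6, fp=1, fn=2`` at ``k=2``) into the formulas added in `classification.py` reproduces the first entry of each expected series. A small verification sketch:

```python
import numpy as np

# User 1 at k=2, taken from the commented confusion counts in TestMCC.test_calc above.
tp, tn, fp, fn = 1, 6, 1, 2
k = 2
liked = tp + fn  # items the user interacted with

# F1Beta with beta = sqrt(2), i.e. beta_sqr = 2
beta_sqr = 2
p_k, r_k = tp / k, tp / liked
f_beta = (1 + beta_sqr) * p_k * r_k / (beta_sqr * p_k + r_k)
print(f_beta)  # 0.375 -- the first expected value in TestF1Beta

# MCC
mcc = (tp * tn - fp * fn) / np.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
print(mcc)  # 0.2182... == 1 / sqrt(21) -- the first expected value in TestMCC
```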
