Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Corrected f_beta computation #4183

Merged
merged 7 commits into from
Oct 21, 2020
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions pytorch_lightning/metrics/classification/f_beta.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import torch
from torch import nn
from pytorch_lightning.metrics.metric import Metric
from pytorch_lightning.metrics.functional.reduction import class_reduce
from pytorch_lightning.metrics.classification.precision_recall import _input_format
from pytorch_lightning.metrics.utils import METRIC_EPS

Expand Down Expand Up @@ -124,9 +125,11 @@ def compute(self):
precision = self.true_positives.sum().float() / (self.predicted_positives.sum() + METRIC_EPS)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we really want to keep our metric systematically imprecise by adding some offset?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can move towards the class_reduce function, which explicitly handles NaNs, when we unify the class-based metrics and functional metrics.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, makes sense. Just checked and the tests pass without METRIC_EPS. Pushing the update now.

recall = self.true_positives.sum().float() / (self.actual_positives.sum() + METRIC_EPS)

return (1 + self.beta ** 2) * (precision * recall) / (self.beta ** 2 * precision + recall)
elif self.average == 'macro':
precision = self.true_positives.float() / (self.predicted_positives + METRIC_EPS)
recall = self.true_positives.float() / (self.actual_positives + METRIC_EPS)

return ((1 + self.beta ** 2) * (precision * recall) / (self.beta ** 2 * precision + recall)).mean()
num = (1 + self.beta ** 2) * precision * recall
denom = self.beta ** 2 * precision + recall

return class_reduce(num=num, denom=denom, weights=None, class_reduction='macro')
8 changes: 8 additions & 0 deletions tests/metrics/classification/inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,14 @@
target=torch.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES))
)

# Generate a multilabel edge case where nothing matches (scores are undefined)
__temp_preds = torch.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES))
__temp_target = abs(__temp_preds - 1)

_multilabel_inputs_no_match = Input(
preds=__temp_preds,
target=__temp_target
)

_multiclass_prob_inputs = Input(
preds=torch.rand(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES),
Expand Down
2 changes: 2 additions & 0 deletions tests/metrics/classification/test_f_beta.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
_multidim_multiclass_inputs,
_multidim_multiclass_prob_inputs,
_multilabel_inputs,
_multilabel_inputs_no_match,
_multilabel_prob_inputs,
)
from tests.metrics.utils import NUM_CLASSES, THRESHOLD, MetricTester
Expand Down Expand Up @@ -87,6 +88,7 @@ def _sk_fbeta_multidim_multiclass(preds, target, average='micro', beta=1.0):
(_binary_inputs.preds, _binary_inputs.target, _sk_fbeta_binary, 1, False),
(_multilabel_prob_inputs.preds, _multilabel_prob_inputs.target, _sk_fbeta_multilabel_prob, NUM_CLASSES, True),
(_multilabel_inputs.preds, _multilabel_inputs.target, _sk_fbeta_multilabel, NUM_CLASSES, True),
(_multilabel_inputs_no_match.preds, _multilabel_inputs_no_match.target, _sk_fbeta_multilabel, NUM_CLASSES, True),
(_multiclass_prob_inputs.preds, _multiclass_prob_inputs.target, _sk_fbeta_multiclass_prob, NUM_CLASSES, False),
(_multiclass_inputs.preds, _multiclass_inputs.target, _sk_fbeta_multiclass, NUM_CLASSES, False),
(
Expand Down