[Metrics] Confusion matrix class interface (#4348)

* docs + precision + recall + f_beta + refactor Co-authored-by: Teddy Koker <teddy.koker@gmail.com> * rebase Co-authored-by: Teddy Koker <teddy.koker@gmail.com> * fixes Co-authored-by: Teddy Koker <teddy.koker@gmail.com> * added missing file * docs * docs * extra import * add confusion matrix * add to docs * add test * pep8 + isort * update tests * move util function * unify functional and class * add to init * remove old implementation * update tests * pep8 * add duplicate * fix doctest * Update pytorch_lightning/metrics/classification/confusion_matrix.py Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> * changelog * bullet point args * bullet docs * bullet docs Co-authored-by: ananyahjha93 <ananya@pytorchlightning.ai> Co-authored-by: Teddy Koker <teddy.koker@gmail.com> Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> Co-authored-by: chaton <thomas@grid.ai> Co-authored-by: Roger Shieh <55400948+s-rog@users.noreply.github.com> Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com>
Lightning-AI · Oct 30, 2020 · e0b856c · e0b856c
1 parent 20a8eaa
commit e0b856c
Show file tree

Hide file tree

Showing 12 changed files with 384 additions and 92 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -30,6 +30,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Added support for string values in `Trainer`'s `profiler` parameter ([#3656](https://github.com/PyTorchLightning/pytorch-lightning/pull/3656))
 
 
+- Added `ConfusionMatrix` class interface ([#4348](https://github.com/PyTorchLightning/pytorch-lightning/pull/4348))
+
+
 ### Changed
 
 

diff --git a/docs/source/metrics.rst b/docs/source/metrics.rst
@@ -188,6 +188,12 @@ Fbeta
 .. autoclass:: pytorch_lightning.metrics.classification.Fbeta
     :noindex:
 
+ConfusionMatrix
+~~~~~~~~~~~~~~~
+
+.. autoclass:: pytorch_lightning.metrics.classification.ConfusionMatrix
+    :noindex:
+
 Regression Metrics
 ------------------
 
@@ -275,7 +281,7 @@ average_precision [func]
 confusion_matrix [func]
 ~~~~~~~~~~~~~~~~~~~~~~~
 
-.. autofunction:: pytorch_lightning.metrics.functional.classification.confusion_matrix
+.. autofunction:: pytorch_lightning.metrics.functional.confusion_matrix
     :noindex:
 
 

diff --git a/pytorch_lightning/metrics/__init__.py b/pytorch_lightning/metrics/__init__.py
@@ -17,7 +17,8 @@
     Accuracy,
     Precision,
     Recall,
-    Fbeta
+    Fbeta,
+    ConfusionMatrix
 )
 
 from pytorch_lightning.metrics.regression import (

diff --git a/pytorch_lightning/metrics/classification/__init__.py b/pytorch_lightning/metrics/classification/__init__.py
@@ -14,3 +14,4 @@
 from pytorch_lightning.metrics.classification.accuracy import Accuracy
 from pytorch_lightning.metrics.classification.precision_recall import Precision, Recall
 from pytorch_lightning.metrics.classification.f_beta import Fbeta
+from pytorch_lightning.metrics.classification.confusion_matrix import ConfusionMatrix
diff --git a/pytorch_lightning/metrics/classification/accuracy.py b/pytorch_lightning/metrics/classification/accuracy.py
@@ -21,6 +21,7 @@
 import torch
 from torch import nn
 from pytorch_lightning.metrics.metric import Metric
+from pytorch_lightning.metrics.utils import _input_format_classification
 
 
 class Accuracy(Metric):
@@ -60,7 +61,6 @@ class Accuracy(Metric):
         tensor(0.5000)
 
     """
-
     def __init__(
         self,
         threshold: float = 0.5,
@@ -79,21 +79,6 @@ def __init__(
 
         self.threshold = threshold
 
-    def _input_format(self, preds: torch.Tensor, target: torch.Tensor):
-        if not (len(preds.shape) == len(target.shape) or len(preds.shape) == len(target.shape) + 1):
-            raise ValueError(
-                "preds and target must have same number of dimensions, or one additional dimension for preds"
-            )
-
-        if len(preds.shape) == len(target.shape) + 1:
-            # multi class probabilites
-            preds = torch.argmax(preds, dim=1)
-
-        if len(preds.shape) == len(target.shape) and preds.dtype == torch.float:
-            # binary or multilabel probablities
-            preds = (preds >= self.threshold).long()
-        return preds, target
-
     def update(self, preds: torch.Tensor, target: torch.Tensor):
         """
         Update state with predictions and targets.
@@ -102,7 +87,7 @@ def update(self, preds: torch.Tensor, target: torch.Tensor):
             preds: Predictions from model
             target: Ground truth values
         """
-        preds, target = self._input_format(preds, target)
+        preds, target = _input_format_classification(preds, target, self.threshold)
         assert preds.shape == target.shape
 
         self.correct += torch.sum(preds == target)

diff --git a/pytorch_lightning/metrics/classification/confusion_matrix.py b/pytorch_lightning/metrics/classification/confusion_matrix.py
@@ -0,0 +1,111 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Any, Optional
+
+import torch
+
+from pytorch_lightning.metrics.metric import Metric
+from pytorch_lightning.metrics.functional.confusion_matrix import (
+    _confusion_matrix_update,
+    _confusion_matrix_compute
+)
+
+
+class ConfusionMatrix(Metric):
+    """
+    Computes the confusion matrix. Works with binary, multiclass, and multilabel data.
+    Accepts logits from a model output or integer class values in prediction.
+    Works with multi-dimensional preds and target.
+
+    Forward accepts
+
+    - ``preds`` (float or long tensor): ``(N, ...)`` or ``(N, C, ...)`` where C is the number of classes
+    - ``target`` (long tensor): ``(N, ...)``
+
+    If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument.
+    This is the case for binary and multi-label logits.
+
+    If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``.
+
+    Args:
+        num_classes: Number of classes in the dataset.
+        normalize: Normalization mode for confusion matrix. Choose from
+
+            - ``None``: no normalization (default)
+            - ``'true'``: normalization over the targets (most commonly used)
+            - ``'pred'``: normalization over the predictions
+            - ``'all'``: normalization over the whole matrix
+
+        threshold:
+            Threshold value for binary or multi-label logits. default: 0.5
+        compute_on_step:
+            Forward only calls ``update()`` and return None if this is set to False. default: True
+        dist_sync_on_step:
+            Synchronize metric state across processes at each ``forward()``
+            before returning the value at the step. default: False
+        process_group:
+            Specify the process group on which synchronization is called. default: None (which selects the entire world)
+
+    Raises:
+        ValueError: If ``normalize`` is not one of ``'true'``, ``'pred'``, ``'all'`` or ``None``.
+
+    Example:
+
+        >>> from pytorch_lightning.metrics import ConfusionMatrix
+        >>> target = torch.tensor([1, 1, 0, 0])
+        >>> preds = torch.tensor([0, 1, 0, 0])
+        >>> confmat = ConfusionMatrix(num_classes=2)
+        >>> confmat(preds, target)
+        tensor([[2., 0.],
+                [1., 1.]])
+
+    """
+    def __init__(
+        self,
+        num_classes: int,
+        normalize: Optional[str] = None,
+        threshold: float = 0.5,
+        compute_on_step: bool = True,
+        dist_sync_on_step: bool = False,
+        process_group: Optional[Any] = None,
+    ):
+
+        super().__init__(
+            compute_on_step=compute_on_step,
+            dist_sync_on_step=dist_sync_on_step,
+            process_group=process_group,
+        )
+
+        # Validate with an explicit exception: ``assert`` statements are removed
+        # when Python runs with ``-O``, which would let a bad mode slip through.
+        allowed_normalize = ('true', 'pred', 'all', None)
+        if normalize not in allowed_normalize:
+            raise ValueError(
+                f"Argument `normalize` needs to be one of the following: {allowed_normalize}"
+            )
+
+        self.num_classes = num_classes
+        self.normalize = normalize
+        self.threshold = threshold
+
+        # Accumulated (num_classes x num_classes) matrix; registered with a "sum" reduction.
+        self.add_state("confmat", default=torch.zeros(num_classes, num_classes), dist_reduce_fx="sum")
+
+    def update(self, preds: torch.Tensor, target: torch.Tensor):
+        """
+        Update state with predictions and targets.
+
+        Args:
+            preds: Predictions from model
+            target: Ground truth values
+        """
+        # Add this batch's matrix onto the running state.
+        confmat = _confusion_matrix_update(preds, target, self.num_classes, self.threshold)
+        self.confmat += confmat
+
+    def compute(self) -> torch.Tensor:
+        """
+        Computes the confusion matrix from the accumulated state, applying the
+        normalization mode chosen at construction time.
+        """
+        return _confusion_matrix_compute(self.confmat, self.normalize)
diff --git a/pytorch_lightning/metrics/functional/__init__.py b/pytorch_lightning/metrics/functional/__init__.py
@@ -16,7 +16,6 @@
     auc,
     auroc,
     average_precision,
-    confusion_matrix,
     dice_score,
     f1_score,
     fbeta_score,
@@ -44,3 +43,4 @@
 from pytorch_lightning.metrics.functional.mean_squared_log_error import mean_squared_log_error
 from pytorch_lightning.metrics.functional.psnr import psnr
 from pytorch_lightning.metrics.functional.ssim import ssim
+from pytorch_lightning.metrics.functional.confusion_matrix import confusion_matrix
diff --git a/pytorch_lightning/metrics/functional/classification.py b/pytorch_lightning/metrics/functional/classification.py
@@ -301,48 +301,6 @@ def _confmat_normalize(cm):
     return cm
 
 
-def confusion_matrix(
-        pred: torch.Tensor,
-        target: torch.Tensor,
-        normalize: bool = False,
-        num_classes: Optional[int] = None
-) -> torch.Tensor:
-    """
-    Computes the confusion matrix C where each entry C_{i,j} is the number of observations
-    in group i that were predicted in group j.
-
-    Args:
-        pred: estimated targets
-        target: ground truth labels
-        normalize: normalizes confusion matrix
-        num_classes: number of classes
-
-    Return:
-        Tensor, confusion matrix C [num_classes, num_classes ]
-
-    Example:
-
-        >>> x = torch.tensor([1, 2, 3])
-        >>> y = torch.tensor([0, 2, 3])
-        >>> confusion_matrix(x, y)
-        tensor([[0., 1., 0., 0.],
-                [0., 0., 0., 0.],
-                [0., 0., 1., 0.],
-                [0., 0., 0., 1.]])
-    """
-    num_classes = get_num_classes(pred, target, num_classes)
-
-    unique_labels = (target.view(-1) * num_classes + pred.view(-1)).to(torch.int)
-
-    bins = torch.bincount(unique_labels, minlength=num_classes ** 2)
-    cm = bins.reshape(num_classes, num_classes).squeeze().float()
-
-    if normalize:
-        cm = _confmat_normalize(cm)
-
-    return cm
-
-
 def precision_recall(
         pred: torch.Tensor,
         target: torch.Tensor,

diff --git a/pytorch_lightning/metrics/functional/confusion_matrix.py b/pytorch_lightning/metrics/functional/confusion_matrix.py
@@ -0,0 +1,96 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Optional
+
+import torch
+
+from pytorch_lightning.utilities import rank_zero_warn
+from pytorch_lightning.metrics.utils import _input_format_classification
+
+
+def _confusion_matrix_update(preds: torch.Tensor,
+                             target: torch.Tensor,
+                             num_classes: int,
+                             threshold: float = 0.5) -> torch.Tensor:
+    """ Build the un-normalized confusion matrix for one batch.
+
+    Args:
+        preds: predictions, passed through ``_input_format_classification``
+        target: ground truth labels
+        num_classes: side length of the returned square matrix
+        threshold: forwarded to ``_input_format_classification``
+
+    Returns:
+        ``(num_classes, num_classes)`` tensor of counts; the row index comes from
+        ``target`` and the column index from ``preds``
+    """
+    preds, target = _input_format_classification(preds, target, threshold)
+    # Fold each (target, pred) pair into a single cell index: target * num_classes + pred.
+    flat_cells = target.view(-1) * num_classes + preds.view(-1)
+    counts = torch.bincount(flat_cells.to(torch.long), minlength=num_classes ** 2)
+    return counts.reshape(num_classes, num_classes)
+
+
+def _confusion_matrix_compute(confmat: torch.Tensor,
+                              normalize: Optional[str] = None) -> torch.Tensor:
+    """ Convert accumulated counts into the final (optionally normalized) confusion matrix.
+
+    Args:
+        confmat: square tensor of counts
+        normalize: ``None`` (no normalization), ``'true'`` (divide by the sum along dim 1),
+            ``'pred'`` (divide by the sum along dim 0) or ``'all'`` (divide by the total sum)
+
+    Returns:
+        Float tensor with the same shape as ``confmat``. NaN entries produced by the
+        division are replaced with zeros and a warning is issued.
+
+    Raises:
+        ValueError: If ``normalize`` is not one of the allowed modes.
+    """
+    allowed_normalize = ('true', 'pred', 'all', None)
+    # Explicit raise instead of ``assert``: asserts are stripped under ``python -O``.
+    if normalize not in allowed_normalize:
+        raise ValueError(
+            f"Argument `normalize` needs to be one of the following: {allowed_normalize}"
+        )
+
+    confmat = confmat.float()
+    if normalize is None:
+        return confmat
+
+    if normalize == 'true':
+        cm = confmat / confmat.sum(dim=1, keepdim=True)
+    elif normalize == 'pred':
+        cm = confmat / confmat.sum(dim=0, keepdim=True)
+    else:  # normalize == 'all'
+        cm = confmat / confmat.sum()
+
+    # Build the NaN mask once and reuse it for both counting and replacing.
+    nan_mask = torch.isnan(cm)
+    nan_elements = int(nan_mask.sum())
+    if nan_elements != 0:
+        cm[nan_mask] = 0
+        rank_zero_warn(f'{nan_elements} nan values found in confusion matrix have been replaced with zeros.')
+    return cm
+
+
+def confusion_matrix(
+        preds: torch.Tensor,
+        target: torch.Tensor,
+        num_classes: int,
+        normalize: Optional[str] = None,
+        threshold: float = 0.5
+) -> torch.Tensor:
+    """
+    Computes the confusion matrix. Works with binary, multiclass, and multilabel data.
+    Accepts logits from a model output or integer class values in prediction.
+    Works with multi-dimensional preds and target.
+
+    If preds and target are the same shape and preds is a float tensor, the ``threshold`` argument
+    is used to binarize preds. This is the case for binary and multi-label logits.
+
+    If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``.
+
+    Args:
+        preds: (float or long tensor), Either a ``(N, ...)`` tensor with labels or
+            ``(N, C, ...)`` where C is the number of classes, tensor with logits/probabilities
+        target: (long tensor), tensor with shape ``(N, ...)`` with ground truth labels
+        num_classes: Number of classes in the dataset.
+        normalize: Normalization mode for confusion matrix. Choose from
+
+            - ``None``: no normalization (default)
+            - ``'true'``: normalization over the targets (most commonly used)
+            - ``'pred'``: normalization over the predictions
+            - ``'all'``: normalization over the whole matrix
+
+        threshold:
+            Threshold value for binary or multi-label logits. default: 0.5
+
+    Return:
+        Tensor, confusion matrix of shape ``(num_classes, num_classes)``
+
+    Example:
+
+        >>> from pytorch_lightning.metrics.functional import confusion_matrix
+        >>> target = torch.tensor([1, 1, 0, 0])
+        >>> preds = torch.tensor([0, 1, 0, 0])
+        >>> confusion_matrix(preds, target, num_classes=2)
+        tensor([[2., 0.],
+                [1., 1.]])
+    """
+    # Count first, then normalize: the same two steps the class interface runs
+    # in ``update`` and ``compute``.
+    return _confusion_matrix_compute(
+        _confusion_matrix_update(preds, target, num_classes, threshold),
+        normalize,
+    )
diff --git a/pytorch_lightning/metrics/utils.py b/pytorch_lightning/metrics/utils.py
@@ -67,3 +67,31 @@ def _check_same_shape(pred: torch.Tensor, target: torch.Tensor):
     """ Check that predictions and target have the same shape, else raise error """
     if pred.shape != target.shape:
         raise RuntimeError('Predictions and targets are expected to have the same shape')
+
+
+def _input_format_classification(preds: torch.Tensor, target: torch.Tensor, threshold: float):
+    """ Convert preds and target tensors into label tensors
+
+    Args:
+        preds: either tensor with labels, tensor with probabilities/logits or
+            multilabel tensor
+        target: tensor with ground truth labels
+        threshold: float used for thresholding binary or multilabel input
+
+    Returns:
+        preds: tensor with labels
+        target: tensor with labels (returned unchanged)
+
+    Raises:
+        ValueError: If ``preds`` has neither the same number of dimensions as
+            ``target`` nor exactly one more.
+    """
+    extra_dims = preds.dim() - target.dim()
+    if extra_dims not in (0, 1):
+        raise ValueError(
+            "preds and target must have same number of dimensions, or one additional dimension for preds"
+        )
+
+    if extra_dims == 1:
+        # multi class probabilities: pick the highest scoring class along dim 1
+        preds = torch.argmax(preds, dim=1)
+    elif preds.is_floating_point():
+        # binary or multilabel probabilities; ``is_floating_point`` also covers
+        # float16/float64, which a comparison against ``torch.float`` would miss
+        preds = (preds >= threshold).long()
+    return preds, target