pytorch · sdesrozis · Mar 8, 2022 · Feb 24, 2022 · Feb 24, 2022 · Feb 24, 2022
diff --git a/ignite/contrib/metrics/precision_recall_curve.py b/ignite/contrib/metrics/precision_recall_curve.py
@@ -1,7 +1,9 @@
-from typing import Any, Callable, Tuple
+from typing import Any, Callable, cast, Tuple
 
 import torch
 
+import ignite.distributed as idist
+from ignite.exceptions import NotComputableError
 from ignite.metrics import EpochMetric
 
 
@@ -73,3 +75,32 @@ def __init__(self, output_transform: Callable = lambda x: x, check_compute_fn: b
         super(PrecisionRecallCurve, self).__init__(
             precision_recall_curve_compute_fn, output_transform=output_transform, check_compute_fn=check_compute_fn
         )
+
+    def compute(self) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        """Compute the precision-recall curve over every accumulated example.
+
+        With several processes the accumulated predictions and targets are
+        gathered from all ranks, ``compute_fn`` runs on rank zero only and its
+        three outputs are broadcast to the other ranks.
+
+        Returns:
+            ``(precision, recall, thresholds)``. With a single process the output
+            of ``compute_fn`` is returned unchanged; with several processes every
+            rank receives flat ``torch.double`` tensors.
+
+        Raises:
+            NotComputableError: if no prediction or no target has been accumulated.
+        """
+        if len(self._predictions) < 1 or len(self._targets) < 1:
+            raise NotComputableError("EpochMetric must have at least one example before it can be computed.")
+
+        _prediction_tensor = torch.cat(self._predictions, dim=0)
+        _target_tensor = torch.cat(self._targets, dim=0)
+
+        ws = idist.get_world_size()
+        if ws > 1 and not self._is_reduced:
+            # All gather across all processes
+            _prediction_tensor = cast(torch.Tensor, idist.all_gather(_prediction_tensor))
+            _target_tensor = cast(torch.Tensor, idist.all_gather(_target_tensor))
+        self._is_reduced = True
+
+        rank = idist.get_rank()
+        result = None
+        if rank == 0:
+            # Run compute_fn on zero rank only
+            result = self.compute_fn(_prediction_tensor, _target_tensor)
+
+        if ws <= 1:
+            # Nothing to share: hand back the output of compute_fn untouched
+            return cast(Tuple[torch.Tensor, torch.Tensor, torch.Tensor], result)
+
+        device = _prediction_tensor.device
+        # a fixed dtype keeps the broadcast buffers identical on every rank
+        dtype = torch.double
+
+        tensors = []
+        if rank == 0:
+            for value in cast(Tuple, result):
+                if not isinstance(value, torch.Tensor):
+                    # copy first: arrays with negative strides cannot be converted directly
+                    value = torch.tensor(value.copy() if hasattr(value, "copy") else value)
+                tensors.append(value.to(device=device, dtype=dtype).reshape(-1))
+            sizes = torch.tensor([t.numel() for t in tensors], dtype=torch.long, device=device)
+        else:
+            sizes = torch.zeros(3, dtype=torch.long, device=device)
+
+        # The curve length depends on the data (not on the number of accumulated
+        # batches), so the other ranks learn the buffer sizes from rank zero.
+        sizes = cast(torch.Tensor, idist.broadcast(sizes, src=0))
+        if rank != 0:
+            tensors = [torch.zeros(int(n), dtype=dtype, device=device) for n in sizes.tolist()]
+
+        # broadcast result to all processes
+        precision, recall, thresholds = [cast(torch.Tensor, idist.broadcast(t, src=0)) for t in tensors]
+
+        return precision, recall, thresholds
diff --git a/ignite/metrics/epoch_metric.py b/ignite/metrics/epoch_metric.py
@@ -1,5 +1,5 @@
 import warnings
-from typing import Callable, cast, List, Tuple, Union
+from typing import Any, Callable, cast, List, Tuple, Union
 
 import torch
 
@@ -136,7 +136,7 @@ def update(self, output: Tuple[torch.Tensor, torch.Tensor]) -> None:
             except Exception as e:
                 warnings.warn(f"Probably, there can be a problem with `compute_fn`:\n {e}.", EpochMetricWarning)
 
-    def compute(self) -> float:
+    def compute(self) -> Any:
         if len(self._predictions) < 1 or len(self._targets) < 1:
             raise NotComputableError("EpochMetric must have at least one example before it can be computed.")
 

diff --git a/tests/ignite/contrib/metrics/test_precision_recall_curve.py b/tests/ignite/contrib/metrics/test_precision_recall_curve.py
@@ -1,3 +1,5 @@
+import os
+from typing import Tuple
 from unittest.mock import patch
 
 import numpy as np
@@ -6,6 +8,7 @@
 import torch
 from sklearn.metrics import precision_recall_curve
 
+import ignite.distributed as idist
 from ignite.contrib.metrics.precision_recall_curve import PrecisionRecallCurve
 from ignite.engine import Engine
 from ignite.metrics.epoch_metric import EpochMetricWarning
@@ -124,3 +127,161 @@ def test_check_compute_fn():
 
     em = PrecisionRecallCurve(check_compute_fn=False)
     em.update(output)
+
+
+def _test_distrib_compute(device):
+    """Check ``PrecisionRecallCurve.compute`` against scikit-learn on data gathered from all ranks.
+
+    Args:
+        device: device on which the input tensors are placed.
+    """
+
+    rank = idist.get_rank()
+    torch.manual_seed(12)
+
+    def _to_numpy(value):
+        # compute() may return tensors or arrays depending on the configuration
+        if isinstance(value, torch.Tensor):
+            return value.cpu().numpy()
+        return np.asarray(value)
+
+    def _test(y_pred, y, batch_size, metric_device):
+
+        metric_device = torch.device(metric_device)
+        # NOTE(review): assumes the constructor accepts ``device`` - its full signature is not visible here
+        prc = PrecisionRecallCurve(device=metric_device)
+
+        torch.manual_seed(10 + rank)
+
+        prc.reset()
+        if batch_size > 1:
+            # stepping by batch_size avoids feeding an empty trailing batch
+            for idx in range(0, y.shape[0], batch_size):
+                prc.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size]))
+        else:
+            prc.update((y_pred, y))
+
+        # gather y_pred, y
+        y_pred = idist.all_gather(y_pred)
+        y = idist.all_gather(y)
+
+        np_y = y.cpu().numpy()
+        np_y_pred = y_pred.cpu().numpy()
+
+        res = prc.compute()
+        assert isinstance(res, Tuple)
+        assert len(res) == 3
+
+        # reference values come from scikit-learn, not from the metric class itself
+        expected = precision_recall_curve(np_y, np_y_pred)
+        for computed, reference in zip(res, expected):
+            computed = _to_numpy(computed)
+            assert computed.shape == reference.shape
+            assert computed == pytest.approx(reference)
+
+    def get_test_cases():
+        # each case is (y_pred, y, batch_size)
+        return [
+            # everything passed in a single update
+            (torch.rand(size=(100,)), torch.randint(0, 2, size=(100,)), 1),
+            # several updates, the last batch being smaller than the others
+            (torch.rand(size=(200,)), torch.randint(0, 2, size=(200,)), 16),
+        ]
+
+    for _ in range(3):
+        for y_pred, y, batch_size in get_test_cases():
+            y_pred = y_pred.to(device)
+            y = y.to(device)
+            _test(y_pred, y, batch_size, "cpu")
+            if device.type != "xla":
+                _test(y_pred, y, batch_size, idist.device())
+
+
+def _test_distrib_integration(device):
+    """Run ``PrecisionRecallCurve`` attached to an ``Engine`` and compare with scikit-learn.
+
+    Args:
+        device: device on which the input tensors are placed.
+    """
+
+    rank = idist.get_rank()
+    # a distinct seed per rank makes every process contribute different examples
+    torch.manual_seed(12 + rank)
+
+    def _to_numpy(value):
+        # the metric may return tensors or arrays depending on the configuration
+        if isinstance(value, torch.Tensor):
+            return value.cpu().numpy()
+        return np.asarray(value)
+
+    def _test(n_epochs, metric_device):
+        metric_device = torch.device(metric_device)
+        n_iters = 80
+        size = 151
+        # one chunk of ``size`` examples per iteration; the targets are binary labels
+        y_true = torch.randint(0, 2, size=(n_iters * size,)).to(device)
+        y_preds = torch.rand(size=(n_iters * size,)).to(device)
+
+        def update(engine, i):
+            return (
+                y_preds[i * size : (i + 1) * size],
+                y_true[i * size : (i + 1) * size],
+            )
+
+        engine = Engine(update)
+
+        # NOTE(review): assumes the constructor accepts ``device`` - its full signature is not visible here
+        prc = PrecisionRecallCurve(device=metric_device)
+        prc.attach(engine, "prc")
+
+        data = list(range(n_iters))
+        engine.run(data=data, max_epochs=n_epochs)
+
+        assert "prc" in engine.state.metrics
+
+        precision, recall, thresholds = engine.state.metrics["prc"]
+
+        # the metric gathers the examples of every rank, so the reference must do the same
+        np_y_true = idist.all_gather(y_true).cpu().numpy().ravel()
+        np_y_preds = idist.all_gather(y_preds).cpu().numpy().ravel()
+
+        sk_precision, sk_recall, sk_thresholds = precision_recall_curve(np_y_true, np_y_preds)
+        for computed, reference in (
+            (precision, sk_precision),
+            (recall, sk_recall),
+            (thresholds, sk_thresholds),
+        ):
+            computed = _to_numpy(computed)
+            assert computed.shape == reference.shape
+            assert pytest.approx(computed) == reference
+
+    metric_devices = ["cpu"]
+    if device.type != "xla":
+        metric_devices.append(idist.device())
+    for metric_device in metric_devices:
+        for _ in range(2):
+            _test(n_epochs=1, metric_device=metric_device)
+            _test(n_epochs=2, metric_device=metric_device)
+
+
+@pytest.mark.distributed
+@pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
+@pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
+def test_distrib_nccl_gpu(distributed_context_single_node_nccl):
+    """Run both distributed checks under the single-node NCCL context fixture."""
+    current_device = idist.device()
+    for check in (_test_distrib_compute, _test_distrib_integration):
+        check(current_device)
+
+
+@pytest.mark.distributed
+@pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
+def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo):
+    """Run both distributed checks under the single-node gloo context fixture."""
+    current_device = idist.device()
+    for check in (_test_distrib_compute, _test_distrib_integration):
+        check(current_device)
+
+
+@pytest.mark.distributed
+@pytest.mark.skipif(not idist.has_hvd_support, reason="Skip if no Horovod dist support")
+@pytest.mark.skipif("WORLD_SIZE" in os.environ, reason="Skip if launched as multiproc")
+def test_distrib_hvd(gloo_hvd_executor):
+    """Launch both distributed checks through the Horovod executor fixture."""
+    # use every visible GPU when CUDA is available, otherwise four CPU processes
+    if torch.cuda.is_available():
+        device = torch.device("cuda")
+        nproc = torch.cuda.device_count()
+    else:
+        device = torch.device("cpu")
+        nproc = 4
+
+    for check in (_test_distrib_compute, _test_distrib_integration):
+        gloo_hvd_executor(check, (device,), np=nproc, do_init=True)
+
+
+@pytest.mark.multinode_distributed
+@pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
+@pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
+def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo):
+    """Run both distributed checks under the multi-node gloo context fixture."""
+    current_device = idist.device()
+    for check in (_test_distrib_compute, _test_distrib_integration):
+        check(current_device)
+
+
+@pytest.mark.multinode_distributed
+@pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
+@pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
+def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl):
+    """Run both distributed checks under the multi-node NCCL context fixture."""
+    current_device = idist.device()
+    for check in (_test_distrib_compute, _test_distrib_integration):
+        check(current_device)
+
+
+@pytest.mark.tpu
+@pytest.mark.skipif("NUM_TPU_WORKERS" in os.environ, reason="Skip if NUM_TPU_WORKERS is in env vars")
+@pytest.mark.skipif(not idist.has_xla_support, reason="Skip if no PyTorch XLA package")
+def test_distrib_single_device_xla():
+    """Run both distributed checks on the device reported by ``idist.device()``."""
+    current_device = idist.device()
+    for check in (_test_distrib_compute, _test_distrib_integration):
+        check(current_device)
+
+
+def _test_distrib_xla_nprocs(index):
+    """Worker entry point running both distributed checks; ``index`` is not used."""
+    current_device = idist.device()
+    for check in (_test_distrib_compute, _test_distrib_integration):
+        check(current_device)
+
+
+@pytest.mark.tpu
+@pytest.mark.skipif("NUM_TPU_WORKERS" not in os.environ, reason="Skip if no NUM_TPU_WORKERS in env vars")
+@pytest.mark.skipif(not idist.has_xla_support, reason="Skip if no PyTorch XLA package")
+def test_distrib_xla_nprocs(xmp_executor):
+    """Launch the XLA worker on as many processes as ``NUM_TPU_WORKERS`` requests."""
+    xmp_executor(
+        _test_distrib_xla_nprocs,
+        args=(),
+        nprocs=int(os.environ["NUM_TPU_WORKERS"]),
+    )