diff --git a/src/otx/algorithms/anomaly/adapters/anomalib/strategies/xpu_single.py b/src/otx/algorithms/anomaly/adapters/anomalib/strategies/xpu_single.py index 3c00aeb6747..014132a840e 100644 --- a/src/otx/algorithms/anomaly/adapters/anomalib/strategies/xpu_single.py +++ b/src/otx/algorithms/anomaly/adapters/anomalib/strategies/xpu_single.py @@ -30,7 +30,6 @@ def __init__( checkpoint_io: Optional[CheckpointIO] = None, precision_plugin: Optional[PrecisionPlugin] = None, ): - if not is_xpu_available(): raise MisconfigurationException("`SingleXPUStrategy` requires XPU devices to run") diff --git a/src/otx/algorithms/common/adapters/torch/amp/xpu_grad_scaler.py b/src/otx/algorithms/common/adapters/torch/amp/xpu_grad_scaler.py index f3994050cae..be37f003b78 100644 --- a/src/otx/algorithms/common/adapters/torch/amp/xpu_grad_scaler.py +++ b/src/otx/algorithms/common/adapters/torch/amp/xpu_grad_scaler.py @@ -7,7 +7,11 @@ from typing import List import torch -from intel_extension_for_pytorch.cpu.autocast._grad_scaler import _MultiDeviceReplicator + +from otx.algorithms.common.utils.utils import is_xpu_available + +if is_xpu_available(): + from intel_extension_for_pytorch.cpu.autocast._grad_scaler import _MultiDeviceReplicator from torch.cuda.amp.grad_scaler import GradScaler, _refresh_per_optimizer_state @@ -16,6 +20,8 @@ class XPUGradScaler(GradScaler): def __init__(self, init_scale=2.0**16, growth_factor=2.0, backoff_factor=0.5, growth_interval=2000, enabled=True): self._enabled = enabled + if not is_xpu_available(): + raise RuntimeError("XPU GradScaler requires XPU device.") if self._enabled: assert growth_factor > 1.0, "The growth factor must be > 1.0." diff --git a/tests/unit/algorithms/anomaly/adapters/anomalib/accelerators/xpu.py b/tests/unit/algorithms/anomaly/adapters/anomalib/accelerators/xpu.py index 4b89d101661..d277fc2c6dc 100644 --- a/tests/unit/algorithms/anomaly/adapters/anomalib/accelerators/xpu.py +++ b/tests/unit/algorithms/anomaly/adapters/anomalib/accelerators/xpu.py @@ -10,41 +10,50 @@ from otx.algorithms.common.utils import is_xpu_available -@pytest.mark.skipif(not is_xpu_available(), reason="XPU is not available") class TestXPUAccelerator: @pytest.fixture - def accelerator(self): - return XPUAccelerator() + def accelerator(self, mocker): + mock_torch = mocker.patch("otx.algorithms.anomaly.adapters.anomalib.accelerators.xpu.torch") + return XPUAccelerator(), mock_torch def test_setup_device(self, accelerator): + accelerator, mock_torch = accelerator device = torch.device("xpu") accelerator.setup_device(device) + assert mock_torch.xpu.set_device.called def test_parse_devices(self, accelerator): + accelerator, _ = accelerator devices = [1, 2, 3] parsed_devices = accelerator.parse_devices(devices) assert isinstance(parsed_devices, list) assert parsed_devices == devices - def test_get_parallel_devices(self, accelerator): + def test_get_parallel_devices(self, accelerator, mocker): + accelerator, _ = accelerator devices = [1, 2, 3] parallel_devices = accelerator.get_parallel_devices(devices) assert isinstance(parallel_devices, list) - assert parallel_devices == [torch.device("xpu", idx) for idx in devices] + assert all([isinstance(device, mocker.MagicMock) for device in parallel_devices]) - def test_auto_device_count(self, accelerator): + def test_auto_device_count(self, accelerator, mocker): + accelerator, mock_torch = accelerator count = accelerator.auto_device_count() - assert isinstance(count, int) + assert isinstance(count, mocker.MagicMock) + assert 
mock_torch.xpu.device_count.called def test_is_available(self, accelerator): + accelerator, _ = accelerator available = accelerator.is_available() assert isinstance(available, bool) assert available == is_xpu_available() def test_get_device_stats(self, accelerator): + accelerator, _ = accelerator device = torch.device("xpu") stats = accelerator.get_device_stats(device) assert isinstance(stats, dict) def test_teardown(self, accelerator): + accelerator, _ = accelerator accelerator.teardown() diff --git a/tests/unit/algorithms/anomaly/adapters/anomalib/strategies/test_xpu_single.py b/tests/unit/algorithms/anomaly/adapters/anomalib/strategies/test_xpu_single.py index 97ff1989ed4..f1e0de3204b 100644 --- a/tests/unit/algorithms/anomaly/adapters/anomalib/strategies/test_xpu_single.py +++ b/tests/unit/algorithms/anomaly/adapters/anomalib/strategies/test_xpu_single.py @@ -7,22 +7,37 @@ import torch import pytorch_lightning as pl from otx.algorithms.anomaly.adapters.anomalib.strategies.xpu_single import SingleXPUStrategy -from otx.algorithms.common.utils.utils import is_xpu_available +from pytorch_lightning.utilities.exceptions import MisconfigurationException -@pytest.mark.skipif(not is_xpu_available(), reason="XPU is not available") class TestSingleXPUStrategy: - def test_init(self): + def test_init(self, mocker): + with pytest.raises(MisconfigurationException): + strategy = SingleXPUStrategy(device="xpu:0") + mocked_is_xpu_available = mocker.patch( + "otx.algorithms.anomaly.adapters.anomalib.strategies.xpu_single.is_xpu_available", return_value=True + ) strategy = SingleXPUStrategy(device="xpu:0") + assert mocked_is_xpu_available.call_count == 1 assert strategy._root_device.type == "xpu" assert strategy.accelerator is None - def test_is_distributed(self): - strategy = SingleXPUStrategy(device="xpu:0") + @pytest.fixture + def strategy(self, mocker): + mocker.patch( + "otx.algorithms.anomaly.adapters.anomalib.strategies.xpu_single.is_xpu_available", return_value=True + ) + return SingleXPUStrategy(device="xpu:0") + + def test_is_distributed(self, strategy): assert not strategy.is_distributed - def test_setup_optimizers(self): - strategy = SingleXPUStrategy(device="xpu:0") + def test_setup_optimizers(self, strategy, mocker): + mocker.patch("otx.algorithms.anomaly.adapters.anomalib.strategies.xpu_single.torch") + mocker.patch( + "otx.algorithms.anomaly.adapters.anomalib.strategies.xpu_single.torch.xpu.optimize", + return_value=(mocker.MagicMock(), mocker.MagicMock()), + ) trainer = pl.Trainer() # Create mock optimizers and models for testing model = torch.nn.Linear(10, 2) diff --git a/tests/unit/algorithms/classification/adapters/mmcls/api/test_train.py b/tests/unit/algorithms/classification/adapters/mmcls/api/test_train.py index 60aa92eef84..26143b66807 100644 --- a/tests/unit/algorithms/classification/adapters/mmcls/api/test_train.py +++ b/tests/unit/algorithms/classification/adapters/mmcls/api/test_train.py @@ -96,7 +96,6 @@ def test_train_model_multiple_datasets_distributed_training(self, mock_modules, # Call the function train_model(model, [dataset, dataset], mmcv_cfg, distributed=True, validate=True) - @pytest.mark.skipif(is_xpu_available() or not torch.cuda.is_available(), reason="cuda is not available") def test_train_model_specific_timestamp_and_cuda_device(self, mock_modules, mmcv_cfg, model, dataset): # Create mock inputs _ = mock_modules @@ -107,12 +106,15 @@ def test_train_model_specific_timestamp_and_cuda_device(self, mock_modules, mmcv # Call the function train_model(model, dataset, 
mmcv_cfg, timestamp=timestamp, device=device, meta=meta) - @pytest.mark.skipif(not is_xpu_available(), reason="xpu is not available") - def test_train_model_xpu_device(self, mock_modules, mmcv_cfg, model, dataset): + def test_train_model_xpu_device(self, mock_modules, mmcv_cfg, model, dataset, mocker): # Create mock inputs _ = mock_modules device = "xpu" mmcv_cfg.device = "xpu" - + mocker.patch("otx.algorithms.classification.adapters.mmcls.apis.train.torch") + mocker.patch( + "otx.algorithms.classification.adapters.mmcls.apis.train.torch.xpu.optimize", + return_value=(mocker.MagicMock(), mocker.MagicMock()), + ) # Call the function train_model(model, dataset, mmcv_cfg, device=device) diff --git a/tests/unit/algorithms/common/adapters/mmcv/hooks/test_xpu_optimizer_hook.py b/tests/unit/algorithms/common/adapters/mmcv/hooks/test_xpu_optimizer_hook.py index dd13f4c6e81..0151ccff104 100644 --- a/tests/unit/algorithms/common/adapters/mmcv/hooks/test_xpu_optimizer_hook.py +++ b/tests/unit/algorithms/common/adapters/mmcv/hooks/test_xpu_optimizer_hook.py @@ -3,18 +3,16 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -import pytest -from otx.algorithms.common.utils.utils import is_xpu_available - -@pytest.mark.skipif(not is_xpu_available(), reason="XPU is not available") -def test_init(): +def test_init(mocker): from otx.algorithms.common.adapters.mmcv.hooks.xpu_optimizer_hook import BFp16XPUOptimizerHook - from otx.algorithms.common.adapters.torch.amp import XPUGradScaler + mocker.patch( + "otx.algorithms.common.adapters.mmcv.hooks.xpu_optimizer_hook.XPUGradScaler", return_value=mocker.MagicMock() + ) hook = BFp16XPUOptimizerHook(grad_clip=None, coalesce=True, bucket_size_mb=-1, loss_scale=512.0, distributed=True) assert hook.coalesce is True # Check coalesce is True assert hook.bucket_size_mb == -1 # Check bucket size is -1 assert hook._scale_update_param is 512.0 # Check scale update param is 512.0 assert hook.distributed is True # Check distributed is True - assert isinstance(hook.loss_scaler, XPUGradScaler) + assert isinstance(hook.loss_scaler, mocker.MagicMock) diff --git a/tests/unit/algorithms/common/adapters/torch/amp/test_xpu_grad_scaler.py b/tests/unit/algorithms/common/adapters/torch/amp/test_xpu_grad_scaler.py index 107939f50eb..465834b9a37 100644 --- a/tests/unit/algorithms/common/adapters/torch/amp/test_xpu_grad_scaler.py +++ b/tests/unit/algorithms/common/adapters/torch/amp/test_xpu_grad_scaler.py @@ -6,16 +6,14 @@ import pytest import torch -from otx.algorithms.common.utils import is_xpu_available -if is_xpu_available(): - from otx.algorithms.common.adapters.torch.amp.xpu_grad_scaler import XPUGradScaler +from otx.algorithms.common.adapters.torch.amp.xpu_grad_scaler import XPUGradScaler -@pytest.mark.skipif(not is_xpu_available(), reason="XPU is not available") class TestXPUGradScaler: @pytest.fixture - def grad_scaler(self): + def grad_scaler(self, mocker): + mocker.patch("otx.algorithms.common.adapters.torch.amp.xpu_grad_scaler.is_xpu_available", return_value=True) return XPUGradScaler() @pytest.fixture @@ -31,15 +29,11 @@ def test_init(self, grad_scaler): assert grad_scaler._backoff_factor == 0.5 assert grad_scaler._growth_interval == 2000 - def test_scale(self, grad_scaler): - outputs = torch.tensor([1.0, 2.0, 3.0], device="xpu:0") + def test_scale(self, grad_scaler, mocker): + outputs = mocker.MagicMock(torch.Tensor) + outputs.device.type = "xpu" + outputs.device.index = 0 + grad_scaler._lazy_init_scale_growth_tracker = mocker.MagicMock() + 
grad_scaler._scale = mocker.MagicMock() scaled_outputs = grad_scaler.scale(outputs) - assert scaled_outputs.device.type == "xpu" - assert torch.equal(scaled_outputs, outputs * grad_scaler._scale) - - def test_unscale_grads(self, grad_scaler, optimizer): - inv_scale = 1.0 - found_inf = False - output = grad_scaler._unscale_grads_(optimizer, inv_scale, found_inf, allow_bf16=False) - assert isinstance(output, dict) - assert not output + assert isinstance(scaled_outputs.device.type, mocker.MagicMock) diff --git a/tests/unit/algorithms/detection/adapters/mmdet/api/test_train.py b/tests/unit/algorithms/detection/adapters/mmdet/api/test_train.py index e3a95e2c52a..1048cabbe2c 100644 --- a/tests/unit/algorithms/detection/adapters/mmdet/api/test_train.py +++ b/tests/unit/algorithms/detection/adapters/mmdet/api/test_train.py @@ -88,20 +88,24 @@ def test_train_model_multiple_datasets_distributed_training(self, mock_modules, # Call the function train_detector(model, [dataset, dataset], mmcv_cfg, distributed=True, validate=True) - @pytest.mark.skipif(is_xpu_available() or not torch.cuda.is_available(), reason="cuda is not available") - def test_train_model_specific_timestamp_and_cuda_device(self, mock_modules, mmcv_cfg, model, dataset): + def test_train_model_specific_timestamp_and_cuda_device(self, mock_modules, mmcv_cfg, model, dataset, mocker): # Create mock inputs _ = mock_modules timestamp = "2024-01-01" mmcv_cfg.device = "cuda" meta = {"info": "some_info"} + # Call the function train_detector(model, dataset, mmcv_cfg, timestamp=timestamp, meta=meta) - @pytest.mark.skipif(not is_xpu_available(), reason="xpu is not available") - def test_train_model_xpu_device(self, mock_modules, mmcv_cfg, model, dataset): + def test_train_model_xpu_device(self, mock_modules, mmcv_cfg, model, dataset, mocker): # Create mock inputs _ = mock_modules mmcv_cfg.device = "xpu" + mocker.patch("otx.algorithms.detection.adapters.mmdet.apis.train.torch") + mocker.patch( + "otx.algorithms.detection.adapters.mmdet.apis.train.torch.xpu.optimize", + return_value=(mocker.MagicMock(), mocker.MagicMock()), + ) # Call the function train_detector(model, dataset, mmcv_cfg) diff --git a/tests/unit/algorithms/segmentation/adapters/mmseg/api/test_train.py b/tests/unit/algorithms/segmentation/adapters/mmseg/api/test_train.py index 99a93fac1f3..8f70756764b 100644 --- a/tests/unit/algorithms/segmentation/adapters/mmseg/api/test_train.py +++ b/tests/unit/algorithms/segmentation/adapters/mmseg/api/test_train.py @@ -94,7 +94,6 @@ def test_train_model_multiple_datasets_distributed_training(self, mock_modules, # Call the function train_segmentor(model, [dataset, dataset], mmcv_cfg, distributed=True, validate=True) - @pytest.mark.skipif(is_xpu_available() or not torch.cuda.is_available(), reason="cuda is not available") def test_train_model_specific_timestamp_and_cuda_device(self, mock_modules, mmcv_cfg, model, dataset): # Create mock inputs _ = mock_modules @@ -104,11 +103,14 @@ def test_train_model_specific_timestamp_and_cuda_device(self, mock_modules, mmcv # Call the function train_segmentor(model, dataset, mmcv_cfg, timestamp=timestamp, meta=meta) - @pytest.mark.skipif(not is_xpu_available(), reason="xpu is not available") - def test_train_model_xpu_device(self, mock_modules, mmcv_cfg, model, dataset): + def test_train_model_xpu_device(self, mock_modules, mmcv_cfg, model, dataset, mocker): # Create mock inputs _ = mock_modules mmcv_cfg.device = "xpu" - + mocker.patch("otx.algorithms.segmentation.adapters.mmseg.apis.train.torch") + 
mocker.patch( + "otx.algorithms.segmentation.adapters.mmseg.apis.train.torch.xpu.optimize", + return_value=(mocker.MagicMock(), mocker.MagicMock()), + ) # Call the function train_segmentor(model, dataset, mmcv_cfg) diff --git a/tools/experiment.py b/tools/experiment.py index 6a79aae7537..53dc0faeb37 100644 --- a/tools/experiment.py +++ b/tools/experiment.py @@ -22,10 +22,11 @@ from typing import Any, Dict, List, Optional, Tuple, Union import yaml -from otx.cli.tools.cli import main as otx_cli from rich.console import Console from rich.table import Table +from otx.cli.tools.cli import main as otx_cli + rich_console = Console()
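Note: the unit-test changes above all rely on the same pattern: patch is_xpu_available (and, where needed, the torch module) in the namespace of the module under test, so XPU-only code paths can be exercised on machines without an XPU. The sketch below illustrates that pattern with placeholder names; is_xpu_available here is a local stand-in and SingleXPUStrategyLike is hypothetical, not the actual OTX code.

# sketch_xpu_mocking.py: illustrative only; mirrors the patching style used in
# this PR, but against placeholder code rather than the real OTX modules.
from unittest.mock import patch

import pytest


def is_xpu_available() -> bool:
    """Stand-in for an XPU-availability check such as otx's is_xpu_available()."""
    return False  # a typical CI runner has no XPU


class SingleXPUStrategyLike:
    """Mimics the SingleXPUStrategy guard: refuse to construct without an XPU."""

    def __init__(self) -> None:
        if not is_xpu_available():
            raise RuntimeError("requires XPU devices to run")


def test_raises_without_xpu():
    # No patching: the guard fires, as in the updated SingleXPUStrategy test.
    with pytest.raises(RuntimeError):
        SingleXPUStrategyLike()


def test_constructs_with_mocked_xpu():
    # Patch the name where it is looked up (this module), not where it is
    # defined, so the guard sees the mocked return value.
    with patch(f"{__name__}.is_xpu_available", return_value=True):
        strategy = SingleXPUStrategyLike()
    assert isinstance(strategy, SingleXPUStrategyLike)

Patching at the point of use is what lets the tests drop their @pytest.mark.skipif guards while still running on XPU-less hosts.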