diff --git a/src/otx/algorithms/anomaly/adapters/anomalib/strategies/xpu_single.py b/src/otx/algorithms/anomaly/adapters/anomalib/strategies/xpu_single.py index 3c00aeb6747..014132a840e 100644 --- a/src/otx/algorithms/anomaly/adapters/anomalib/strategies/xpu_single.py +++ b/src/otx/algorithms/anomaly/adapters/anomalib/strategies/xpu_single.py @@ -30,7 +30,6 @@ def __init__( checkpoint_io: Optional[CheckpointIO] = None, precision_plugin: Optional[PrecisionPlugin] = None, ): - if not is_xpu_available(): raise MisconfigurationException("`SingleXPUStrategy` requires XPU devices to run") diff --git a/src/otx/algorithms/common/adapters/torch/amp/xpu_grad_scaler.py b/src/otx/algorithms/common/adapters/torch/amp/xpu_grad_scaler.py index f3994050cae..be37f003b78 100644 --- a/src/otx/algorithms/common/adapters/torch/amp/xpu_grad_scaler.py +++ b/src/otx/algorithms/common/adapters/torch/amp/xpu_grad_scaler.py @@ -7,7 +7,11 @@ from typing import List import torch -from intel_extension_for_pytorch.cpu.autocast._grad_scaler import _MultiDeviceReplicator + +from otx.algorithms.common.utils.utils import is_xpu_available + +if is_xpu_available(): + from intel_extension_for_pytorch.cpu.autocast._grad_scaler import _MultiDeviceReplicator from torch.cuda.amp.grad_scaler import GradScaler, _refresh_per_optimizer_state @@ -16,6 +20,8 @@ class XPUGradScaler(GradScaler): def __init__(self, init_scale=2.0**16, growth_factor=2.0, backoff_factor=0.5, growth_interval=2000, enabled=True): self._enabled = enabled + if not is_xpu_available(): + raise RuntimeError("XPU GradScaler requires XPU device.") if self._enabled: assert growth_factor > 1.0, "The growth factor must be > 1.0." diff --git a/tests/unit/algorithms/anomaly/adapters/anomalib/accelerators/xpu.py b/tests/unit/algorithms/anomaly/adapters/anomalib/accelerators/xpu.py index 4b89d101661..d277fc2c6dc 100644 --- a/tests/unit/algorithms/anomaly/adapters/anomalib/accelerators/xpu.py +++ b/tests/unit/algorithms/anomaly/adapters/anomalib/accelerators/xpu.py @@ -10,41 +10,50 @@ from otx.algorithms.common.utils import is_xpu_available -@pytest.mark.skipif(not is_xpu_available(), reason="XPU is not available") class TestXPUAccelerator: @pytest.fixture - def accelerator(self): - return XPUAccelerator() + def accelerator(self, mocker): + mock_torch = mocker.patch("otx.algorithms.anomaly.adapters.anomalib.accelerators.xpu.torch") + return XPUAccelerator(), mock_torch def test_setup_device(self, accelerator): + accelerator, mock_torch = accelerator device = torch.device("xpu") accelerator.setup_device(device) + assert mock_torch.xpu.set_device.called def test_parse_devices(self, accelerator): + accelerator, _ = accelerator devices = [1, 2, 3] parsed_devices = accelerator.parse_devices(devices) assert isinstance(parsed_devices, list) assert parsed_devices == devices - def test_get_parallel_devices(self, accelerator): + def test_get_parallel_devices(self, accelerator, mocker): + accelerator, _ = accelerator devices = [1, 2, 3] parallel_devices = accelerator.get_parallel_devices(devices) assert isinstance(parallel_devices, list) - assert parallel_devices == [torch.device("xpu", idx) for idx in devices] + assert all([isinstance(device, mocker.MagicMock) for device in parallel_devices]) - def test_auto_device_count(self, accelerator): + def test_auto_device_count(self, accelerator, mocker): + accelerator, mock_torch = accelerator count = accelerator.auto_device_count() - assert isinstance(count, int) + assert isinstance(count, mocker.MagicMock) + assert 
mock_torch.xpu.device_count.called def test_is_available(self, accelerator): + accelerator, _ = accelerator available = accelerator.is_available() assert isinstance(available, bool) assert available == is_xpu_available() def test_get_device_stats(self, accelerator): + accelerator, _ = accelerator device = torch.device("xpu") stats = accelerator.get_device_stats(device) assert isinstance(stats, dict) def test_teardown(self, accelerator): + accelerator, _ = accelerator accelerator.teardown() diff --git a/tests/unit/algorithms/anomaly/adapters/anomalib/strategies/test_xpu_single.py b/tests/unit/algorithms/anomaly/adapters/anomalib/strategies/test_xpu_single.py index 97ff1989ed4..f1e0de3204b 100644 --- a/tests/unit/algorithms/anomaly/adapters/anomalib/strategies/test_xpu_single.py +++ b/tests/unit/algorithms/anomaly/adapters/anomalib/strategies/test_xpu_single.py @@ -7,22 +7,37 @@ import torch import pytorch_lightning as pl from otx.algorithms.anomaly.adapters.anomalib.strategies.xpu_single import SingleXPUStrategy -from otx.algorithms.common.utils.utils import is_xpu_available +from pytorch_lightning.utilities.exceptions import MisconfigurationException -@pytest.mark.skipif(not is_xpu_available(), reason="XPU is not available") class TestSingleXPUStrategy: - def test_init(self): + def test_init(self, mocker): + with pytest.raises(MisconfigurationException): + strategy = SingleXPUStrategy(device="xpu:0") + mocked_is_xpu_available = mocker.patch( + "otx.algorithms.anomaly.adapters.anomalib.strategies.xpu_single.is_xpu_available", return_value=True + ) strategy = SingleXPUStrategy(device="xpu:0") + assert mocked_is_xpu_available.call_count == 1 assert strategy._root_device.type == "xpu" assert strategy.accelerator is None - def test_is_distributed(self): - strategy = SingleXPUStrategy(device="xpu:0") + @pytest.fixture + def strategy(self, mocker): + mocker.patch( + "otx.algorithms.anomaly.adapters.anomalib.strategies.xpu_single.is_xpu_available", return_value=True + ) + return SingleXPUStrategy(device="xpu:0") + + def test_is_distributed(self, strategy): assert not strategy.is_distributed - def test_setup_optimizers(self): - strategy = SingleXPUStrategy(device="xpu:0") + def test_setup_optimizers(self, strategy, mocker): + mocker.patch("otx.algorithms.anomaly.adapters.anomalib.strategies.xpu_single.torch") + mocker.patch( + "otx.algorithms.anomaly.adapters.anomalib.strategies.xpu_single.torch.xpu.optimize", + return_value=(mocker.MagicMock(), mocker.MagicMock()), + ) trainer = pl.Trainer() # Create mock optimizers and models for testing model = torch.nn.Linear(10, 2) diff --git a/tests/unit/algorithms/classification/adapters/mmcls/api/test_train.py b/tests/unit/algorithms/classification/adapters/mmcls/api/test_train.py index 60aa92eef84..26143b66807 100644 --- a/tests/unit/algorithms/classification/adapters/mmcls/api/test_train.py +++ b/tests/unit/algorithms/classification/adapters/mmcls/api/test_train.py @@ -96,7 +96,6 @@ def test_train_model_multiple_datasets_distributed_training(self, mock_modules, # Call the function train_model(model, [dataset, dataset], mmcv_cfg, distributed=True, validate=True) - @pytest.mark.skipif(is_xpu_available() or not torch.cuda.is_available(), reason="cuda is not available") def test_train_model_specific_timestamp_and_cuda_device(self, mock_modules, mmcv_cfg, model, dataset): # Create mock inputs _ = mock_modules @@ -107,12 +106,15 @@ def test_train_model_specific_timestamp_and_cuda_device(self, mock_modules, mmcv # Call the function train_model(model, dataset, 
mmcv_cfg, timestamp=timestamp, device=device, meta=meta) - @pytest.mark.skipif(not is_xpu_available(), reason="xpu is not available") - def test_train_model_xpu_device(self, mock_modules, mmcv_cfg, model, dataset): + def test_train_model_xpu_device(self, mock_modules, mmcv_cfg, model, dataset, mocker): # Create mock inputs _ = mock_modules device = "xpu" mmcv_cfg.device = "xpu" - + mocker.patch("otx.algorithms.classification.adapters.mmcls.apis.train.torch") + mocker.patch( + "otx.algorithms.classification.adapters.mmcls.apis.train.torch.xpu.optimize", + return_value=(mocker.MagicMock(), mocker.MagicMock()), + ) # Call the function train_model(model, dataset, mmcv_cfg, device=device) diff --git a/tests/unit/algorithms/common/adapters/mmcv/hooks/test_xpu_optimizer_hook.py b/tests/unit/algorithms/common/adapters/mmcv/hooks/test_xpu_optimizer_hook.py index dd13f4c6e81..0151ccff104 100644 --- a/tests/unit/algorithms/common/adapters/mmcv/hooks/test_xpu_optimizer_hook.py +++ b/tests/unit/algorithms/common/adapters/mmcv/hooks/test_xpu_optimizer_hook.py @@ -3,18 +3,16 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -import pytest -from otx.algorithms.common.utils.utils import is_xpu_available - -@pytest.mark.skipif(not is_xpu_available(), reason="XPU is not available") -def test_init(): +def test_init(mocker): from otx.algorithms.common.adapters.mmcv.hooks.xpu_optimizer_hook import BFp16XPUOptimizerHook - from otx.algorithms.common.adapters.torch.amp import XPUGradScaler + mocker.patch( + "otx.algorithms.common.adapters.mmcv.hooks.xpu_optimizer_hook.XPUGradScaler", return_value=mocker.MagicMock() + ) hook = BFp16XPUOptimizerHook(grad_clip=None, coalesce=True, bucket_size_mb=-1, loss_scale=512.0, distributed=True) assert hook.coalesce is True # Check coalesce is True assert hook.bucket_size_mb == -1 # Check bucket size is -1 assert hook._scale_update_param is 512.0 # Check scale update param is 512.0 assert hook.distributed is True # Check distributed is True - assert isinstance(hook.loss_scaler, XPUGradScaler) + assert isinstance(hook.loss_scaler, mocker.MagicMock) diff --git a/tests/unit/algorithms/common/adapters/torch/amp/test_xpu_grad_scaler.py b/tests/unit/algorithms/common/adapters/torch/amp/test_xpu_grad_scaler.py index 107939f50eb..465834b9a37 100644 --- a/tests/unit/algorithms/common/adapters/torch/amp/test_xpu_grad_scaler.py +++ b/tests/unit/algorithms/common/adapters/torch/amp/test_xpu_grad_scaler.py @@ -6,16 +6,14 @@ import pytest import torch -from otx.algorithms.common.utils import is_xpu_available -if is_xpu_available(): - from otx.algorithms.common.adapters.torch.amp.xpu_grad_scaler import XPUGradScaler +from otx.algorithms.common.adapters.torch.amp.xpu_grad_scaler import XPUGradScaler -@pytest.mark.skipif(not is_xpu_available(), reason="XPU is not available") class TestXPUGradScaler: @pytest.fixture - def grad_scaler(self): + def grad_scaler(self, mocker): + mocker.patch("otx.algorithms.common.adapters.torch.amp.xpu_grad_scaler.is_xpu_available", return_value=True) return XPUGradScaler() @pytest.fixture @@ -31,15 +29,11 @@ def test_init(self, grad_scaler): assert grad_scaler._backoff_factor == 0.5 assert grad_scaler._growth_interval == 2000 - def test_scale(self, grad_scaler): - outputs = torch.tensor([1.0, 2.0, 3.0], device="xpu:0") + def test_scale(self, grad_scaler, mocker): + outputs = mocker.MagicMock(torch.Tensor) + outputs.device.type = "xpu" + outputs.device.index = 0 + grad_scaler._lazy_init_scale_growth_tracker = mocker.MagicMock() + 
grad_scaler._scale = mocker.MagicMock() scaled_outputs = grad_scaler.scale(outputs) - assert scaled_outputs.device.type == "xpu" - assert torch.equal(scaled_outputs, outputs * grad_scaler._scale) - - def test_unscale_grads(self, grad_scaler, optimizer): - inv_scale = 1.0 - found_inf = False - output = grad_scaler._unscale_grads_(optimizer, inv_scale, found_inf, allow_bf16=False) - assert isinstance(output, dict) - assert not output + assert isinstance(scaled_outputs.device.type, mocker.MagicMock) diff --git a/tests/unit/algorithms/detection/adapters/mmdet/api/test_train.py b/tests/unit/algorithms/detection/adapters/mmdet/api/test_train.py index e3a95e2c52a..1048cabbe2c 100644 --- a/tests/unit/algorithms/detection/adapters/mmdet/api/test_train.py +++ b/tests/unit/algorithms/detection/adapters/mmdet/api/test_train.py @@ -88,20 +88,24 @@ def test_train_model_multiple_datasets_distributed_training(self, mock_modules, # Call the function train_detector(model, [dataset, dataset], mmcv_cfg, distributed=True, validate=True) - @pytest.mark.skipif(is_xpu_available() or not torch.cuda.is_available(), reason="cuda is not available") - def test_train_model_specific_timestamp_and_cuda_device(self, mock_modules, mmcv_cfg, model, dataset): + def test_train_model_specific_timestamp_and_cuda_device(self, mock_modules, mmcv_cfg, model, dataset, mocker): # Create mock inputs _ = mock_modules timestamp = "2024-01-01" mmcv_cfg.device = "cuda" meta = {"info": "some_info"} + # Call the function train_detector(model, dataset, mmcv_cfg, timestamp=timestamp, meta=meta) - @pytest.mark.skipif(not is_xpu_available(), reason="xpu is not available") - def test_train_model_xpu_device(self, mock_modules, mmcv_cfg, model, dataset): + def test_train_model_xpu_device(self, mock_modules, mmcv_cfg, model, dataset, mocker): # Create mock inputs _ = mock_modules mmcv_cfg.device = "xpu" + mocker.patch("otx.algorithms.detection.adapters.mmdet.apis.train.torch") + mocker.patch( + "otx.algorithms.detection.adapters.mmdet.apis.train.torch.xpu.optimize", + return_value=(mocker.MagicMock(), mocker.MagicMock()), + ) # Call the function train_detector(model, dataset, mmcv_cfg) diff --git a/tests/unit/algorithms/segmentation/adapters/mmseg/api/test_train.py b/tests/unit/algorithms/segmentation/adapters/mmseg/api/test_train.py index 99a93fac1f3..8f70756764b 100644 --- a/tests/unit/algorithms/segmentation/adapters/mmseg/api/test_train.py +++ b/tests/unit/algorithms/segmentation/adapters/mmseg/api/test_train.py @@ -94,7 +94,6 @@ def test_train_model_multiple_datasets_distributed_training(self, mock_modules, # Call the function train_segmentor(model, [dataset, dataset], mmcv_cfg, distributed=True, validate=True) - @pytest.mark.skipif(is_xpu_available() or not torch.cuda.is_available(), reason="cuda is not available") def test_train_model_specific_timestamp_and_cuda_device(self, mock_modules, mmcv_cfg, model, dataset): # Create mock inputs _ = mock_modules @@ -104,11 +103,14 @@ def test_train_model_specific_timestamp_and_cuda_device(self, mock_modules, mmcv # Call the function train_segmentor(model, dataset, mmcv_cfg, timestamp=timestamp, meta=meta) - @pytest.mark.skipif(not is_xpu_available(), reason="xpu is not available") - def test_train_model_xpu_device(self, mock_modules, mmcv_cfg, model, dataset): + def test_train_model_xpu_device(self, mock_modules, mmcv_cfg, model, dataset, mocker): # Create mock inputs _ = mock_modules mmcv_cfg.device = "xpu" - + mocker.patch("otx.algorithms.segmentation.adapters.mmseg.apis.train.torch") + 
mocker.patch( + "otx.algorithms.segmentation.adapters.mmseg.apis.train.torch.xpu.optimize", + return_value=(mocker.MagicMock(), mocker.MagicMock()), + ) # Call the function train_segmentor(model, dataset, mmcv_cfg) diff --git a/tools/experiment.py b/tools/experiment.py index 6a79aae7537..53dc0faeb37 100644 --- a/tools/experiment.py +++ b/tools/experiment.py @@ -22,10 +22,11 @@ from typing import Any, Dict, List, Optional, Tuple, Union import yaml -from otx.cli.tools.cli import main as otx_cli from rich.console import Console from rich.table import Table +from otx.cli.tools.cli import main as otx_cli + rich_console = Console()
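Note: the unit-test changes above all rely on the same pattern: patch is_xpu_available (and, where needed, the torch module) in the namespace of the module under test, so XPU-only code paths can be exercised on machines without an XPU. The sketch below illustrates that pattern with placeholder names; is_xpu_available here is a local stand-in and SingleXPUStrategyLike is hypothetical, not the actual OTX code.

# sketch_xpu_mocking.py: illustrative only; mirrors the patching style used in
# this PR, but against placeholder code rather than the real OTX modules.
from unittest.mock import patch

import pytest


def is_xpu_available() -> bool:
    """Stand-in for an XPU-availability check such as otx's is_xpu_available()."""
    return False  # a typical CI runner has no XPU


class SingleXPUStrategyLike:
    """Mimics the SingleXPUStrategy guard: refuse to construct without an XPU."""

    def __init__(self) -> None:
        if not is_xpu_available():
            raise RuntimeError("requires XPU devices to run")


def test_raises_without_xpu():
    # No patching: the guard fires, as in the updated SingleXPUStrategy test.
    with pytest.raises(RuntimeError):
        SingleXPUStrategyLike()


def test_constructs_with_mocked_xpu():
    # Patch the name where it is looked up (this module), not where it is
    # defined, so the guard sees the mocked return value.
    with patch(f"{__name__}.is_xpu_available", return_value=True):
        strategy = SingleXPUStrategyLike()
    assert isinstance(strategy, SingleXPUStrategyLike)

Patching at the point of use is what lets the tests drop their @pytest.mark.skipif guards while still running on XPU-less hosts.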