Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add possibility to run inference benchmarks on XPU device #7705

Merged
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

### Added

- Added possibility to run inference benchmarks on XPU device ([#7705](https://github.com/pyg-team/pytorch_geometric/pull/7705))
- Added `HeteroData` support in `to_networkx` ([#7713](https://github.com/pyg-team/pytorch_geometric/pull/7713))
- Added `FlopsCount` support via `fvcore` ([#7693](https://github.com/pyg-team/pytorch_geometric/pull/7693))
- Added back support for PyTorch >= 1.11.0 ([#7656](https://github.com/pyg-team/pytorch_geometric/pull/7656))
Expand Down
47 changes: 39 additions & 8 deletions benchmark/inference/inference_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,12 @@
)
from torch_geometric.loader import NeighborLoader
from torch_geometric.nn import PNAConv
from torch_geometric.profile import rename_profile_file, timeit, torch_profile
from torch_geometric.profile import (
rename_profile_file,
timeit,
torch_profile,
xpu_profile,
)

supported_sets = {
'ogbn-mag': ['rgat', 'rgcn'],
Expand All @@ -42,11 +47,23 @@ def run(args: argparse.ArgumentParser):
warnings.warn("Cannot write profile data to CSV because profiling is "
"disabled")

# cuda device is not suitable for full batch mode
device = torch.device(
'cuda' if not args.full_batch and torch.cuda.is_available() else 'cpu')
if args.device == 'xpu':
try:
import intel_extension_for_pytorch as ipex
except ImportError:
raise RuntimeError('XPU device requires IPEX to be installed')

if ((args.device == 'cuda' and not torch.cuda.is_available())
or (args.device == 'xpu' and not torch.xpu.is_available())):
raise RuntimeError(f'{args.device.upper()} is not available')

if args.device == 'cuda' and args.full_batch:
raise RuntimeError('CUDA device is not suitable for full batch mode')

device = torch.device(args.device)

print('BENCHMARK STARTS')
print(f'Running on {args.device.upper()}')
for dataset_name in args.datasets:
assert dataset_name in supported_sets.keys(
), f"Dataset {dataset_name} isn't supported."
Expand All @@ -66,11 +83,17 @@ def run(args: argparse.ArgumentParser):
if args.num_layers != [1] and not hetero and args.num_steps != -1:
raise ValueError("Layer-wise inference requires `steps=-1`")

if torch.cuda.is_available():
if args.device == 'cuda':
amp = torch.cuda.amp.autocast(enabled=False)
elif args.device == 'xpu':
amp = torch.xpu.amp.autocast(enabled=False)
else:
amp = torch.cpu.amp.autocast(enabled=args.bf16)

if args.device == 'xpu' and args.warmup < 1:
print('XPU device requires warmup - setting warmup=1')
args.warmup = 1

inputs_channels = data[
'paper'].num_features if dataset_name == 'ogbn-mag' \
else dataset.num_features
Expand Down Expand Up @@ -163,16 +186,22 @@ def run(args: argparse.ArgumentParser):
state_dict = torch.load(args.ckpt_path)
model.load_state_dict(state_dict)
model.eval()
if args.device == 'xpu':
model = ipex.optimize(model)

# Define context manager parameters:
if args.cpu_affinity and with_loader:
cpu_affinity = subgraph_loader.enable_cpu_affinity(
args.loader_cores)
else:
cpu_affinity = nullcontext()
profile = torch_profile(
args.export_chrome_trace, csv_data,
args.write_csv) if args.profile else nullcontext()
if args.profile and args.device == 'xpu':
profile = xpu_profile(args.export_chrome_trace)
elif args.profile:
profile = torch_profile(args.export_chrome_trace,
csv_data, args.write_csv)
else:
profile = nullcontext()
itt = emit_itt(
) if args.vtune_profile else nullcontext()

Expand Down Expand Up @@ -256,6 +285,8 @@ def run(args: argparse.ArgumentParser):
argparser = argparse.ArgumentParser('GNN inference benchmark')
add = argparser.add_argument

add('--device', choices=['cpu', 'cuda', 'xpu'], default='cpu',
help='Device to run benchmark on')
add('--datasets', nargs='+',
default=['ogbn-mag', 'ogbn-products', 'Reddit'], type=str)
add('--use-sparse-tensor', action='store_true',
Expand Down
31 changes: 30 additions & 1 deletion test/profile/test_profile.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os.path
import warnings

import pytest
import torch
import torch.nn.functional as F

Expand All @@ -11,11 +12,12 @@
rename_profile_file,
timeit,
)
from torch_geometric.profile.profile import torch_profile
from torch_geometric.profile.profile import torch_profile, xpu_profile
from torch_geometric.testing import (
onlyCUDA,
onlyLinux,
onlyOnline,
onlyXPU,
withCUDA,
withPackage,
)
Expand Down Expand Up @@ -105,3 +107,30 @@ def test_torch_profile(capfd, get_dataset, device):
rename_profile_file('test_profile')
assert os.path.exists('profile-test_profile.json')
os.remove('profile-test_profile.json')


@onlyXPU
@onlyOnline
@pytest.mark.parametrize('export_chrome_trace', [False, True])
def test_xpu_profile(capfd, get_dataset, export_chrome_trace):
    # Run a GraphSAGE forward pass under the XPU profiler and verify that a
    # profiling summary is printed; a chrome trace file is only written when
    # `export_chrome_trace` is set.
    device = torch.device('xpu')
    dataset = get_dataset(name='Cora')
    data = dataset[0].to(device)
    model = GraphSAGE(dataset.num_features, hidden_channels=64, num_layers=3,
                      out_channels=dataset.num_classes).to(device)

    with xpu_profile(export_chrome_trace):
        model(data.x, data.edge_index)

    stdout, _ = capfd.readouterr()
    assert 'Self CPU' in stdout
    if data.x.is_xpu:
        assert 'Self XPU' in stdout

    trace_file = 'timeline.json'
    if export_chrome_trace:
        assert os.path.exists(trace_file)
        os.remove(trace_file)
    else:
        assert not os.path.exists(trace_file)
2 changes: 2 additions & 0 deletions torch_geometric/profile/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
print_time_total,
rename_profile_file,
torch_profile,
xpu_profile,
)
from .utils import count_parameters
from .utils import get_model_size
Expand All @@ -21,6 +22,7 @@
'print_time_total',
'rename_profile_file',
'torch_profile',
'xpu_profile',
'count_parameters',
'get_model_size',
'get_data_size',
Expand Down
9 changes: 9 additions & 0 deletions torch_geometric/profile/profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,15 @@ def torch_profile(export_chrome_trace=True, csv_data=None, write_csv=None):
save_profile_data(csv_data, events, use_cuda)


@contextmanager
def xpu_profile(export_chrome_trace=True):
    r"""A context manager that profiles the wrapped code on an XPU device via
    the legacy autograd profiler and prints a summary table sorted by XPU
    self time.

    Args:
        export_chrome_trace (bool, optional): If set, additionally writes the
            captured timeline to :obj:`"timeline.json"`.
            (default: :obj:`True`)
    """
    with torch.autograd.profiler_legacy.profile(use_xpu=True) as prof:
        yield
    summary = prof.key_averages().table(sort_by='self_xpu_time_total')
    print(summary)
    if export_chrome_trace:
        prof.export_chrome_trace('timeline.json')


def format_prof_time(time):
    """Convert a profiler time from microseconds to seconds, rounded to
    three decimal places."""
    seconds = time / 1e6
    return round(seconds, 3)
Expand Down
2 changes: 2 additions & 0 deletions torch_geometric/testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
onlyLinux,
onlyPython,
onlyCUDA,
onlyXPU,
onlyOnline,
onlyGraphviz,
onlyNeighborSampler,
Expand All @@ -22,6 +23,7 @@
'onlyLinux',
'onlyPython',
'onlyCUDA',
'onlyXPU',
'onlyOnline',
'onlyGraphviz',
'onlyNeighborSampler',
Expand Down
14 changes: 14 additions & 0 deletions torch_geometric/testing/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,20 @@ def onlyCUDA(func: Callable) -> Callable:
)(func)


def onlyXPU(func: Callable) -> Callable:
r"""A decorator to skip tests if XPU is not found."""
import pytest
try:
import intel_extension_for_pytorch as ipex
xpu_available = ipex.xpu.is_available()
except ImportError:
xpu_available = False
return pytest.mark.skipif(
not xpu_available,
reason="XPU not available",
)(func)


def onlyOnline(func: Callable):
r"""A decorator to skip tests if there exists no connection to the
internet."""
Expand Down