Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add possibility to run inference benchmarks on XPU device #7705

Merged
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

### Added

- Added possibility to run inference benchmarks on XPU device ([#7705](https://github.com/pyg-team/pytorch_geometric/pull/7705))
- Added `HeteroData` support in `to_networkx` ([#7713](https://github.com/pyg-team/pytorch_geometric/pull/7713))
- Added `FlopsCount` support via `fvcore` ([#7693](https://github.com/pyg-team/pytorch_geometric/pull/7693))
- Added back support for PyTorch >= 1.11.0 ([#7656](https://github.com/pyg-team/pytorch_geometric/pull/7656))
Expand Down
41 changes: 34 additions & 7 deletions benchmark/inference/inference_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
save_benchmark_data,
test,
write_to_csv,
xpu_profiler,
)
from torch_geometric.loader import NeighborLoader
from torch_geometric.nn import PNAConv
Expand Down Expand Up @@ -42,11 +43,23 @@ def run(args: argparse.ArgumentParser):
warnings.warn("Cannot write profile data to CSV because profiling is "
"disabled")

# cuda device is not suitable for full batch mode
device = torch.device(
'cuda' if not args.full_batch and torch.cuda.is_available() else 'cpu')
if args.device == 'xpu':
try:
import intel_extension_for_pytorch as ipex
except ImportError:
raise RuntimeError('XPU device requires IPEX to be installed')

if ((args.device == 'cuda' and not torch.cuda.is_available())
or (args.device == 'xpu' and not torch.xpu.is_available())):
raise RuntimeError(f'{args.device.upper()} is not available')

if args.device == 'cuda' and args.full_batch:
raise RuntimeError('CUDA device is not suitable for full batch mode')

device = torch.device(args.device)

print('BENCHMARK STARTS')
print(f'Running on {args.device.upper()}')
for dataset_name in args.datasets:
assert dataset_name in supported_sets.keys(
), f"Dataset {dataset_name} isn't supported."
Expand All @@ -66,11 +79,17 @@ def run(args: argparse.ArgumentParser):
if args.num_layers != [1] and not hetero and args.num_steps != -1:
raise ValueError("Layer-wise inference requires `steps=-1`")

if torch.cuda.is_available():
if args.device == 'cuda':
amp = torch.cuda.amp.autocast(enabled=False)
elif args.device == 'xpu':
amp = torch.xpu.amp.autocast(enabled=False)
else:
amp = torch.cpu.amp.autocast(enabled=args.bf16)

if args.device == 'xpu' and args.warmup < 1:
print('XPU device requires warmup - setting warmup=1')
args.warmup = 1

inputs_channels = data[
'paper'].num_features if dataset_name == 'ogbn-mag' \
else dataset.num_features
Expand Down Expand Up @@ -163,16 +182,22 @@ def run(args: argparse.ArgumentParser):
state_dict = torch.load(args.ckpt_path)
model.load_state_dict(state_dict)
model.eval()
if args.device == 'xpu':
model = ipex.optimize(model)

# Define context manager parameters:
if args.cpu_affinity and with_loader:
cpu_affinity = subgraph_loader.enable_cpu_affinity(
args.loader_cores)
else:
cpu_affinity = nullcontext()
profile = torch_profile(
args.export_chrome_trace, csv_data,
args.write_csv) if args.profile else nullcontext()
if args.profile and args.device == 'xpu':
profile = xpu_profiler(args.export_chrome_trace)
elif args.profile:
profile = torch_profile(args.export_chrome_trace,
csv_data, args.write_csv)
else:
profile = nullcontext()
itt = emit_itt(
) if args.vtune_profile else nullcontext()

Expand Down Expand Up @@ -256,6 +281,8 @@ def run(args: argparse.ArgumentParser):
argparser = argparse.ArgumentParser('GNN inference benchmark')
add = argparser.add_argument

add('--device', choices=['cpu', 'cuda', 'xpu'], default='cuda',
    help='Device to run benchmark on')
add('--datasets', nargs='+',
default=['ogbn-mag', 'ogbn-products', 'Reddit'], type=str)
add('--use-sparse-tensor', action='store_true',
Expand Down
2 changes: 2 additions & 0 deletions benchmark/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from .utils import get_split_masks
from .utils import save_benchmark_data, write_to_csv
from .utils import test
from .utils import xpu_profiler

__all__ = [
'emit_itt',
Expand All @@ -14,4 +15,5 @@
'save_benchmark_data',
'write_to_csv',
'test',
'xpu_profiler',
]
11 changes: 10 additions & 1 deletion benchmark/utils/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import os.path as osp
from contextlib import contextmanager
from datetime import datetime

import torch
Expand All @@ -18,7 +19,6 @@
try:
from torch.autograd.profiler import emit_itt
except ImportError:
from contextlib import contextmanager

@contextmanager
def emit_itt(*args, **kwargs):
Expand Down Expand Up @@ -194,3 +194,12 @@ def test(model, loader, device, hetero, progress_bar=True,
total_examples += batch_size
total_correct += int((pred == batch.y[:batch_size]).sum())
return total_correct / total_examples


@contextmanager
def xpu_profiler(export_chrome_trace=True):
    """Profile the wrapped code on an Intel XPU device.

    Uses the legacy autograd profiler with ``use_xpu=True``. On normal
    exit, prints a table of operator timings sorted by XPU self time
    and, optionally, exports a Chrome trace.

    Args:
        export_chrome_trace (bool): If ``True`` (default), write the
            captured timeline to ``timeline.json`` in the current
            working directory.
    """
    with torch.autograd.profiler_legacy.profile(use_xpu=True) as profile:
        yield
    # Reported only after the profiled region finished without error.
    print(profile.key_averages().table(sort_by='self_xpu_time_total'))
    if export_chrome_trace:
        profile.export_chrome_trace('timeline.json')