From 68bb171d31e12dd1d45d5b8d2d45df87ff19b589 Mon Sep 17 00:00:00 2001 From: Damian Szwichtenberg Date: Wed, 14 Jun 2023 09:05:10 +0200 Subject: [PATCH 01/12] Enable XPU inference benchmarks --- benchmark/inference/inference_benchmark.py | 41 ++++++++++++++++++---- benchmark/utils/__init__.py | 2 ++ benchmark/utils/utils.py | 10 ++++-- 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/benchmark/inference/inference_benchmark.py b/benchmark/inference/inference_benchmark.py index 380c1d53f5fa..825a3a38de5a 100644 --- a/benchmark/inference/inference_benchmark.py +++ b/benchmark/inference/inference_benchmark.py @@ -13,6 +13,7 @@ save_benchmark_data, test, write_to_csv, + xpu_profiler, ) from torch_geometric.loader import NeighborLoader from torch_geometric.nn import PNAConv @@ -42,11 +43,23 @@ def run(args: argparse.ArgumentParser): warnings.warn("Cannot write profile data to CSV because profiling is " "disabled") - # cuda device is not suitable for full batch mode - device = torch.device( - 'cuda' if not args.full_batch and torch.cuda.is_available() else 'cpu') + if args.device == 'xpu': + try: + import intel_extension_for_pytorch as ipex + except ImportError: + raise RuntimeError('XPU device requires IPEX to be installed') + + if ((args.device == 'cuda' and not torch.cuda.is_available()) or + (args.device == 'xpu' and not torch.xpu.is_available())): + raise RuntimeError(f'{args.device.upper()} is not available') + + if args.device == 'cuda' and args.full_batch: + raise RuntimeError('CUDA device is not suitable for full batch mode') + + device = torch.device(args.device) print('BENCHMARK STARTS') + print(f'Running on {args.device.upper()}') for dataset_name in args.datasets: assert dataset_name in supported_sets.keys( ), f"Dataset {dataset_name} isn't supported." @@ -66,11 +79,17 @@ def run(args: argparse.ArgumentParser): if args.num_layers != [1] and not hetero and args.num_steps != -1: raise ValueError("Layer-wise inference requires `steps=-1`") - if torch.cuda.is_available(): + if args.device == 'cuda': amp = torch.cuda.amp.autocast(enabled=False) + elif args.device == 'xpu': + amp = torch.xpu.amp.autocast(enabled=False) else: amp = torch.cpu.amp.autocast(enabled=args.bf16) + if args.device == 'xpu' and args.warmup < 1: + print('XPU device requires warmup - setting warmup=1') + args.warmup = 1 + inputs_channels = data[ 'paper'].num_features if dataset_name == 'ogbn-mag' \ else dataset.num_features @@ -163,6 +182,8 @@ def run(args: argparse.ArgumentParser): state_dict = torch.load(args.ckpt_path) model.load_state_dict(state_dict) model.eval() + if args.device == 'xpu': + model = ipex.optimize(model) # Define context manager parameters: if args.cpu_affinity and with_loader: @@ -170,9 +191,13 @@ def run(args: argparse.ArgumentParser): args.loader_cores) else: cpu_affinity = nullcontext() - profile = torch_profile( - args.export_chrome_trace, csv_data, - args.write_csv) if args.profile else nullcontext() + if args.profile and device == 'xpu': + profile = xpu_profiler() + elif args.profile: + profile = torch_profile(args.export_chrome_trace, + csv_data, args.write_csv) + else: + profile = nullcontext() itt = emit_itt( ) if args.vtune_profile else nullcontext() @@ -256,6 +281,8 @@ def run(args: argparse.ArgumentParser): argparser = argparse.ArgumentParser('GNN inference benchmark') add = argparser.add_argument + add('--device', choices=['cpu', 'cuda', 'xpu'], default='cuda', + help='Device to run benchmark on') add('--datasets', nargs='+', default=['ogbn-mag', 'ogbn-products', 'Reddit'], type=str) add('--use-sparse-tensor', action='store_true', diff --git a/benchmark/utils/__init__.py b/benchmark/utils/__init__.py index d97451a778e3..f73aebb5b7e7 100644 --- a/benchmark/utils/__init__.py +++ b/benchmark/utils/__init__.py @@ -4,6 +4,7 @@ from .utils import get_split_masks from .utils import save_benchmark_data, write_to_csv from .utils import test +from .utils import xpu_profiler __all__ = [ 'emit_itt', @@ -14,4 +15,5 @@ 'save_benchmark_data', 'write_to_csv', 'test', + 'xpu_profiler', ] diff --git a/benchmark/utils/utils.py b/benchmark/utils/utils.py index 5b2dd3d9f650..7c389dc89aaf 100644 --- a/benchmark/utils/utils.py +++ b/benchmark/utils/utils.py @@ -1,5 +1,6 @@ import os import os.path as osp +from contextlib import contextmanager from datetime import datetime import torch @@ -18,8 +19,6 @@ try: from torch.autograd.profiler import emit_itt except ImportError: - from contextlib import contextmanager - @contextmanager def emit_itt(*args, **kwargs): yield @@ -194,3 +193,10 @@ def test(model, loader, device, hetero, progress_bar=True, total_examples += batch_size total_correct += int((pred == batch.y[:batch_size]).sum()) return total_correct / total_examples + + +@contextmanager +def xpu_profiler(): + with torch.autograd.profiler_legacy.profile(use_xpu=True) as profile: + yield + print(profile.key_averages().table(sort_by='self_xpu_time_total')) From 383159aad95945db0f09e089ee94510e4660bbca Mon Sep 17 00:00:00 2001 From: Damian Szwichtenberg Date: Mon, 19 Jun 2023 11:32:12 +0200 Subject: [PATCH 02/12] Export chrome trace file for XPU device --- benchmark/inference/inference_benchmark.py | 4 ++-- benchmark/utils/utils.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/benchmark/inference/inference_benchmark.py b/benchmark/inference/inference_benchmark.py index 825a3a38de5a..b8d861ee0a0e 100644 --- a/benchmark/inference/inference_benchmark.py +++ b/benchmark/inference/inference_benchmark.py @@ -191,8 +191,8 @@ def run(args: argparse.ArgumentParser): args.loader_cores) else: cpu_affinity = nullcontext() - if args.profile and device == 'xpu': - profile = xpu_profiler() + if args.profile and args.device == 'xpu': + profile = xpu_profiler(args.export_chrome_trace) elif args.profile: profile = torch_profile(args.export_chrome_trace, csv_data, args.write_csv) diff --git a/benchmark/utils/utils.py b/benchmark/utils/utils.py index 7c389dc89aaf..59ec3c015574 100644 --- a/benchmark/utils/utils.py +++ b/benchmark/utils/utils.py @@ -196,7 +196,9 @@ def test(model, loader, device, hetero, progress_bar=True, @contextmanager -def xpu_profiler(): +def xpu_profiler(export_chrome_trace=True): with torch.autograd.profiler_legacy.profile(use_xpu=True) as profile: yield print(profile.key_averages().table(sort_by='self_xpu_time_total')) + if export_chrome_trace: + profile.export_chrome_trace('timeline.json') From 3c83f40392304672866d296aa054422bd09578e2 Mon Sep 17 00:00:00 2001 From: Damian Szwichtenberg Date: Fri, 7 Jul 2023 07:54:11 +0200 Subject: [PATCH 03/12] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e9eaa0d292bd..9bcc1c405979 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Added +- Added possibility to run inference benchmarks on XPU device ([#7705](https://github.com/pyg-team/pytorch_geometric/pull/7705)) - Added `HeteroData` support in `to_networkx` ([#7713](https://github.com/pyg-team/pytorch_geometric/pull/7713)) - Added `FlopsCount` support via `fvcore` ([#7693](https://github.com/pyg-team/pytorch_geometric/pull/7693)) - Added back support for PyTorch >= 1.11.0 ([#7656](https://github.com/pyg-team/pytorch_geometric/pull/7656)) From 02f827af508c2fc55e2e857a132df05be262b236 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 7 Jul 2023 06:01:20 +0000 Subject: [PATCH 04/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- benchmark/inference/inference_benchmark.py | 6 +++--- benchmark/utils/utils.py | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/benchmark/inference/inference_benchmark.py b/benchmark/inference/inference_benchmark.py index b8d861ee0a0e..0aa90e9cdf99 100644 --- a/benchmark/inference/inference_benchmark.py +++ b/benchmark/inference/inference_benchmark.py @@ -49,8 +49,8 @@ def run(args: argparse.ArgumentParser): except ImportError: raise RuntimeError('XPU device requires IPEX to be installed') - if ((args.device == 'cuda' and not torch.cuda.is_available()) or - (args.device == 'xpu' and not torch.xpu.is_available())): + if ((args.device == 'cuda' and not torch.cuda.is_available()) + or (args.device == 'xpu' and not torch.xpu.is_available())): raise RuntimeError(f'{args.device.upper()} is not available') if args.device == 'cuda' and args.full_batch: @@ -195,7 +195,7 @@ def run(args: argparse.ArgumentParser): profile = xpu_profiler(args.export_chrome_trace) elif args.profile: profile = torch_profile(args.export_chrome_trace, - csv_data, args.write_csv) + csv_data, args.write_csv) else: profile = nullcontext() itt = emit_itt( diff --git a/benchmark/utils/utils.py b/benchmark/utils/utils.py index 59ec3c015574..0bb1af204125 100644 --- a/benchmark/utils/utils.py +++ b/benchmark/utils/utils.py @@ -19,6 +19,7 @@ try: from torch.autograd.profiler import emit_itt except ImportError: + @contextmanager def emit_itt(*args, **kwargs): yield From 376786f91f691eb2e98dca263029dac4a2963808 Mon Sep 17 00:00:00 2001 From: Damian Szwichtenberg Date: Mon, 17 Jul 2023 08:38:34 +0200 Subject: [PATCH 05/12] Set 'cpu' as a default device Co-authored-by: kgajdamo --- benchmark/inference/inference_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/inference/inference_benchmark.py b/benchmark/inference/inference_benchmark.py index 0aa90e9cdf99..4919a661745b 100644 --- a/benchmark/inference/inference_benchmark.py +++ b/benchmark/inference/inference_benchmark.py @@ -281,7 +281,7 @@ def run(args: argparse.ArgumentParser): argparser = argparse.ArgumentParser('GNN inference benchmark') add = argparser.add_argument - add('--device', choices=['cpu', 'cuda', 'xpu'], default='cuda', + add('--device', choices=['cpu', 'cuda', 'xpu'], default='cpu', help='Device to run benchmark on') add('--datasets', nargs='+', default=['ogbn-mag', 'ogbn-products', 'Reddit'], type=str) From b37a5d8f2637fdfeaae6d192bb58e5907cb0fc82 Mon Sep 17 00:00:00 2001 From: Damian Szwichtenberg Date: Mon, 17 Jul 2023 09:00:19 +0200 Subject: [PATCH 06/12] Move `xpu_profile` to torch_geometric/profile --- benchmark/inference/inference_benchmark.py | 5 ++--- benchmark/utils/utils.py | 11 +---------- torch_geometric/profile/profile.py | 9 +++++++++ 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/benchmark/inference/inference_benchmark.py b/benchmark/inference/inference_benchmark.py index 4919a661745b..3fce77a99f99 100644 --- a/benchmark/inference/inference_benchmark.py +++ b/benchmark/inference/inference_benchmark.py @@ -13,11 +13,10 @@ save_benchmark_data, test, write_to_csv, - xpu_profiler, ) from torch_geometric.loader import NeighborLoader from torch_geometric.nn import PNAConv -from torch_geometric.profile import rename_profile_file, timeit, torch_profile +from torch_geometric.profile import rename_profile_file, timeit, torch_profile, xpu_profile supported_sets = { 'ogbn-mag': ['rgat', 'rgcn'], @@ -192,7 +191,7 @@ def run(args: argparse.ArgumentParser): else: cpu_affinity = nullcontext() if args.profile and args.device == 'xpu': - profile = xpu_profiler(args.export_chrome_trace) + profile = xpu_profile(args.export_chrome_trace) elif args.profile: profile = torch_profile(args.export_chrome_trace, csv_data, args.write_csv) diff --git a/benchmark/utils/utils.py b/benchmark/utils/utils.py index 0bb1af204125..5b2dd3d9f650 100644 --- a/benchmark/utils/utils.py +++ b/benchmark/utils/utils.py @@ -1,6 +1,5 @@ import os import os.path as osp -from contextlib import contextmanager from datetime import datetime import torch @@ -19,6 +18,7 @@ try: from torch.autograd.profiler import emit_itt except ImportError: + from contextlib import contextmanager @contextmanager def emit_itt(*args, **kwargs): @@ -194,12 +194,3 @@ def test(model, loader, device, hetero, progress_bar=True, total_examples += batch_size total_correct += int((pred == batch.y[:batch_size]).sum()) return total_correct / total_examples - - -@contextmanager -def xpu_profiler(export_chrome_trace=True): - with torch.autograd.profiler_legacy.profile(use_xpu=True) as profile: - yield - print(profile.key_averages().table(sort_by='self_xpu_time_total')) - if export_chrome_trace: - profile.export_chrome_trace('timeline.json') diff --git a/torch_geometric/profile/profile.py b/torch_geometric/profile/profile.py index fc8f0a887914..748d7f57b7ad 100644 --- a/torch_geometric/profile/profile.py +++ b/torch_geometric/profile/profile.py @@ -265,6 +265,15 @@ def torch_profile(export_chrome_trace=True, csv_data=None, write_csv=None): save_profile_data(csv_data, events, use_cuda) +@contextmanager +def xpu_profile(export_chrome_trace=True): + with torch.autograd.profiler_legacy.profile(use_xpu=True) as profile: + yield + print(profile.key_averages().table(sort_by='self_xpu_time_total')) + if export_chrome_trace: + profile.export_chrome_trace('timeline.json') + + def format_prof_time(time): # Profile time is in micro seconds, so format it appropriately: return round(time / 1e6, 3) From ee13f981a10a5521c418a7f78bc49fba148b5259 Mon Sep 17 00:00:00 2001 From: Damian Szwichtenberg Date: Mon, 17 Jul 2023 09:26:57 +0200 Subject: [PATCH 07/12] Add test for XPU_profile --- test/profile/test_profile.py | 21 ++++++++++++++++++++- torch_geometric/testing/__init__.py | 2 ++ torch_geometric/testing/decorators.py | 14 ++++++++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/test/profile/test_profile.py b/test/profile/test_profile.py index c4b5c38f3f65..5c8aca04607c 100644 --- a/test/profile/test_profile.py +++ b/test/profile/test_profile.py @@ -11,9 +11,10 @@ rename_profile_file, timeit, ) -from torch_geometric.profile.profile import torch_profile +from torch_geometric.profile.profile import torch_profile, xpu_profile from torch_geometric.testing import ( onlyCUDA, + onlyXPU, onlyLinux, onlyOnline, withCUDA, @@ -105,3 +106,21 @@ def test_torch_profile(capfd, get_dataset, device): rename_profile_file('test_profile') assert os.path.exists('profile-test_profile.json') os.remove('profile-test_profile.json') + + +@onlyXPU +@onlyOnline +def test_xpu_profile(capfd, get_dataset): + dataset = get_dataset(name='Cora') + device = torch.device('xpu') + data = dataset[0].to(device) + model = GraphSAGE(dataset.num_features, hidden_channels=64, num_layers=3, + out_channels=dataset.num_classes).to(device) + + with xpu_profile(): + model(data.x, data.edge_index) + + out, _ = capfd.readouterr() + assert 'Self CPU' in out + if data.x.is_xpu: + assert 'Self XPU' in out diff --git a/torch_geometric/testing/__init__.py b/torch_geometric/testing/__init__.py index 83f9820416b3..6a108d42fc04 100644 --- a/torch_geometric/testing/__init__.py +++ b/torch_geometric/testing/__init__.py @@ -4,6 +4,7 @@ onlyLinux, onlyPython, onlyCUDA, + onlyXPU, onlyOnline, onlyGraphviz, onlyNeighborSampler, @@ -22,6 +23,7 @@ 'onlyLinux', 'onlyPython', 'onlyCUDA', + 'onlyXPU', 'onlyOnline', 'onlyGraphviz', 'onlyNeighborSampler', diff --git a/torch_geometric/testing/decorators.py b/torch_geometric/testing/decorators.py index b62625fa6a3f..e0d61db08ddd 100644 --- a/torch_geometric/testing/decorators.py +++ b/torch_geometric/testing/decorators.py @@ -59,6 +59,20 @@ def onlyCUDA(func: Callable) -> Callable: )(func) +def onlyXPU(func: Callable) -> Callable: + r"""A decorator to skip tests if XPU is not found.""" + import pytest + try: + import intel_extension_for_pytorch + xpu_available = torch.xpu.is_available() + except ImportError: + xpu_available = False + return pytest.mark.skipif( + not xpu_available, + reason="XPU not available", + )(func) + + def onlyOnline(func: Callable): r"""A decorator to skip tests if there exists no connection to the internet.""" From 4f372c7bab516ec388c54aed2ddfe04ac3b71586 Mon Sep 17 00:00:00 2001 From: Damian Szwichtenberg Date: Mon, 17 Jul 2023 15:53:34 +0200 Subject: [PATCH 08/12] Clean-up imports --- benchmark/utils/__init__.py | 2 -- torch_geometric/profile/__init__.py | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmark/utils/__init__.py b/benchmark/utils/__init__.py index f73aebb5b7e7..d97451a778e3 100644 --- a/benchmark/utils/__init__.py +++ b/benchmark/utils/__init__.py @@ -4,7 +4,6 @@ from .utils import get_split_masks from .utils import save_benchmark_data, write_to_csv from .utils import test -from .utils import xpu_profiler __all__ = [ 'emit_itt', @@ -15,5 +14,4 @@ 'save_benchmark_data', 'write_to_csv', 'test', - 'xpu_profiler', ] diff --git a/torch_geometric/profile/__init__.py b/torch_geometric/profile/__init__.py index 9f7340979181..a72d953e6cf4 100644 --- a/torch_geometric/profile/__init__.py +++ b/torch_geometric/profile/__init__.py @@ -4,6 +4,7 @@ print_time_total, rename_profile_file, torch_profile, + xpu_profile, ) from .utils import count_parameters from .utils import get_model_size @@ -21,6 +22,7 @@ 'print_time_total', 'rename_profile_file', 'torch_profile', + 'xpu_profile', 'count_parameters', 'get_model_size', 'get_data_size', From 8b0ba697d6f9aa8d2e68f00643bfe3f34d6e97cb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 17 Jul 2023 13:56:54 +0000 Subject: [PATCH 09/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- benchmark/inference/inference_benchmark.py | 7 ++++++- test/profile/test_profile.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/benchmark/inference/inference_benchmark.py b/benchmark/inference/inference_benchmark.py index 3fce77a99f99..22d6dd7bf64f 100644 --- a/benchmark/inference/inference_benchmark.py +++ b/benchmark/inference/inference_benchmark.py @@ -16,7 +16,12 @@ ) from torch_geometric.loader import NeighborLoader from torch_geometric.nn import PNAConv -from torch_geometric.profile import rename_profile_file, timeit, torch_profile, xpu_profile +from torch_geometric.profile import ( + rename_profile_file, + timeit, + torch_profile, + xpu_profile, +) supported_sets = { 'ogbn-mag': ['rgat', 'rgcn'], diff --git a/test/profile/test_profile.py b/test/profile/test_profile.py index 5c8aca04607c..e9ed2c2b4890 100644 --- a/test/profile/test_profile.py +++ b/test/profile/test_profile.py @@ -14,9 +14,9 @@ from torch_geometric.profile.profile import torch_profile, xpu_profile from torch_geometric.testing import ( onlyCUDA, - onlyXPU, onlyLinux, onlyOnline, + onlyXPU, withCUDA, withPackage, ) From 1d65f9522f75bc2e8616e3357c9f7a07b2708334 Mon Sep 17 00:00:00 2001 From: Damian Szwichtenberg Date: Mon, 17 Jul 2023 16:02:31 +0200 Subject: [PATCH 10/12] Fix pep8 --- torch_geometric/testing/decorators.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/torch_geometric/testing/decorators.py b/torch_geometric/testing/decorators.py index e0d61db08ddd..8c5e35c16e41 100644 --- a/torch_geometric/testing/decorators.py +++ b/torch_geometric/testing/decorators.py @@ -63,8 +63,8 @@ def onlyXPU(func: Callable) -> Callable: r"""A decorator to skip tests if XPU is not found.""" import pytest try: - import intel_extension_for_pytorch - xpu_available = torch.xpu.is_available() + import intel_extension_for_pytorch as ipex + xpu_available = ipex.xpu.is_available() except ImportError: xpu_available = False return pytest.mark.skipif( From b292fd089ec034c5c2fa992de72818c63f6516af Mon Sep 17 00:00:00 2001 From: Damian Szwichtenberg Date: Mon, 17 Jul 2023 17:18:20 +0200 Subject: [PATCH 11/12] Update tests --- test/profile/test_profile.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/test/profile/test_profile.py b/test/profile/test_profile.py index e9ed2c2b4890..3631a46f6f74 100644 --- a/test/profile/test_profile.py +++ b/test/profile/test_profile.py @@ -1,5 +1,6 @@ import os.path import warnings +import pytest import torch import torch.nn.functional as F @@ -110,17 +111,26 @@ def test_torch_profile(capfd, get_dataset, device): @onlyXPU @onlyOnline -def test_xpu_profile(capfd, get_dataset): +@pytest.mark.parametrize('export_chrome_trace', [False, True]) +def test_xpu_profile(capfd, get_dataset, export_chrome_trace): dataset = get_dataset(name='Cora') device = torch.device('xpu') data = dataset[0].to(device) model = GraphSAGE(dataset.num_features, hidden_channels=64, num_layers=3, out_channels=dataset.num_classes).to(device) - with xpu_profile(): + with xpu_profile(export_chrome_trace): model(data.x, data.edge_index) out, _ = capfd.readouterr() assert 'Self CPU' in out if data.x.is_xpu: assert 'Self XPU' in out + + f_name = 'timeline.json' + f_exists = os.path.exists(f_name) + if not export_chrome_trace: + assert not f_exists + else: + assert f_exists + os.remove(f_name) From 639f0583966294d99f515f6632fdd400048b2926 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 17 Jul 2023 15:19:29 +0000 Subject: [PATCH 12/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- test/profile/test_profile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/profile/test_profile.py b/test/profile/test_profile.py index 3631a46f6f74..b64d1125f9da 100644 --- a/test/profile/test_profile.py +++ b/test/profile/test_profile.py @@ -1,7 +1,7 @@ import os.path import warnings -import pytest +import pytest import torch import torch.nn.functional as F