Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add possibility to run inference benchmarks on XPU device #7705

Merged
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

### Added

- Added possibility to run inference benchmarks on XPU device ([#7705](https://github.com/pyg-team/pytorch_geometric/pull/7705))
- Added `HeteroData` support in `to_networkx` ([#7713](https://github.com/pyg-team/pytorch_geometric/pull/7713))
- Added `FlopsCount` support via `fvcore` ([#7693](https://github.com/pyg-team/pytorch_geometric/pull/7693))
- Added back support for PyTorch >= 1.11.0 ([#7656](https://github.com/pyg-team/pytorch_geometric/pull/7656))
Expand Down
47 changes: 39 additions & 8 deletions benchmark/inference/inference_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,12 @@
)
from torch_geometric.loader import NeighborLoader
from torch_geometric.nn import PNAConv
from torch_geometric.profile import rename_profile_file, timeit, torch_profile
from torch_geometric.profile import (
rename_profile_file,
timeit,
torch_profile,
xpu_profile,
)

supported_sets = {
'ogbn-mag': ['rgat', 'rgcn'],
Expand All @@ -42,11 +47,23 @@ def run(args: argparse.ArgumentParser):
warnings.warn("Cannot write profile data to CSV because profiling is "
"disabled")

# cuda device is not suitable for full batch mode
device = torch.device(
'cuda' if not args.full_batch and torch.cuda.is_available() else 'cpu')
if args.device == 'xpu':
try:
import intel_extension_for_pytorch as ipex
except ImportError:
raise RuntimeError('XPU device requires IPEX to be installed')

if ((args.device == 'cuda' and not torch.cuda.is_available())
or (args.device == 'xpu' and not torch.xpu.is_available())):
raise RuntimeError(f'{args.device.upper()} is not available')

if args.device == 'cuda' and args.full_batch:
raise RuntimeError('CUDA device is not suitable for full batch mode')

device = torch.device(args.device)

print('BENCHMARK STARTS')
print(f'Running on {args.device.upper()}')
for dataset_name in args.datasets:
assert dataset_name in supported_sets.keys(
), f"Dataset {dataset_name} isn't supported."
Expand All @@ -66,11 +83,17 @@ def run(args: argparse.ArgumentParser):
if args.num_layers != [1] and not hetero and args.num_steps != -1:
raise ValueError("Layer-wise inference requires `steps=-1`")

if torch.cuda.is_available():
if args.device == 'cuda':
amp = torch.cuda.amp.autocast(enabled=False)
elif args.device == 'xpu':
amp = torch.xpu.amp.autocast(enabled=False)
else:
amp = torch.cpu.amp.autocast(enabled=args.bf16)

if args.device == 'xpu' and args.warmup < 1:
print('XPU device requires warmup - setting warmup=1')
args.warmup = 1

inputs_channels = data[
'paper'].num_features if dataset_name == 'ogbn-mag' \
else dataset.num_features
Expand Down Expand Up @@ -163,16 +186,22 @@ def run(args: argparse.ArgumentParser):
state_dict = torch.load(args.ckpt_path)
model.load_state_dict(state_dict)
model.eval()
if args.device == 'xpu':
model = ipex.optimize(model)

# Define context manager parameters:
if args.cpu_affinity and with_loader:
cpu_affinity = subgraph_loader.enable_cpu_affinity(
args.loader_cores)
else:
cpu_affinity = nullcontext()
profile = torch_profile(
args.export_chrome_trace, csv_data,
args.write_csv) if args.profile else nullcontext()
if args.profile and args.device == 'xpu':
profile = xpu_profile(args.export_chrome_trace)
elif args.profile:
profile = torch_profile(args.export_chrome_trace,
csv_data, args.write_csv)
else:
profile = nullcontext()
itt = emit_itt(
) if args.vtune_profile else nullcontext()

Expand Down Expand Up @@ -256,6 +285,8 @@ def run(args: argparse.ArgumentParser):
argparser = argparse.ArgumentParser('GNN inference benchmark')
add = argparser.add_argument

add('--device', choices=['cpu', 'cuda', 'xpu'], default='cpu',
help='Device to run benchmark on')
add('--datasets', nargs='+',
default=['ogbn-mag', 'ogbn-products', 'Reddit'], type=str)
add('--use-sparse-tensor', action='store_true',
Expand Down
31 changes: 30 additions & 1 deletion test/profile/test_profile.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os.path
import warnings

import pytest
import torch
import torch.nn.functional as F

Expand All @@ -11,11 +12,12 @@
rename_profile_file,
timeit,
)
from torch_geometric.profile.profile import torch_profile
from torch_geometric.profile.profile import torch_profile, xpu_profile
from torch_geometric.testing import (
onlyCUDA,
onlyLinux,
onlyOnline,
onlyXPU,
withCUDA,
withPackage,
)
Expand Down Expand Up @@ -105,3 +107,30 @@ def test_torch_profile(capfd, get_dataset, device):
rename_profile_file('test_profile')
assert os.path.exists('profile-test_profile.json')
os.remove('profile-test_profile.json')


@onlyXPU
@onlyOnline
@pytest.mark.parametrize('export_chrome_trace', [False, True])
def test_xpu_profile(capfd, get_dataset, export_chrome_trace):
    # Run a GraphSAGE forward pass under the XPU profiler and verify that a
    # profiling summary is printed; a chrome trace file is only written when
    # `export_chrome_trace` is set.
    device = torch.device('xpu')
    dataset = get_dataset(name='Cora')
    data = dataset[0].to(device)
    model = GraphSAGE(dataset.num_features, hidden_channels=64, num_layers=3,
                      out_channels=dataset.num_classes).to(device)

    with xpu_profile(export_chrome_trace):
        model(data.x, data.edge_index)

    stdout, _ = capfd.readouterr()
    assert 'Self CPU' in stdout
    if data.x.is_xpu:
        assert 'Self XPU' in stdout

    trace_file = 'timeline.json'
    if export_chrome_trace:
        assert os.path.exists(trace_file)
        os.remove(trace_file)
    else:
        assert not os.path.exists(trace_file)
2 changes: 2 additions & 0 deletions torch_geometric/profile/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
print_time_total,
rename_profile_file,
torch_profile,
xpu_profile,
)
from .utils import count_parameters
from .utils import get_model_size
Expand All @@ -21,6 +22,7 @@
'print_time_total',
'rename_profile_file',
'torch_profile',
'xpu_profile',
'count_parameters',
'get_model_size',
'get_data_size',
Expand Down
9 changes: 9 additions & 0 deletions torch_geometric/profile/profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,15 @@ def torch_profile(export_chrome_trace=True, csv_data=None, write_csv=None):
save_profile_data(csv_data, events, use_cuda)


@contextmanager
def xpu_profile(export_chrome_trace=True):
    r"""A context manager that profiles the wrapped code on an XPU device via
    the legacy autograd profiler and prints a summary table sorted by XPU
    self time.

    Args:
        export_chrome_trace (bool, optional): If set, additionally writes the
            captured timeline to :obj:`"timeline.json"`.
            (default: :obj:`True`)
    """
    with torch.autograd.profiler_legacy.profile(use_xpu=True) as prof:
        yield
    summary = prof.key_averages().table(sort_by='self_xpu_time_total')
    print(summary)
    if export_chrome_trace:
        prof.export_chrome_trace('timeline.json')


def format_prof_time(time):
    """Convert a profiler time from microseconds to seconds, rounded to
    three decimal places."""
    seconds = time / 1e6
    return round(seconds, 3)
Expand Down
2 changes: 2 additions & 0 deletions torch_geometric/testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
onlyLinux,
onlyPython,
onlyCUDA,
onlyXPU,
onlyOnline,
onlyGraphviz,
onlyNeighborSampler,
Expand All @@ -22,6 +23,7 @@
'onlyLinux',
'onlyPython',
'onlyCUDA',
'onlyXPU',
'onlyOnline',
'onlyGraphviz',
'onlyNeighborSampler',
Expand Down
14 changes: 14 additions & 0 deletions torch_geometric/testing/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,20 @@ def onlyCUDA(func: Callable) -> Callable:
)(func)


def onlyXPU(func: Callable) -> Callable:
r"""A decorator to skip tests if XPU is not found."""
import pytest
try:
import intel_extension_for_pytorch as ipex
xpu_available = ipex.xpu.is_available()
except ImportError:
xpu_available = False
return pytest.mark.skipif(
not xpu_available,
reason="XPU not available",
)(func)


def onlyOnline(func: Callable):
r"""A decorator to skip tests if there exists no connection to the
internet."""
Expand Down