Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add profiling support for benchmark/kernel and benchmark/inference #5073

Merged
merged 24 commits into from
Aug 24, 2022
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
3659947
Add inference test for benchmark/kernel
yanbing-j Jul 19, 2022
cc45413
Add gcn + ogbn-products in benchmark/kernel
yanbing-j Jul 19, 2022
6054a6c
Merge GCN and SAGE into one
yanbing-j Jul 29, 2022
51b287c
Add profile support in benchmark/inference
yanbing-j Aug 1, 2022
e31a5f8
Merge branch 'master' into yanbing/benchmark
yanbing-j Aug 1, 2022
96b868c
Add decorator for torch.profile and move dataloader outside of run_tr…
yanbing-j Aug 3, 2022
f7f25c4
Merge branch 'master' into yanbing/benchmark
yanbing-j Aug 3, 2022
8fa35e9
Fix code coverage
yanbing-j Aug 3, 2022
11db83f
Add changelog and fix bug
yanbing-j Aug 4, 2022
07ad269
Update
yanbing-j Aug 5, 2022
feda5b4
Merge branch 'master' into yanbing/benchmark
yanbing-j Aug 5, 2022
5677e49
remove gcn+ogbn from benchmark/kernel, add GraphSage in benchmark/inf…
yanbing-j Aug 11, 2022
6e48b43
Merge branch 'master' into yanbing/benchmark
yanbing-j Aug 11, 2022
d3e7d18
Merge timeit and e2e_time
yanbing-j Aug 15, 2022
78ad7ff
Merge branch 'master' into yanbing/benchmark
yanbing-j Aug 15, 2022
1789e6c
Update test_profile.py
yanbing-j Aug 15, 2022
1cf7d7e
Merge branch 'master' into yanbing/benchmark
yanbing-j Aug 17, 2022
b1a15bb
Update timeit and torch_profile in citation and points
yanbing-j Aug 17, 2022
4fef86a
Merge branch 'master' into yanbing/benchmark
yanbing-j Aug 24, 2022
32c6ff3
Update and add log argument in timeit
yanbing-j Aug 24, 2022
42df421
Update benchmark/inference/utils.py
rusty1s Aug 24, 2022
a5e2ae6
Update benchmark/kernel/train_eval.py
rusty1s Aug 24, 2022
06e9d52
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 24, 2022
872cdec
Merge branch 'master' into yanbing/benchmark
rusty1s Aug 24, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

## [2.0.5] - 2022-MM-DD
### Added
- Added inference benchmark for GCN+ogbn-products ([#5073](https://github.com/pyg-team/pytorch_geometric/pull/5073))
- `NeighborSampler` supports graphs without edges ([#5072](https://github.com/pyg-team/pytorch_geometric/pull/5072))
- Added the `MeanSubtractionNorm` layer ([#5068](https://github.com/pyg-team/pytorch_geometric/pull/5068))
- Added `pyg_lib.segment_matmul` integration within `RGCNConv` ([#5052](https://github.com/pyg-team/pytorch_geometric/pull/5052), [#5096](https://github.com/pyg-team/pytorch_geometric/pull/5096))
Expand Down
22 changes: 22 additions & 0 deletions benchmark/inference/inference_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
from timeit import default_timer

import torch
from torch.profiler import ProfilerActivity, profile
from utils import get_dataset, get_model

from torch_geometric.loader import NeighborLoader
from torch_geometric.nn import PNAConv
from torch_geometric.profile import rename_profile_file, trace_handler

supported_sets = {
'ogbn-mag': ['rgat', 'rgcn'],
Expand Down Expand Up @@ -92,12 +94,30 @@ def run(args: argparse.ArgumentParser) -> None:
model = model.to(device)
model.eval()

for _ in range(args.warmup):
model.inference(subgraph_loader, device,
progress_bar=True)

start = default_timer()
model.inference(subgraph_loader, device,
progress_bar=True)
stop = default_timer()
print(f'Inference time={stop-start:.3f} seconds\n')

if args.profile:
with profile(
activities=[
ProfilerActivity.CPU,
ProfilerActivity.CUDA
], on_trace_ready=trace_handler) as p:
model.inference(subgraph_loader, device,
progress_bar=True)
p.step()
rename_profile_file(
model_name, dataset_name, str(batch_size),
str(layers), str(hidden_channels),
str(subgraph_loader.num_neighbors))


if __name__ == '__main__':
argparser = argparse.ArgumentParser('GNN inference benchmark')
Expand All @@ -121,6 +141,8 @@ def run(args: argparse.ArgumentParser) -> None:
'--hetero-num-neighbors', default=-1, type=int,
help='number of neighbors to sample per layer for hetero workloads')
argparser.add_argument('--num-workers', default=2, type=int)
argparser.add_argument('--warmup', default=1, type=int)
argparser.add_argument('--profile', action='store_true')

args = argparser.parse_args()

Expand Down
180 changes: 180 additions & 0 deletions benchmark/kernel/gcn-ogbn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
import argparse
rusty1s marked this conversation as resolved.
Show resolved Hide resolved
import time

import torch
import torch.nn.functional as F
from ogb.nodeproppred import Evaluator, PygNodePropPredDataset
from torch.profiler import ProfilerActivity, profile

import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, SAGEConv
from torch_geometric.profile import rename_profile_file, trace_handler


class Net(torch.nn.Module):
    """Stack of ``GCNConv`` or ``SAGEConv`` layers ending in a log-softmax.

    The stack always contains an input layer
    (``in_channels -> hidden_channels``) and an output layer
    (``hidden_channels -> out_channels``), with ``num_layers - 2`` hidden
    layers (``hidden_channels -> hidden_channels``) in between. For
    ``num_layers < 2`` the stack therefore still holds two layers.

    Args:
        in_channels: Size of each input node feature vector.
        hidden_channels: Size of the intermediate representations.
        out_channels: Number of output classes.
        num_layers: Requested total number of convolution layers.
        dropout: Dropout probability applied after every non-final layer.
        use_sage: If truthy, build ``SAGEConv`` layers; otherwise build
            ``GCNConv`` layers with ``normalize=False``.
    """
    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout, use_sage) -> None:
        super().__init__()

        def make_conv(in_dim, out_dim):
            if use_sage:
                return SAGEConv(in_dim, out_dim)
            # `normalize=False`: the adjacency handed to `forward` is
            # expected to be normalized already by the caller.
            return GCNConv(in_dim, out_dim, normalize=False)

        self.convs = torch.nn.ModuleList()
        self.convs.append(make_conv(in_channels, hidden_channels))
        # Build a *fresh* layer per iteration. Appending one shared instance
        # repeatedly would tie the weights of all hidden layers together.
        for _ in range(num_layers - 2):
            self.convs.append(make_conv(hidden_channels, hidden_channels))
        self.convs.append(make_conv(hidden_channels, out_channels))

        self.dropout = dropout

    def reset_parameters(self):
        """Re-initialize the parameters of every convolution layer."""
        for conv in self.convs:
            conv.reset_parameters()

    def forward(self, x, adj_t):
        """Return per-node log-probabilities.

        Every layer except the last is followed by ReLU and dropout (dropout
        is only active while ``self.training`` is set).
        """
        for conv in self.convs[:-1]:
            x = conv(x, adj_t)
            x = F.relu(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.convs[-1](x, adj_t)
        return torch.log_softmax(x, dim=-1)


def train(model, data, train_idx, optimizer):
    """Perform a single optimization step and return the loss as a float.

    The model is evaluated on ``data.x`` / ``data.adj_t`` as a whole; only the
    rows selected by ``train_idx`` contribute to the negative log-likelihood
    loss against ``data.y`` (whose dimension 1 is squeezed away).
    """
    model.train()
    optimizer.zero_grad()

    log_probs = model(data.x, data.adj_t)
    targets = data.y.squeeze(1)
    loss = F.nll_loss(log_probs[train_idx], targets[train_idx])

    loss.backward()
    optimizer.step()
    return loss.item()
rusty1s marked this conversation as resolved.
Show resolved Hide resolved


@torch.no_grad()
def test(model, data, split_idx, evaluator):
    """Return ``(train_acc, valid_acc, test_acc)`` for the current model.

    The model is put into eval mode and run once on ``data.x`` /
    ``data.adj_t``. The arg-max prediction (kept as a trailing dimension) is
    then scored per split through ``evaluator.eval(...)['acc']``, using the
    index sets stored under the ``'train'``, ``'valid'`` and ``'test'`` keys
    of ``split_idx``.
    """
    model.eval()

    logits = model(data.x, data.adj_t)
    y_pred = logits.argmax(dim=-1, keepdim=True)

    def split_accuracy(split):
        idx = split_idx[split]
        scores = evaluator.eval({
            'y_true': data.y[idx],
            'y_pred': y_pred[idx],
        })
        return scores['acc']

    # Evaluate in the fixed order train -> valid -> test.
    return tuple(split_accuracy(name) for name in ('train', 'valid', 'test'))


@torch.no_grad()
def inference(model, data):
    """Run one forward pass in eval mode and discard the output.

    Gradient tracking is disabled by the decorator; nothing is returned.
    """
    model.eval()
    features, adjacency = data.x, data.adj_t
    model(features, adjacency)


def _train_and_evaluate(model, data, split_idx, train_idx, args):
    """Train from scratch ``args.runs`` times, evaluating after each epoch."""
    evaluator = Evaluator(name='ogbn-products')

    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        for epoch in range(1, 1 + args.epochs):
            loss = train(model, data, train_idx, optimizer)
            result = test(model, data, split_idx, evaluator)

            if epoch % args.log_steps == 0:
                train_acc, valid_acc, test_acc = result
                print(f'Run: {run + 1:02d}, '
                      f'Epoch: {epoch:02d}, '
                      f'Loss: {loss:.4f}, '
                      f'Train: {100 * train_acc:.2f}%, '
                      f'Valid: {100 * valid_acc:.2f}% '
                      f'Test: {100 * test_acc:.2f}%')


def _time_inference(model, data, epochs):
    """Run ``epochs`` forward passes and report the duration of the last one.

    All passes before the final one are untimed warm-up passes.
    """
    for epoch in range(1, 1 + epochs):
        print("Epoch ", epoch)
        if epoch < epochs:
            inference(model, data)
            continue

        # Drain pending GPU work so that only this pass is measured.
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        # perf_counter() is monotonic and high resolution, unlike time.time(),
        # which may jump when the system clock is adjusted.
        t_start = time.perf_counter()

        inference(model, data)

        if torch.cuda.is_available():
            torch.cuda.synchronize()
        duration = time.perf_counter() - t_start
        print(f'End-to-End Inference time: {duration:.8f}s', flush=True)


def _profile_inference(model, data, use_sage):
    """Run one forward pass under ``torch.profiler`` and rename the trace."""
    activities = [ProfilerActivity.CPU]
    # Only ask for CUDA activity when a CUDA device is actually present.
    if torch.cuda.is_available():
        activities.append(ProfilerActivity.CUDA)

    with profile(activities=activities, on_trace_ready=trace_handler) as p:
        inference(model, data)
        p.step()
    # NOTE(review): presumably tags the trace file written by `trace_handler`
    # with the given name parts -- confirm against torch_geometric.profile.
    rename_profile_file('SAGE' if use_sage else 'GCN', 'ogbn-products')


def main():
    """Command-line entry point for the ogbn-products GCN/SAGE benchmark.

    Without ``--inference`` the model is trained ``--runs`` times for
    ``--epochs`` epochs each and evaluated after every epoch. With
    ``--inference`` the model runs ``--epochs`` forward passes, printing the
    end-to-end time of the last one, and is additionally profiled when
    ``--profile`` is given.
    """
    parser = argparse.ArgumentParser(description='OGBN-Products (GNN)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--use_sage', action='store_true')
    parser.add_argument('--num_layers', type=int, default=3)
    parser.add_argument('--hidden_channels', type=int, default=256)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--epochs', type=int, default=300)
    parser.add_argument('--runs', type=int, default=10)
    parser.add_argument('--inference', action='store_true')
    parser.add_argument('--profile', action='store_true')
    args = parser.parse_args()
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    dataset = PygNodePropPredDataset(name='ogbn-products', root='dataset/',
                                     transform=T.ToSparseTensor())
    data = dataset[0]

    split_idx = dataset.get_idx_split()
    train_idx = split_idx['train'].to(device)

    model = Net(data.num_features, args.hidden_channels, dataset.num_classes,
                args.num_layers, args.dropout, args.use_sage).to(device)

    if not args.use_sage:
        # Pre-compute GCN normalization once (the GCN layers are built with
        # `normalize=False`): D^{-1/2} (A + I) D^{-1/2}.
        adj_t = data.adj_t.set_diag()
        deg = adj_t.sum(dim=1).to(torch.float)
        deg_inv_sqrt = deg.pow(-0.5)
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
        adj_t = deg_inv_sqrt.view(-1, 1) * adj_t * deg_inv_sqrt.view(1, -1)
        data.adj_t = adj_t

    data = data.to(device)

    if not args.inference:
        _train_and_evaluate(model, data, split_idx, train_idx, args)
    else:
        model.reset_parameters()
        _time_inference(model, data, args.epochs)

        # NOTE(review): indentation was lost in the reviewed diff; profiling
        # is assumed to belong to the inference branch -- confirm.
        if args.profile:
            _profile_inference(model, data, args.use_sage)


if __name__ == "__main__":
    main()
85 changes: 66 additions & 19 deletions benchmark/kernel/main_performance.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,17 @@
from gcn import GCN
from gin import GIN
from graph_sage import GraphSAGE
from train_eval import eval_acc, train
from train_eval import eval_acc, inference_run, train

from torch_geometric import seed_everything
from torch_geometric.loader import DataLoader
from torch_geometric.profile import get_stats_summary, profileit, timeit
from torch_geometric.profile import (
get_stats_summary,
profileit,
rename_profile_file,
timeit,
)
from torch_geometric.profile.profile import e2e_time, torch_profile

seed_everything(0)

Expand All @@ -22,6 +28,8 @@
help='Skip the first few runs')
parser.add_argument('--goal_accuracy', type=int, default=1,
help='The goal test accuracy')
parser.add_argument('--inference', action='store_true')
parser.add_argument('--profile', action='store_true')
args = parser.parse_args()

layers = [1, 2, 3]
Expand All @@ -37,11 +45,8 @@

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Decorate train and eval functions:
train = profileit(print_layer_stats=False)(train)
eval_acc = timeit()(eval_acc)

for dataset_name, Net in product(datasets, nets):
def prepare_dataloader(dataset_name):
dataset = get_dataset(dataset_name, sparse=True)
num_train = int(len(dataset) * 0.8)
num_val = int(len(dataset) * 0.1)
Expand All @@ -56,21 +61,63 @@
shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=args.batch_size,
shuffle=False)
return dataset, train_loader, val_loader, test_loader

for num_layers, hidden in product(layers, hiddens):
print(f'--\n{dataset_name} - {Net.__name__} - {num_layers} - {hidden}')

model = Net(dataset, num_layers, hidden).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
def run_train():
    """Train every (dataset, network, depth, width) combination.

    For each configuration a fresh model and Adam optimizer are created and
    trained for ``args.epochs`` epochs, evaluating on the validation and test
    loaders after every epoch. The per-epoch stats returned by ``train`` are
    collected from epoch ``args.warmup_profile`` onwards and printed as one
    summary per configuration.
    """
    for dataset_name, Net in product(datasets, nets):
        loaders = prepare_dataloader(dataset_name)
        dataset, train_loader, val_loader, test_loader = loaders

        for num_layers, hidden in product(layers, hiddens):
            header = "--\n{} - {} - {} - {}".format(dataset_name, Net.__name__,
                                                    num_layers, hidden)
            print(header)

            model = Net(dataset, num_layers, hidden).to(device)
            optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

            stats_list = []
            for epoch in range(1, args.epochs + 1):
                _loss, stats = train(model, optimizer, train_loader)
                _val_acc, _val_time = eval_acc(model, val_loader)
                _test_acc, _test_time = eval_acc(model, test_loader)

                # Skip the warm-up epochs when gathering stats.
                if epoch < args.warmup_profile:
                    continue
                stats_list.append(stats)

            print(get_stats_summary(stats_list))


@torch.no_grad()
def run_inference():
    """Benchmark inference for every (dataset, network, depth, width) combo.

    Each configuration gets a fresh model that runs ``args.epochs`` passes
    over the test loader: all but the last are plain passes, the last one is
    wrapped with ``e2e_time()``. With ``args.profile`` set, one more pass is
    run wrapped with ``torch_profile()`` and ``rename_profile_file`` is called
    with the configuration's name parts.
    """
    for dataset_name, Net in product(datasets, nets):
        loaders = prepare_dataloader(dataset_name)
        dataset, test_loader = loaders[0], loaders[3]

        for num_layers, hidden in product(layers, hiddens):
            print("--\n{} - {} - {} - {}".format(dataset_name, Net.__name__,
                                                 num_layers, hidden))

            model = Net(dataset, num_layers, hidden).to(device)

            # Untimed passes first ...
            for _ in range(args.epochs - 1):
                inference_run(model, test_loader)
            # ... then a single timed pass (skipped if no epochs requested).
            if args.epochs >= 1:
                e2e_inference_run = e2e_time()(inference_run)
                e2e_inference_run(model, test_loader)

            if not args.profile:
                continue
            profile_inference_run = torch_profile()(inference_run)
            profile_inference_run(model, test_loader)
            rename_profile_file(Net.__name__, dataset_name, str(num_layers),
                                str(hidden))


# Dispatch on the requested mode: inference benchmark or profiled training.
if args.inference:
    run_inference()
else:
    # Decorate train and eval functions before the training loop uses them:
    train = profileit(print_layer_stats=False)(train)
    eval_acc = timeit()(eval_acc)
    run_train()
rusty1s marked this conversation as resolved.
Show resolved Hide resolved
rusty1s marked this conversation as resolved.
Show resolved Hide resolved
9 changes: 9 additions & 0 deletions benchmark/kernel/train_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,3 +143,12 @@ def eval_loss(model, loader):
out = model(data)
loss += F.nll_loss(out, data.y.view(-1), reduction='sum').item()
return loss / len(loader.dataset)


@torch.no_grad()
def inference_run(model, loader):
    """Run the model in eval mode over every batch of ``loader``.

    Outputs are discarded and nothing is returned. Gradient tracking is
    already disabled for the whole call by the ``@torch.no_grad()`` decorator,
    so no nested ``torch.no_grad()`` context is needed around the forward
    pass.
    """
    model.eval()
    for data in loader:
        # NOTE(review): `device` is a module-level name defined outside this
        # view -- confirm it is set before this function is called.
        data = data.to(device)
        model(data)
rusty1s marked this conversation as resolved.
Show resolved Hide resolved
rusty1s marked this conversation as resolved.
Show resolved Hide resolved
Loading