From 8b7b50ad79e3c7de5db677df26526decf20e33b1 Mon Sep 17 00:00:00 2001 From: yanbing-j Date: Mon, 27 Jun 2022 13:40:02 +0800 Subject: [PATCH 01/24] [Benchmark] Add inference and profile in citation --- benchmark/citation/appnp.py | 13 +++- benchmark/citation/arma.py | 13 +++- benchmark/citation/cheb.py | 13 +++- benchmark/citation/gat.py | 13 +++- benchmark/citation/gcn.py | 13 +++- benchmark/citation/sgc.py | 13 +++- benchmark/citation/train_eval.py | 129 +++++++++++++++++++++---------- 7 files changed, 161 insertions(+), 46 deletions(-) diff --git a/benchmark/citation/appnp.py b/benchmark/citation/appnp.py index 10f805a0c06f..469240d2c307 100644 --- a/benchmark/citation/appnp.py +++ b/benchmark/citation/appnp.py @@ -20,6 +20,8 @@ parser.add_argument('--normalize_features', type=bool, default=True) parser.add_argument('--K', type=int, default=10) parser.add_argument('--alpha', type=float, default=0.1) +parser.add_argument('--inference', type=bool, default=False) +parser.add_argument('--profile', type=bool, default=False) # Currently support profile in inference args = parser.parse_args() @@ -47,4 +49,13 @@ def forward(self, data): dataset = get_planetoid_dataset(args.dataset, args.normalize_features) permute_masks = random_planetoid_splits if args.random_splits else None run(dataset, Net(dataset), args.runs, args.epochs, args.lr, args.weight_decay, - args.early_stopping, permute_masks) + args.early_stopping, args.inference, args.profile, permute_masks) + +if args.profile: + import os + import pathlib + profile_dir = str(pathlib.Path.cwd()) + '/' + profile_file = profile_dir + 'profile-citation-APPNP-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.log' + timeline_file = profile_dir + 'profile-citation-APPNP-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.json' + os.rename('profile.log', profile_file) + os.rename('timeline.json', timeline_file) diff --git a/benchmark/citation/arma.py b/benchmark/citation/arma.py index 0d0405e7a548..31c39b4a9e9a 100644 --- a/benchmark/citation/arma.py +++ b/benchmark/citation/arma.py @@ -21,6 +21,8 @@ parser.add_argument('--num_layers', type=int, default=1) parser.add_argument('--shared_weights', type=bool, default=False) parser.add_argument('--skip_dropout', type=float, default=0.75) +parser.add_argument('--inference', type=bool, default=False) +parser.add_argument('--profile', type=bool, default=False) # Currently support profile in inference args = parser.parse_args() @@ -49,4 +51,13 @@ def forward(self, data): dataset = get_planetoid_dataset(args.dataset, args.normalize_features) permute_masks = random_planetoid_splits if args.random_splits else None run(dataset, Net(dataset), args.runs, args.epochs, args.lr, args.weight_decay, - args.early_stopping, permute_masks) + args.early_stopping, args.inference, args.profile, permute_masks) + +if args.profile: + import os + import pathlib + profile_dir = str(pathlib.Path.cwd()) + '/' + profile_file = profile_dir + 'profile-citation-ARMA-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.log' + timeline_file = profile_dir + 'profile-citation-ARMA-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.json' + os.rename('profile.log', profile_file) + os.rename('timeline.json', timeline_file) diff --git a/benchmark/citation/cheb.py b/benchmark/citation/cheb.py index 18e6b4b23934..ba56dd323522 100644 --- a/benchmark/citation/cheb.py +++ b/benchmark/citation/cheb.py @@ -18,6 +18,8 @@ parser.add_argument('--dropout', type=float, default=0.5) parser.add_argument('--normalize_features', type=bool, default=True) parser.add_argument('--num_hops', type=int, default=3) +parser.add_argument('--inference', type=bool, default=False) +parser.add_argument('--profile', type=bool, default=False) # Currently support profile in inference args = parser.parse_args() @@ -42,4 +44,13 @@ def forward(self, data): dataset = get_planetoid_dataset(args.dataset, args.normalize_features) permute_masks = random_planetoid_splits if args.random_splits else None run(dataset, Net(dataset), args.runs, args.epochs, args.lr, args.weight_decay, - args.early_stopping, permute_masks) + args.early_stopping, args.inference, args.profile, permute_masks) + +if args.profile: + import os + import pathlib + profile_dir = str(pathlib.Path.cwd()) + '/' + profile_file = profile_dir + 'profile-citation-CHEBY-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.log' + timeline_file = profile_dir + 'profile-citation-CHEBY-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.json' + os.rename('profile.log', profile_file) + os.rename('timeline.json', timeline_file) diff --git a/benchmark/citation/gat.py b/benchmark/citation/gat.py index 0f85e5144a73..52e0cea82581 100644 --- a/benchmark/citation/gat.py +++ b/benchmark/citation/gat.py @@ -19,6 +19,8 @@ parser.add_argument('--normalize_features', type=bool, default=True) parser.add_argument('--heads', type=int, default=8) parser.add_argument('--output_heads', type=int, default=1) +parser.add_argument('--inference', type=bool, default=False) +parser.add_argument('--profile', type=bool, default=False) # Currently support profile in inference args = parser.parse_args() @@ -47,4 +49,13 @@ def forward(self, data): dataset = get_planetoid_dataset(args.dataset, args.normalize_features) permute_masks = random_planetoid_splits if args.random_splits else None run(dataset, Net(dataset), args.runs, args.epochs, args.lr, args.weight_decay, - args.early_stopping, permute_masks) + args.early_stopping, args.inference, args.profile, permute_masks) + +if args.profile: + import os + import pathlib + profile_dir = str(pathlib.Path.cwd()) + '/' + profile_file = profile_dir + 'profile-citation-GAT-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.log' + timeline_file = profile_dir + 'profile-citation-GAT-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.json' + os.rename('profile.log', profile_file) + os.rename('timeline.json', timeline_file) diff --git a/benchmark/citation/gcn.py b/benchmark/citation/gcn.py index b8c220f519b8..df0b3d74385e 100644 --- a/benchmark/citation/gcn.py +++ b/benchmark/citation/gcn.py @@ -17,6 +17,8 @@ parser.add_argument('--hidden', type=int, default=16) parser.add_argument('--dropout', type=float, default=0.5) parser.add_argument('--normalize_features', type=bool, default=True) +parser.add_argument('--inference', type=bool, default=False) +parser.add_argument('--profile', type=bool, default=False) # Currently support profile in inference args = parser.parse_args() @@ -41,4 +43,13 @@ def forward(self, data): dataset = get_planetoid_dataset(args.dataset, args.normalize_features) permute_masks = random_planetoid_splits if args.random_splits else None run(dataset, Net(dataset), args.runs, args.epochs, args.lr, args.weight_decay, - args.early_stopping, permute_masks) + args.early_stopping, args.inference, args.profile, permute_masks) + +if args.profile: + import os + import pathlib + profile_dir = str(pathlib.Path.cwd()) + '/' + profile_file = profile_dir + 'profile-citation-GCN-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.log' + timeline_file = profile_dir + 'profile-citation-GCN-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.json' + os.rename('profile.log', profile_file) + os.rename('timeline.json', timeline_file) diff --git a/benchmark/citation/sgc.py b/benchmark/citation/sgc.py index b21a37e07a5c..5adf3814c189 100644 --- a/benchmark/citation/sgc.py +++ b/benchmark/citation/sgc.py @@ -16,6 +16,8 @@ parser.add_argument('--early_stopping', type=int, default=10) parser.add_argument('--normalize_features', type=bool, default=False) parser.add_argument('--K', type=int, default=2) +parser.add_argument('--inference', type=bool, default=False) +parser.add_argument('--profile', type=bool, default=False) # Currently support profile in inference args = parser.parse_args() @@ -37,4 +39,13 @@ def forward(self, data): dataset = get_planetoid_dataset(args.dataset, args.normalize_features) permute_masks = random_planetoid_splits if args.random_splits else None run(dataset, Net(dataset), args.runs, args.epochs, args.lr, args.weight_decay, - args.early_stopping, permute_masks) + args.early_stopping, args.inference, args.profile, permute_masks) + +if args.profile: + import os + import pathlib + profile_dir = str(pathlib.Path.cwd()) + '/' + profile_file = profile_dir + 'profile-citation-SGC-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.log' + timeline_file = profile_dir + 'profile-citation-SGC-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.json' + os.rename('profile.log', profile_file) + os.rename('timeline.json', timeline_file) diff --git a/benchmark/citation/train_eval.py b/benchmark/citation/train_eval.py index 5e5c190d81e5..049cfb4617aa 100644 --- a/benchmark/citation/train_eval.py +++ b/benchmark/citation/train_eval.py @@ -4,10 +4,12 @@ import torch.nn.functional as F from torch import tensor from torch.optim import Adam +from torch.profiler import profile, ProfilerActivity from torch_geometric.utils import index_to_mask device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') +profile_sort = "self_cuda_time_total" if torch.cuda.is_available() else "self_cpu_time_total" def random_planetoid_splits(data, num_classes): @@ -33,61 +35,103 @@ def random_planetoid_splits(data, num_classes): return data - -def run(dataset, model, runs, epochs, lr, weight_decay, early_stopping, +def trace_handler(p): + output = p.key_averages().table(sort_by=profile_sort) + print(output) + import pathlib + profile_dir = str(pathlib.Path.cwd()) + '/' + profile_file = profile_dir + 'profile' + '.log' + with open(profile_file, 'w') as f: + f.write(output) + f.close() + timeline_file = profile_dir + 'timeline' + '.json' + p.export_chrome_trace(timeline_file) + +def run(dataset, model, runs, epochs, lr, weight_decay, early_stopping, inference, profiling, permute_masks=None, logger=None): - val_losses, accs, durations = [], [], [] - for _ in range(runs): - data = dataset[0] - if permute_masks is not None: - data = permute_masks(data, dataset.num_classes) - data = data.to(device) + if not inference: + for _ in range(runs): + data = dataset[0] + if permute_masks is not None: + data = permute_masks(data, dataset.num_classes) + data = data.to(device) + + model.to(device).reset_parameters() + optimizer = Adam(model.parameters(), lr=lr, weight_decay=weight_decay) + + if torch.cuda.is_available(): + torch.cuda.synchronize() + + t_start = time.perf_counter() + + best_val_loss = float('inf') + test_acc = 0 + val_loss_history = [] - model.to(device).reset_parameters() - optimizer = Adam(model.parameters(), lr=lr, weight_decay=weight_decay) + for epoch in range(1, epochs + 1): + train(model, optimizer, data) + eval_info = evaluate(model, data) + eval_info['epoch'] = epoch - if torch.cuda.is_available(): - torch.cuda.synchronize() + if logger is not None: + logger(eval_info) - t_start = time.perf_counter() + if eval_info['val_loss'] < best_val_loss: + best_val_loss = eval_info['val_loss'] + test_acc = eval_info['test_acc'] - best_val_loss = float('inf') - test_acc = 0 - val_loss_history = [] + val_loss_history.append(eval_info['val_loss']) + if early_stopping > 0 and epoch > epochs // 2: + tmp = tensor(val_loss_history[-(early_stopping + 1):-1]) + if eval_info['val_loss'] > tmp.mean().item(): + break - for epoch in range(1, epochs + 1): - train(model, optimizer, data) - eval_info = evaluate(model, data) - eval_info['epoch'] = epoch + if torch.cuda.is_available(): + torch.cuda.synchronize() - if logger is not None: - logger(eval_info) + t_end = time.perf_counter() - if eval_info['val_loss'] < best_val_loss: - best_val_loss = eval_info['val_loss'] - test_acc = eval_info['test_acc'] + val_losses.append(best_val_loss) + accs.append(test_acc) + durations.append(t_end - t_start) + loss, acc, duration = tensor(val_losses), tensor(accs), tensor(durations) - val_loss_history.append(eval_info['val_loss']) - if early_stopping > 0 and epoch > epochs // 2: - tmp = tensor(val_loss_history[-(early_stopping + 1):-1]) - if eval_info['val_loss'] > tmp.mean().item(): - break + print(f'Val Loss: {float(loss.mean()):.4f}, ' + f'Test Accuracy: {float(acc.mean()):.3f} ± {float(acc.std()):.3f}, ' + f'Duration: {float(duration.mean()):.3f}s') + else: + for i in range(runs): + data = dataset[0] + if permute_masks is not None: + data = permute_masks(data, dataset.num_classes) + data = data.to(device) - if torch.cuda.is_available(): - torch.cuda.synchronize() + model.to(device).reset_parameters() - t_end = time.perf_counter() + if torch.cuda.is_available(): + torch.cuda.synchronize() - val_losses.append(best_val_loss) - accs.append(test_acc) - durations.append(t_end - t_start) + t_start = time.perf_counter() - loss, acc, duration = tensor(val_losses), tensor(accs), tensor(durations) + for epoch in range(1, epochs + 1): + if profiling and i == int(runs / 2) and epoch == int(epochs / 2): + with profile(activities=[ + ProfilerActivity.CPU, ProfilerActivity.CUDA], + on_trace_ready=trace_handler) as p: + test(model, data) + p.step() + else: + test(model, data) - print(f'Val Loss: {float(loss.mean()):.4f}, ' - f'Test Accuracy: {float(acc.mean()):.3f} ± {float(acc.std()):.3f}, ' - f'Duration: {float(duration.mean()):.3f}') + if torch.cuda.is_available(): + torch.cuda.synchronize() + + t_end = time.perf_counter() + durations.append(t_end - t_start) + + duration = tensor(durations) + print(f'Inference Duration: {float(duration.mean()):.3f}s') def train(model, optimizer, data): @@ -116,3 +160,8 @@ def evaluate(model, data): outs[f'{key}_acc'] = acc return outs + +def test(model, data): + model.eval() + with torch.no_grad(): + logits = model(data) From 064d52923a5168ab2e044435c822913020d15061 Mon Sep 17 00:00:00 2001 From: yanbing-j Date: Wed, 29 Jun 2022 08:50:42 +0800 Subject: [PATCH 02/24] Print end-to-end time of inference --- benchmark/citation/appnp.py | 7 +++++-- benchmark/citation/arma.py | 7 +++++-- benchmark/citation/cheb.py | 7 +++++-- benchmark/citation/gat.py | 7 +++++-- benchmark/citation/gcn.py | 7 +++++-- benchmark/citation/run.sh | 36 ++++++++++++++++++++++++++++++++ benchmark/citation/sgc.py | 7 +++++-- benchmark/citation/train_eval.py | 13 ------------ 8 files changed, 66 insertions(+), 25 deletions(-) diff --git a/benchmark/citation/appnp.py b/benchmark/citation/appnp.py index 469240d2c307..97a28699589a 100644 --- a/benchmark/citation/appnp.py +++ b/benchmark/citation/appnp.py @@ -1,4 +1,5 @@ import argparse +import time import torch import torch.nn.functional as F @@ -48,14 +49,16 @@ def forward(self, data): dataset = get_planetoid_dataset(args.dataset, args.normalize_features) permute_masks = random_planetoid_splits if args.random_splits else None +t_start = time.time() run(dataset, Net(dataset), args.runs, args.epochs, args.lr, args.weight_decay, args.early_stopping, args.inference, args.profile, permute_masks) +t_end = time.time() +duration = t_end - t_start +print("appnp-", args.dataset, "-", args.random_splits, ": End-to-End time: ", duration, " s") if args.profile: import os import pathlib profile_dir = str(pathlib.Path.cwd()) + '/' - profile_file = profile_dir + 'profile-citation-APPNP-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.log' timeline_file = profile_dir + 'profile-citation-APPNP-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.json' - os.rename('profile.log', profile_file) os.rename('timeline.json', timeline_file) diff --git a/benchmark/citation/arma.py b/benchmark/citation/arma.py index 31c39b4a9e9a..103aca88c0c5 100644 --- a/benchmark/citation/arma.py +++ b/benchmark/citation/arma.py @@ -1,4 +1,5 @@ import argparse +import time import torch import torch.nn.functional as F @@ -50,14 +51,16 @@ def forward(self, data): dataset = get_planetoid_dataset(args.dataset, args.normalize_features) permute_masks = random_planetoid_splits if args.random_splits else None +t_start = time.time() run(dataset, Net(dataset), args.runs, args.epochs, args.lr, args.weight_decay, args.early_stopping, args.inference, args.profile, permute_masks) +t_end = time.time() +duration = t_end - t_start +print("arma-{}-{}: End-to-End time: {} s".format(args.dataset, args.random_splits, duration)) if args.profile: import os import pathlib profile_dir = str(pathlib.Path.cwd()) + '/' - profile_file = profile_dir + 'profile-citation-ARMA-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.log' timeline_file = profile_dir + 'profile-citation-ARMA-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.json' - os.rename('profile.log', profile_file) os.rename('timeline.json', timeline_file) diff --git a/benchmark/citation/cheb.py b/benchmark/citation/cheb.py index ba56dd323522..7a8788a704fc 100644 --- a/benchmark/citation/cheb.py +++ b/benchmark/citation/cheb.py @@ -1,4 +1,5 @@ import argparse +import time import torch import torch.nn.functional as F @@ -43,14 +44,16 @@ def forward(self, data): dataset = get_planetoid_dataset(args.dataset, args.normalize_features) permute_masks = random_planetoid_splits if args.random_splits else None +t_start = time.time() run(dataset, Net(dataset), args.runs, args.epochs, args.lr, args.weight_decay, args.early_stopping, args.inference, args.profile, permute_masks) +t_end = time.time() +duration = t_end - t_start +print("cheby-{}-{}: End-to-End time: {} s".format(args.dataset, args.random_splits, duration)) if args.profile: import os import pathlib profile_dir = str(pathlib.Path.cwd()) + '/' - profile_file = profile_dir + 'profile-citation-CHEBY-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.log' timeline_file = profile_dir + 'profile-citation-CHEBY-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.json' - os.rename('profile.log', profile_file) os.rename('timeline.json', timeline_file) diff --git a/benchmark/citation/gat.py b/benchmark/citation/gat.py index 52e0cea82581..551375534355 100644 --- a/benchmark/citation/gat.py +++ b/benchmark/citation/gat.py @@ -1,4 +1,5 @@ import argparse +import time import torch import torch.nn.functional as F @@ -48,14 +49,16 @@ def forward(self, data): dataset = get_planetoid_dataset(args.dataset, args.normalize_features) permute_masks = random_planetoid_splits if args.random_splits else None +t_start = time.time() run(dataset, Net(dataset), args.runs, args.epochs, args.lr, args.weight_decay, args.early_stopping, args.inference, args.profile, permute_masks) +t_end = time.time() +duration = t_end - t_start +print("gat-{}-{}: End-to-End time: {} s".format(args.dataset, args.random_splits, duration)) if args.profile: import os import pathlib profile_dir = str(pathlib.Path.cwd()) + '/' - profile_file = profile_dir + 'profile-citation-GAT-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.log' timeline_file = profile_dir + 'profile-citation-GAT-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.json' - os.rename('profile.log', profile_file) os.rename('timeline.json', timeline_file) diff --git a/benchmark/citation/gcn.py b/benchmark/citation/gcn.py index df0b3d74385e..7bdbd2c1b64a 100644 --- a/benchmark/citation/gcn.py +++ b/benchmark/citation/gcn.py @@ -1,4 +1,5 @@ import argparse +import time import torch import torch.nn.functional as F @@ -42,14 +43,16 @@ def forward(self, data): dataset = get_planetoid_dataset(args.dataset, args.normalize_features) permute_masks = random_planetoid_splits if args.random_splits else None +t_start = time.time() run(dataset, Net(dataset), args.runs, args.epochs, args.lr, args.weight_decay, args.early_stopping, args.inference, args.profile, permute_masks) +t_end = time.time() +duration = t_end - t_start +print("gcn-{}-{}: End-to-End time: {} s".format(args.dataset, args.random_splits, duration)) if args.profile: import os import pathlib profile_dir = str(pathlib.Path.cwd()) + '/' - profile_file = profile_dir + 'profile-citation-GCN-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.log' timeline_file = profile_dir + 'profile-citation-GCN-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.json' - os.rename('profile.log', profile_file) os.rename('timeline.json', timeline_file) diff --git a/benchmark/citation/run.sh b/benchmark/citation/run.sh index dc584555fe7a..6e4288f678e1 100755 --- a/benchmark/citation/run.sh +++ b/benchmark/citation/run.sh @@ -6,26 +6,38 @@ echo "====" echo "GCN" python gcn.py --dataset=Cora python gcn.py --dataset=Cora --random_splits=True +python gcn.py --dataset=Cora --inference=True --profile=True +python gcn.py --dataset=Cora --random_splits=True --inference=True --profile=True echo "GAT" python gat.py --dataset=Cora python gat.py --dataset=Cora --random_splits=True +python gat.py --dataset=Cora --inference=True --profile=True +python gat.py --dataset=Cora --random_splits=True --inference=True --profile=True echo "Cheby" python cheb.py --dataset=Cora --num_hops=3 python cheb.py --dataset=Cora --num_hops=3 --random_splits=True +python cheb.py --dataset=Cora --num_hops=3 --inference=True --profile=True +python cheb.py --dataset=Cora --num_hops=3 --random_splits=True --inference=True --profile=True echo "SGC" python sgc.py --dataset=Cora --K=3 --weight_decay=0.0005 python sgc.py --dataset=Cora --K=3 --weight_decay=0.0005 --random_splits=True +python sgc.py --dataset=Cora --K=3 --weight_decay=0.0005 --inference=True --profile=True +python sgc.py --dataset=Cora --K=3 --weight_decay=0.0005 --random_splits=True --inference=True --profile=True echo "ARMA" python arma.py --dataset=Cora --num_stacks=2 --num_layers=1 --shared_weights=True python arma.py --dataset=Cora --num_stacks=3 --num_layers=1 --shared_weights=True --random_splits=True +python arma.py --dataset=Cora --num_stacks=2 --num_layers=1 --shared_weights=True --inference=True --profile=True +python arma.py --dataset=Cora --num_stacks=3 --num_layers=1 --shared_weights=True --random_splits=True --inference=True --profile=True echo "APPNP" python appnp.py --dataset=Cora --alpha=0.1 python appnp.py --dataset=Cora --alpha=0.1 --random_splits=True +python appnp.py --dataset=Cora --alpha=0.1 --inference=True --profile=True +python appnp.py --dataset=Cora --alpha=0.1 --random_splits=True --inference=True --profile=True echo "CiteSeer" echo "========" @@ -33,26 +45,38 @@ echo "========" echo "GCN" python gcn.py --dataset=CiteSeer python gcn.py --dataset=CiteSeer --random_splits=True +python gcn.py --dataset=CiteSeer --inference=True --profile=True +python gcn.py --dataset=CiteSeer --random_splits=True --inference=True --profile=True echo "GAT" python gat.py --dataset=CiteSeer python gat.py --dataset=CiteSeer --random_splits=True +python gat.py --dataset=CiteSeer --inference=True --profile=True +python gat.py --dataset=CiteSeer --random_splits=True --inference=True --profile=True echo "Cheby" python cheb.py --dataset=CiteSeer --num_hops=2 python cheb.py --dataset=CiteSeer --num_hops=3 --random_splits=True +python cheb.py --dataset=CiteSeer --num_hops=2 --inference=True --profile=True +python cheb.py --dataset=CiteSeer --num_hops=3 --random_splits=True --inference=True --profile=True echo "SGC" python sgc.py --dataset=CiteSeer --K=2 --weight_decay=0.005 python sgc.py --dataset=CiteSeer --K=2 --weight_decay=0.005 --random_splits=True +python sgc.py --dataset=CiteSeer --K=2 --weight_decay=0.005 --inference=True --profile=True +python sgc.py --dataset=CiteSeer --K=2 --weight_decay=0.005 --random_splits=True --inference=True --profile=True echo "ARMA" python arma.py --dataset=CiteSeer --num_stacks=3 --num_layers=1 --shared_weights=True python arma.py --dataset=CiteSeer --num_stacks=3 --num_layers=1 --shared_weights=True --random_splits=True +python arma.py --dataset=CiteSeer --num_stacks=3 --num_layers=1 --shared_weights=True --inference=True --profile=True +python arma.py --dataset=CiteSeer --num_stacks=3 --num_layers=1 --shared_weights=True --random_splits=True --inference=True --profile=True echo "APPNP" python appnp.py --dataset=CiteSeer --alpha=0.1 python appnp.py --dataset=CiteSeer --alpha=0.1 --random_splits=True +python appnp.py --dataset=CiteSeer --alpha=0.1 --inference=True --profile=True +python appnp.py --dataset=CiteSeer --alpha=0.1 --random_splits=True --inference=True --profile=True echo "PubMed" echo "======" @@ -60,23 +84,35 @@ echo "======" echo "GCN" python gcn.py --dataset=PubMed python gcn.py --dataset=PubMed --random_splits=True +python gcn.py --dataset=PubMed --inference=True --profile=True +python gcn.py --dataset=PubMed --random_splits=True --inference=True --profile=True echo "GAT" python gat.py --dataset=PubMed --lr=0.01 --weight_decay=0.001 --output_heads=8 python gat.py --dataset=PubMed --lr=0.01 --weight_decay=0.001 --output_heads=8 --random_splits=True +python gat.py --dataset=PubMed --lr=0.01 --weight_decay=0.001 --output_heads=8 --inference=True --profile=True +python gat.py --dataset=PubMed --lr=0.01 --weight_decay=0.001 --output_heads=8 --random_splits=True --inference=True --profile=True echo "Cheby" python cheb.py --dataset=PubMed --num_hops=2 python cheb.py --dataset=PubMed --num_hops=2 --random_splits=True +python cheb.py --dataset=PubMed --num_hops=2 --inference=True --profile=True +python cheb.py --dataset=PubMed --num_hops=2 --random_splits=True --inference=True --profile=True echo "SGC" python sgc.py --dataset=PubMed --K=2 --weight_decay=0.0005 python sgc.py --dataset=PubMed --K=2 --weight_decay=0.0005 --random_splits=True +python sgc.py --dataset=PubMed --K=2 --weight_decay=0.0005 --inference=True --profile=True +python sgc.py --dataset=PubMed --K=2 --weight_decay=0.0005 --random_splits=True --inference=True --profile=True echo "ARMA" python arma.py --dataset=PubMed --num_stacks=2 --num_layers=1 --skip_dropout=0 python arma.py --dataset=PubMed --num_stacks=2 --num_layers=1 --skip_dropout=0.5 --random_splits=True +python arma.py --dataset=PubMed --num_stacks=2 --num_layers=1 --skip_dropout=0 --inference=True --profile=True +python arma.py --dataset=PubMed --num_stacks=2 --num_layers=1 --skip_dropout=0.5 --random_splits=True --inference=True --profile=True echo "APPNP" python appnp.py --dataset=PubMed --alpha=0.1 python appnp.py --dataset=PubMed --alpha=0.1 --random_splits=True +python appnp.py --dataset=PubMed --alpha=0.1 --inference=True --profile=True +python appnp.py --dataset=PubMed --alpha=0.1 --random_splits=True --inference=True --profile=True diff --git a/benchmark/citation/sgc.py b/benchmark/citation/sgc.py index 5adf3814c189..c4b769e80899 100644 --- a/benchmark/citation/sgc.py +++ b/benchmark/citation/sgc.py @@ -1,4 +1,5 @@ import argparse +import time import torch import torch.nn.functional as F @@ -38,14 +39,16 @@ def forward(self, data): dataset = get_planetoid_dataset(args.dataset, args.normalize_features) permute_masks = random_planetoid_splits if args.random_splits else None +t_start = time.time() run(dataset, Net(dataset), args.runs, args.epochs, args.lr, args.weight_decay, args.early_stopping, args.inference, args.profile, permute_masks) +t_end = time.time() +duration = t_end - t_start +print("sgc-{}-{}: End-to-End time: {} s".format(args.dataset, args.random_splits, duration)) if args.profile: import os import pathlib profile_dir = str(pathlib.Path.cwd()) + '/' - profile_file = profile_dir + 'profile-citation-SGC-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.log' timeline_file = profile_dir + 'profile-citation-SGC-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.json' - os.rename('profile.log', profile_file) os.rename('timeline.json', timeline_file) diff --git a/benchmark/citation/train_eval.py b/benchmark/citation/train_eval.py index 049cfb4617aa..5390a7bfc5fd 100644 --- a/benchmark/citation/train_eval.py +++ b/benchmark/citation/train_eval.py @@ -40,10 +40,6 @@ def trace_handler(p): print(output) import pathlib profile_dir = str(pathlib.Path.cwd()) + '/' - profile_file = profile_dir + 'profile' + '.log' - with open(profile_file, 'w') as f: - f.write(output) - f.close() timeline_file = profile_dir + 'timeline' + '.json' p.export_chrome_trace(timeline_file) @@ -112,8 +108,6 @@ def run(dataset, model, runs, epochs, lr, weight_decay, early_stopping, inferenc if torch.cuda.is_available(): torch.cuda.synchronize() - t_start = time.perf_counter() - for epoch in range(1, epochs + 1): if profiling and i == int(runs / 2) and epoch == int(epochs / 2): with profile(activities=[ @@ -127,13 +121,6 @@ def run(dataset, model, runs, epochs, lr, weight_decay, early_stopping, inferenc if torch.cuda.is_available(): torch.cuda.synchronize() - t_end = time.perf_counter() - durations.append(t_end - t_start) - - duration = tensor(durations) - print(f'Inference Duration: {float(duration.mean()):.3f}s') - - def train(model, optimizer, data): model.train() optimizer.zero_grad() From fcd08fb5cd2f6ef042ecfccba3a3770e5b1d5194 Mon Sep 17 00:00:00 2001 From: yanbing-j Date: Wed, 29 Jun 2022 13:03:44 +0800 Subject: [PATCH 03/24] Print end-to-end time of one epoch --- benchmark/citation/appnp.py | 6 +----- benchmark/citation/arma.py | 6 +----- benchmark/citation/cheb.py | 6 +----- benchmark/citation/gat.py | 6 +----- benchmark/citation/gcn.py | 6 +----- benchmark/citation/sgc.py | 6 +----- benchmark/citation/train_eval.py | 31 +++++++++++++++++++------------ 7 files changed, 25 insertions(+), 42 deletions(-) diff --git a/benchmark/citation/appnp.py b/benchmark/citation/appnp.py index 97a28699589a..a0812060bbc1 100644 --- a/benchmark/citation/appnp.py +++ b/benchmark/citation/appnp.py @@ -1,5 +1,4 @@ import argparse -import time import torch import torch.nn.functional as F @@ -49,12 +48,9 @@ def forward(self, data): dataset = get_planetoid_dataset(args.dataset, args.normalize_features) permute_masks = random_planetoid_splits if args.random_splits else None -t_start = time.time() +print("appnp-{}-{}:".format(args.dataset, args.random_splits), end=' ') run(dataset, Net(dataset), args.runs, args.epochs, args.lr, args.weight_decay, args.early_stopping, args.inference, args.profile, permute_masks) -t_end = time.time() -duration = t_end - t_start -print("appnp-", args.dataset, "-", args.random_splits, ": End-to-End time: ", duration, " s") if args.profile: import os diff --git a/benchmark/citation/arma.py b/benchmark/citation/arma.py index 103aca88c0c5..43d587705422 100644 --- a/benchmark/citation/arma.py +++ b/benchmark/citation/arma.py @@ -1,5 +1,4 @@ import argparse -import time import torch import torch.nn.functional as F @@ -51,12 +50,9 @@ def forward(self, data): dataset = get_planetoid_dataset(args.dataset, args.normalize_features) permute_masks = random_planetoid_splits if args.random_splits else None -t_start = time.time() +print("arma-{}-{}:".format(args.dataset, args.random_splits), end=' ') run(dataset, Net(dataset), args.runs, args.epochs, args.lr, args.weight_decay, args.early_stopping, args.inference, args.profile, permute_masks) -t_end = time.time() -duration = t_end - t_start -print("arma-{}-{}: End-to-End time: {} s".format(args.dataset, args.random_splits, duration)) if args.profile: import os diff --git a/benchmark/citation/cheb.py b/benchmark/citation/cheb.py index 7a8788a704fc..0ca8d4780f8f 100644 --- a/benchmark/citation/cheb.py +++ b/benchmark/citation/cheb.py @@ -1,5 +1,4 @@ import argparse -import time import torch import torch.nn.functional as F @@ -44,12 +43,9 @@ def forward(self, data): dataset = get_planetoid_dataset(args.dataset, args.normalize_features) permute_masks = random_planetoid_splits if args.random_splits else None -t_start = time.time() +print("cheby-{}-{}:".format(args.dataset, args.random_splits), end=' ') run(dataset, Net(dataset), args.runs, args.epochs, args.lr, args.weight_decay, args.early_stopping, args.inference, args.profile, permute_masks) -t_end = time.time() -duration = t_end - t_start -print("cheby-{}-{}: End-to-End time: {} s".format(args.dataset, args.random_splits, duration)) if args.profile: import os diff --git a/benchmark/citation/gat.py b/benchmark/citation/gat.py index 551375534355..4f8b2027be12 100644 --- a/benchmark/citation/gat.py +++ b/benchmark/citation/gat.py @@ -1,5 +1,4 @@ import argparse -import time import torch import torch.nn.functional as F @@ -49,12 +48,9 @@ def forward(self, data): dataset = get_planetoid_dataset(args.dataset, args.normalize_features) permute_masks = random_planetoid_splits if args.random_splits else None -t_start = time.time() +print("gat-{}-{}:".format(args.dataset, args.random_splits), end=' ') run(dataset, Net(dataset), args.runs, args.epochs, args.lr, args.weight_decay, args.early_stopping, args.inference, args.profile, permute_masks) -t_end = time.time() -duration = t_end - t_start -print("gat-{}-{}: End-to-End time: {} s".format(args.dataset, args.random_splits, duration)) if args.profile: import os diff --git a/benchmark/citation/gcn.py b/benchmark/citation/gcn.py index 7bdbd2c1b64a..b84b91133e4b 100644 --- a/benchmark/citation/gcn.py +++ b/benchmark/citation/gcn.py @@ -1,5 +1,4 @@ import argparse -import time import torch import torch.nn.functional as F @@ -43,12 +42,9 @@ def forward(self, data): dataset = get_planetoid_dataset(args.dataset, args.normalize_features) permute_masks = random_planetoid_splits if args.random_splits else None -t_start = time.time() +print("gcn-{}-{}:".format(args.dataset, args.random_splits), end=' ') run(dataset, Net(dataset), args.runs, args.epochs, args.lr, args.weight_decay, args.early_stopping, args.inference, args.profile, permute_masks) -t_end = time.time() -duration = t_end - t_start -print("gcn-{}-{}: End-to-End time: {} s".format(args.dataset, args.random_splits, duration)) if args.profile: import os diff --git a/benchmark/citation/sgc.py b/benchmark/citation/sgc.py index c4b769e80899..4d836c824c9d 100644 --- a/benchmark/citation/sgc.py +++ b/benchmark/citation/sgc.py @@ -1,5 +1,4 @@ import argparse -import time import torch import torch.nn.functional as F @@ -39,12 +38,9 @@ def forward(self, data): dataset = get_planetoid_dataset(args.dataset, args.normalize_features) permute_masks = random_planetoid_splits if args.random_splits else None -t_start = time.time() +print("gcn-{}-{}:".format(args.dataset, args.random_splits), end=' ') run(dataset, Net(dataset), args.runs, args.epochs, args.lr, args.weight_decay, args.early_stopping, args.inference, args.profile, permute_masks) -t_end = time.time() -duration = t_end - t_start -print("sgc-{}-{}: End-to-End time: {} s".format(args.dataset, args.random_splits, duration)) if args.profile: import os diff --git a/benchmark/citation/train_eval.py b/benchmark/citation/train_eval.py index 5390a7bfc5fd..7f095b5e4022 100644 --- a/benchmark/citation/train_eval.py +++ b/benchmark/citation/train_eval.py @@ -105,22 +105,29 @@ def run(dataset, model, runs, epochs, lr, weight_decay, early_stopping, inferenc model.to(device).reset_parameters() - if torch.cuda.is_available(): - torch.cuda.synchronize() - for epoch in range(1, epochs + 1): - if profiling and i == int(runs / 2) and epoch == int(epochs / 2): - with profile(activities=[ - ProfilerActivity.CPU, ProfilerActivity.CUDA], - on_trace_ready=trace_handler) as p: + if i == int(runs / 2) and epoch == int(epochs / 2): + if profiling: + with profile(activities=[ + ProfilerActivity.CPU, ProfilerActivity.CUDA], + on_trace_ready=trace_handler) as p: + test(model, data) + p.step() + else: + if torch.cuda.is_available(): + torch.cuda.synchronize() + t_start = time.time() + test(model, data) - p.step() + + if torch.cuda.is_available(): + torch.cuda.synchronize() + t_end = time.time() + duration = t_end - t_start + print("End-to-End time: {} s".format(duration), flush=True) else: test(model, data) - if torch.cuda.is_available(): - torch.cuda.synchronize() - def train(model, optimizer, data): model.train() optimizer.zero_grad() @@ -151,4 +158,4 @@ def evaluate(model, data): def test(model, data): model.eval() with torch.no_grad(): - logits = model(data) + model(data) From 0a0d34980efd84f1fe0db7920ffef8fb8df76efa Mon Sep 17 00:00:00 2001 From: yanbing-j Date: Thu, 30 Jun 2022 14:16:23 +0800 Subject: [PATCH 04/24] Add inference.sh --- benchmark/citation/inference.sh | 118 ++++++++++++++++++++++++++++++++ benchmark/citation/run.sh | 36 ---------- benchmark/citation/sgc.py | 2 +- 3 files changed, 119 insertions(+), 37 deletions(-) create mode 100755 benchmark/citation/inference.sh diff --git a/benchmark/citation/inference.sh b/benchmark/citation/inference.sh new file mode 100755 index 000000000000..7c8180573bee --- /dev/null +++ b/benchmark/citation/inference.sh @@ -0,0 +1,118 @@ +#!/bin/sh + +echo "Cora" +echo "====" + +echo "GCN" +python gcn.py --dataset=Cora --inference=True +python gcn.py --dataset=Cora --random_splits=True --inference=True +python gcn.py --dataset=Cora --inference=True --profile=True +python gcn.py --dataset=Cora --random_splits=True --inference=True --profile=True + +echo "GAT" +python gat.py --dataset=Cora --inference=True +python gat.py --dataset=Cora --random_splits=True --inference=True +python gat.py --dataset=Cora --inference=True --profile=True +python gat.py --dataset=Cora --random_splits=True --inference=True --profile=True + +echo "Cheby" +python cheb.py --dataset=Cora --num_hops=3 --inference=True +python cheb.py --dataset=Cora --num_hops=3 --random_splits=True --inference=True +python cheb.py --dataset=Cora --num_hops=3 --inference=True --profile=True +python cheb.py --dataset=Cora --num_hops=3 --random_splits=True --inference=True --profile=True + +echo "SGC" +python sgc.py --dataset=Cora --K=3 --weight_decay=0.0005 --inference=True +python sgc.py --dataset=Cora --K=3 --weight_decay=0.0005 --random_splits=True --inference=True +python sgc.py --dataset=Cora --K=3 --weight_decay=0.0005 --inference=True --profile=True +python sgc.py --dataset=Cora --K=3 --weight_decay=0.0005 --random_splits=True --inference=True --profile=True + +echo "ARMA" +python arma.py --dataset=Cora --num_stacks=2 --num_layers=1 --shared_weights=True --inference=True +python arma.py --dataset=Cora --num_stacks=3 --num_layers=1 --shared_weights=True --random_splits=True --inference=True +python arma.py --dataset=Cora --num_stacks=2 --num_layers=1 --shared_weights=True --inference=True --profile=True +python arma.py --dataset=Cora --num_stacks=3 --num_layers=1 --shared_weights=True --random_splits=True --inference=True --profile=True + +echo "APPNP" +python appnp.py --dataset=Cora --alpha=0.1 --inference=True +python appnp.py --dataset=Cora --alpha=0.1 --random_splits=True --inference=True +python appnp.py --dataset=Cora --alpha=0.1 --inference=True --profile=True +python appnp.py --dataset=Cora --alpha=0.1 --random_splits=True --inference=True --profile=True + +echo "CiteSeer" +echo "========" + +echo "GCN" +python gcn.py --dataset=CiteSeer --inference=True +python gcn.py --dataset=CiteSeer --random_splits=True --inference=True +python gcn.py --dataset=CiteSeer --inference=True --profile=True +python gcn.py --dataset=CiteSeer --random_splits=True --inference=True --profile=True + +echo "GAT" +python gat.py --dataset=CiteSeer --inference=True +python gat.py --dataset=CiteSeer --random_splits=True --inference=True +python gat.py --dataset=CiteSeer --inference=True --profile=True +python gat.py --dataset=CiteSeer --random_splits=True --inference=True --profile=True + +echo "Cheby" +python cheb.py --dataset=CiteSeer --num_hops=2 --inference=True +python cheb.py --dataset=CiteSeer --num_hops=3 --random_splits=True --inference=True +python cheb.py --dataset=CiteSeer --num_hops=2 --inference=True --profile=True +python cheb.py --dataset=CiteSeer --num_hops=3 --random_splits=True --inference=True --profile=True + +echo "SGC" +python sgc.py --dataset=CiteSeer --K=2 --weight_decay=0.005 --inference=True +python sgc.py --dataset=CiteSeer --K=2 --weight_decay=0.005 --random_splits=True --inference=True +python sgc.py --dataset=CiteSeer --K=2 --weight_decay=0.005 --inference=True --profile=True +python sgc.py --dataset=CiteSeer --K=2 --weight_decay=0.005 --random_splits=True --inference=True --profile=True + +echo "ARMA" +python arma.py --dataset=CiteSeer --num_stacks=3 --num_layers=1 --shared_weights=True --inference=True +python arma.py --dataset=CiteSeer --num_stacks=3 --num_layers=1 --shared_weights=True --random_splits=True --inference=True +python arma.py --dataset=CiteSeer --num_stacks=3 --num_layers=1 --shared_weights=True --inference=True --profile=True +python arma.py --dataset=CiteSeer --num_stacks=3 --num_layers=1 --shared_weights=True --random_splits=True --inference=True --profile=True + +echo "APPNP" +python appnp.py --dataset=CiteSeer --alpha=0.1 --inference=True +python appnp.py --dataset=CiteSeer --alpha=0.1 --random_splits=True --inference=True +python appnp.py --dataset=CiteSeer --alpha=0.1 --inference=True --profile=True +python appnp.py --dataset=CiteSeer --alpha=0.1 --random_splits=True --inference=True --profile=True + +echo "PubMed" +echo "======" + +echo "GCN" +python gcn.py --dataset=PubMed --inference=True +python gcn.py --dataset=PubMed --random_splits=True --inference=True +python gcn.py --dataset=PubMed --inference=True --profile=True +python gcn.py --dataset=PubMed --random_splits=True --inference=True --profile=True + +echo "GAT" +python gat.py --dataset=PubMed --lr=0.01 --weight_decay=0.001 --output_heads=8 --inference=True +python gat.py --dataset=PubMed --lr=0.01 --weight_decay=0.001 --output_heads=8 --random_splits=True --inference=True +python gat.py --dataset=PubMed --lr=0.01 --weight_decay=0.001 --output_heads=8 --inference=True --profile=True +python gat.py --dataset=PubMed --lr=0.01 --weight_decay=0.001 --output_heads=8 --random_splits=True --inference=True --profile=True + +echo "Cheby" +python cheb.py --dataset=PubMed --num_hops=2 --inference=True +python cheb.py --dataset=PubMed --num_hops=2 --random_splits=True --inference=True +python cheb.py --dataset=PubMed --num_hops=2 --inference=True --profile=True +python cheb.py --dataset=PubMed --num_hops=2 --random_splits=True --inference=True --profile=True + +echo "SGC" +python sgc.py --dataset=PubMed --K=2 --weight_decay=0.0005 --inference=True +python sgc.py --dataset=PubMed --K=2 --weight_decay=0.0005 --random_splits=True --inference=True +python sgc.py --dataset=PubMed --K=2 --weight_decay=0.0005 --inference=True --profile=True +python sgc.py --dataset=PubMed --K=2 --weight_decay=0.0005 --random_splits=True --inference=True --profile=True + +echo "ARMA" +python arma.py --dataset=PubMed --num_stacks=2 --num_layers=1 --skip_dropout=0 --inference=True +python arma.py --dataset=PubMed --num_stacks=2 --num_layers=1 --skip_dropout=0.5 --random_splits=True --inference=True +python arma.py --dataset=PubMed --num_stacks=2 --num_layers=1 --skip_dropout=0 --inference=True --profile=True +python arma.py --dataset=PubMed --num_stacks=2 --num_layers=1 --skip_dropout=0.5 --random_splits=True --inference=True --profile=True + +echo "APPNP" +python appnp.py --dataset=PubMed --alpha=0.1 --inference=True +python appnp.py --dataset=PubMed --alpha=0.1 --random_splits=True --inference=True +python appnp.py --dataset=PubMed --alpha=0.1 --inference=True --profile=True +python appnp.py --dataset=PubMed --alpha=0.1 --random_splits=True --inference=True --profile=True diff --git a/benchmark/citation/run.sh b/benchmark/citation/run.sh index 6e4288f678e1..dc584555fe7a 100755 --- a/benchmark/citation/run.sh +++ b/benchmark/citation/run.sh @@ -6,38 +6,26 @@ echo "====" echo "GCN" python gcn.py --dataset=Cora python gcn.py --dataset=Cora --random_splits=True -python gcn.py --dataset=Cora --inference=True --profile=True -python gcn.py --dataset=Cora --random_splits=True --inference=True --profile=True echo "GAT" python gat.py --dataset=Cora python gat.py --dataset=Cora --random_splits=True -python gat.py --dataset=Cora --inference=True --profile=True -python gat.py --dataset=Cora --random_splits=True --inference=True --profile=True echo "Cheby" python cheb.py --dataset=Cora --num_hops=3 python cheb.py --dataset=Cora --num_hops=3 --random_splits=True -python cheb.py --dataset=Cora --num_hops=3 --inference=True --profile=True -python cheb.py --dataset=Cora --num_hops=3 --random_splits=True --inference=True --profile=True echo "SGC" python sgc.py --dataset=Cora --K=3 --weight_decay=0.0005 python sgc.py --dataset=Cora --K=3 --weight_decay=0.0005 --random_splits=True -python sgc.py --dataset=Cora --K=3 --weight_decay=0.0005 --inference=True --profile=True -python sgc.py --dataset=Cora --K=3 --weight_decay=0.0005 --random_splits=True --inference=True --profile=True echo "ARMA" python arma.py --dataset=Cora --num_stacks=2 --num_layers=1 --shared_weights=True python arma.py --dataset=Cora --num_stacks=3 --num_layers=1 --shared_weights=True --random_splits=True -python arma.py --dataset=Cora --num_stacks=2 --num_layers=1 --shared_weights=True --inference=True --profile=True -python arma.py --dataset=Cora --num_stacks=3 --num_layers=1 --shared_weights=True --random_splits=True --inference=True --profile=True echo "APPNP" python appnp.py --dataset=Cora --alpha=0.1 python appnp.py --dataset=Cora --alpha=0.1 --random_splits=True -python appnp.py --dataset=Cora --alpha=0.1 --inference=True --profile=True -python appnp.py --dataset=Cora --alpha=0.1 --random_splits=True --inference=True --profile=True echo "CiteSeer" echo "========" @@ -45,38 +33,26 @@ echo "========" echo "GCN" python gcn.py --dataset=CiteSeer python gcn.py --dataset=CiteSeer --random_splits=True -python gcn.py --dataset=CiteSeer --inference=True --profile=True -python gcn.py --dataset=CiteSeer --random_splits=True --inference=True --profile=True echo "GAT" python gat.py --dataset=CiteSeer python gat.py --dataset=CiteSeer --random_splits=True -python gat.py --dataset=CiteSeer --inference=True --profile=True -python gat.py --dataset=CiteSeer --random_splits=True --inference=True --profile=True echo "Cheby" python cheb.py --dataset=CiteSeer --num_hops=2 python cheb.py --dataset=CiteSeer --num_hops=3 --random_splits=True -python cheb.py --dataset=CiteSeer --num_hops=2 --inference=True --profile=True -python cheb.py --dataset=CiteSeer --num_hops=3 --random_splits=True --inference=True --profile=True echo "SGC" python sgc.py --dataset=CiteSeer --K=2 --weight_decay=0.005 python sgc.py --dataset=CiteSeer --K=2 --weight_decay=0.005 --random_splits=True -python sgc.py --dataset=CiteSeer --K=2 --weight_decay=0.005 --inference=True --profile=True -python sgc.py --dataset=CiteSeer --K=2 --weight_decay=0.005 --random_splits=True --inference=True --profile=True echo "ARMA" python arma.py --dataset=CiteSeer --num_stacks=3 --num_layers=1 --shared_weights=True python arma.py --dataset=CiteSeer --num_stacks=3 --num_layers=1 --shared_weights=True --random_splits=True -python arma.py --dataset=CiteSeer --num_stacks=3 --num_layers=1 --shared_weights=True --inference=True --profile=True -python arma.py --dataset=CiteSeer --num_stacks=3 --num_layers=1 --shared_weights=True --random_splits=True --inference=True --profile=True echo "APPNP" python appnp.py --dataset=CiteSeer --alpha=0.1 python appnp.py --dataset=CiteSeer --alpha=0.1 --random_splits=True -python appnp.py --dataset=CiteSeer --alpha=0.1 --inference=True --profile=True -python appnp.py --dataset=CiteSeer --alpha=0.1 --random_splits=True --inference=True --profile=True echo "PubMed" echo "======" @@ -84,35 +60,23 @@ echo "======" echo "GCN" python gcn.py --dataset=PubMed python gcn.py --dataset=PubMed --random_splits=True -python gcn.py --dataset=PubMed --inference=True --profile=True -python gcn.py --dataset=PubMed --random_splits=True --inference=True --profile=True echo "GAT" python gat.py --dataset=PubMed --lr=0.01 --weight_decay=0.001 --output_heads=8 python gat.py --dataset=PubMed --lr=0.01 --weight_decay=0.001 --output_heads=8 --random_splits=True -python gat.py --dataset=PubMed --lr=0.01 --weight_decay=0.001 --output_heads=8 --inference=True --profile=True -python gat.py --dataset=PubMed --lr=0.01 --weight_decay=0.001 --output_heads=8 --random_splits=True --inference=True --profile=True echo "Cheby" python cheb.py --dataset=PubMed --num_hops=2 python cheb.py --dataset=PubMed --num_hops=2 --random_splits=True -python cheb.py --dataset=PubMed --num_hops=2 --inference=True --profile=True -python cheb.py --dataset=PubMed --num_hops=2 --random_splits=True --inference=True --profile=True echo "SGC" python sgc.py --dataset=PubMed --K=2 --weight_decay=0.0005 python sgc.py --dataset=PubMed --K=2 --weight_decay=0.0005 --random_splits=True -python sgc.py --dataset=PubMed --K=2 --weight_decay=0.0005 --inference=True --profile=True -python sgc.py --dataset=PubMed --K=2 --weight_decay=0.0005 --random_splits=True --inference=True --profile=True echo "ARMA" python arma.py --dataset=PubMed --num_stacks=2 --num_layers=1 --skip_dropout=0 python arma.py --dataset=PubMed --num_stacks=2 --num_layers=1 --skip_dropout=0.5 --random_splits=True -python arma.py --dataset=PubMed --num_stacks=2 --num_layers=1 --skip_dropout=0 --inference=True --profile=True -python arma.py --dataset=PubMed --num_stacks=2 --num_layers=1 --skip_dropout=0.5 --random_splits=True --inference=True --profile=True echo "APPNP" python appnp.py --dataset=PubMed --alpha=0.1 python appnp.py --dataset=PubMed --alpha=0.1 --random_splits=True -python appnp.py --dataset=PubMed --alpha=0.1 --inference=True --profile=True -python appnp.py --dataset=PubMed --alpha=0.1 --random_splits=True --inference=True --profile=True diff --git a/benchmark/citation/sgc.py b/benchmark/citation/sgc.py index 4d836c824c9d..936879a81824 100644 --- a/benchmark/citation/sgc.py +++ b/benchmark/citation/sgc.py @@ -38,7 +38,7 @@ def forward(self, data): dataset = get_planetoid_dataset(args.dataset, args.normalize_features) permute_masks = random_planetoid_splits if args.random_splits else None -print("gcn-{}-{}:".format(args.dataset, args.random_splits), end=' ') +print("sgc-{}-{}:".format(args.dataset, args.random_splits), end=' ') run(dataset, Net(dataset), args.runs, args.epochs, args.lr, args.weight_decay, args.early_stopping, args.inference, args.profile, permute_masks) From 5eb049122aa798a1eb51539b63a221c7884f3227 Mon Sep 17 00:00:00 2001 From: yanbing-j Date: Fri, 1 Jul 2022 10:56:22 +0800 Subject: [PATCH 05/24] Add inference and profile for to_hetero_mag --- examples/hetero/to_hetero_mag.py | 51 ++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/examples/hetero/to_hetero_mag.py b/examples/hetero/to_hetero_mag.py index 6605038c9af3..69c2e97ecd74 100644 --- a/examples/hetero/to_hetero_mag.py +++ b/examples/hetero/to_hetero_mag.py @@ -1,10 +1,12 @@ import argparse import os.path as osp +import time import torch import torch.nn.functional as F from torch.nn import ReLU from tqdm import tqdm +from torch.profiler import profile, ProfilerActivity import torch_geometric.transforms as T from torch_geometric.datasets import OGB_MAG @@ -13,9 +15,12 @@ parser = argparse.ArgumentParser() parser.add_argument('--use_hgt_loader', action='store_true') +parser.add_argument('--inference', type=bool, default=False) +parser.add_argument('--profile', type=bool, default=False) # Currently support profile in inference args = parser.parse_args() device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') +profile_sort = "self_cuda_time_total" if torch.cuda.is_available() else "self_cpu_time_total" path = osp.join(osp.dirname(osp.realpath(__file__)), '../../data/OGB') transform = T.ToUndirected(merge=True) @@ -48,6 +53,13 @@ ]) model = to_hetero(model, data.metadata(), aggr='sum').to(device) +def trace_handler(p): + output = p.key_averages().table(sort_by=profile_sort) + print(output) + import pathlib + profile_dir = str(pathlib.Path.cwd()) + '/' + timeline_file = profile_dir + 'timeline-to-hetero-mag' + '.json' + p.export_chrome_trace(timeline_file) @torch.no_grad() def init_params(): @@ -92,11 +104,40 @@ def test(loader): return total_correct / total_examples +@torch.no_grad() +def inference(loader): + model.eval() + for batch in tqdm(loader): + batch = batch.to(device, 'edge_index') + batch_size = batch['paper'].batch_size + model(batch.x_dict, batch.edge_index_dict) init_params() # Initialize parameters. -optimizer = torch.optim.Adam(model.parameters(), lr=0.01) +if not args.inference: + optimizer = torch.optim.Adam(model.parameters(), lr=0.01) -for epoch in range(1, 21): - loss = train() - val_acc = test(val_loader) - print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}, Val: {val_acc:.4f}') + for epoch in range(1, 21): + loss = train() + val_acc = test(val_loader) + print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}, Val: {val_acc:.4f}') +else: + for epoch in range(1, 21): + if epoch == 20: + if args.profile: + with profile(activities=[ + ProfilerActivity.CPU, ProfilerActivity.CUDA], + on_trace_ready=trace_handler) as p: + inference(val_loader) + p.step() + else: + if torch.cuda.is_available(): + torch.cuda.synchronize() + t_start = time.time() + inference(val_loader) + if torch.cuda.is_available(): + torch.cuda.synchronize() + t_end = time.time() + duration = t_end - t_start + print("End-to-End time: {} s".format(duration), flush=True) + else: + inference(val_loader) From ac33b480c315c17b3ea679c3f59418362efa9ad8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 1 Jul 2022 04:46:31 +0000 Subject: [PATCH 06/24] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- benchmark/citation/appnp.py | 6 ++++-- benchmark/citation/arma.py | 6 ++++-- benchmark/citation/cheb.py | 6 ++++-- benchmark/citation/gat.py | 6 ++++-- benchmark/citation/gcn.py | 6 ++++-- benchmark/citation/sgc.py | 6 ++++-- benchmark/citation/train_eval.py | 32 +++++++++++++++++++++----------- examples/hetero/to_hetero_mag.py | 23 +++++++++++++++-------- 8 files changed, 60 insertions(+), 31 deletions(-) diff --git a/benchmark/citation/appnp.py b/benchmark/citation/appnp.py index a0812060bbc1..4a9875921b2d 100644 --- a/benchmark/citation/appnp.py +++ b/benchmark/citation/appnp.py @@ -21,7 +21,8 @@ parser.add_argument('--K', type=int, default=10) parser.add_argument('--alpha', type=float, default=0.1) parser.add_argument('--inference', type=bool, default=False) -parser.add_argument('--profile', type=bool, default=False) # Currently support profile in inference +parser.add_argument('--profile', type=bool, + default=False) # Currently support profile in inference args = parser.parse_args() @@ -56,5 +57,6 @@ def forward(self, data): import os import pathlib profile_dir = str(pathlib.Path.cwd()) + '/' - timeline_file = profile_dir + 'profile-citation-APPNP-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.json' + timeline_file = profile_dir + 'profile-citation-APPNP-' + args.dataset + '-random_splits-' + str( + args.random_splits) + '.json' os.rename('timeline.json', timeline_file) diff --git a/benchmark/citation/arma.py b/benchmark/citation/arma.py index 43d587705422..5c65e9e0d814 100644 --- a/benchmark/citation/arma.py +++ b/benchmark/citation/arma.py @@ -22,7 +22,8 @@ parser.add_argument('--shared_weights', type=bool, default=False) parser.add_argument('--skip_dropout', type=float, default=0.75) parser.add_argument('--inference', type=bool, default=False) -parser.add_argument('--profile', type=bool, default=False) # Currently support profile in inference +parser.add_argument('--profile', type=bool, + default=False) # Currently support profile in inference args = parser.parse_args() @@ -58,5 +59,6 @@ def forward(self, data): import os import pathlib profile_dir = str(pathlib.Path.cwd()) + '/' - timeline_file = profile_dir + 'profile-citation-ARMA-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.json' + timeline_file = profile_dir + 'profile-citation-ARMA-' + args.dataset + '-random_splits-' + str( + args.random_splits) + '.json' os.rename('timeline.json', timeline_file) diff --git a/benchmark/citation/cheb.py b/benchmark/citation/cheb.py index 0ca8d4780f8f..b33c2fe2e96d 100644 --- a/benchmark/citation/cheb.py +++ b/benchmark/citation/cheb.py @@ -19,7 +19,8 @@ parser.add_argument('--normalize_features', type=bool, default=True) parser.add_argument('--num_hops', type=int, default=3) parser.add_argument('--inference', type=bool, default=False) -parser.add_argument('--profile', type=bool, default=False) # Currently support profile in inference +parser.add_argument('--profile', type=bool, + default=False) # Currently support profile in inference args = parser.parse_args() @@ -51,5 +52,6 @@ def forward(self, data): import os import pathlib profile_dir = str(pathlib.Path.cwd()) + '/' - timeline_file = profile_dir + 'profile-citation-CHEBY-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.json' + timeline_file = profile_dir + 'profile-citation-CHEBY-' + args.dataset + '-random_splits-' + str( + args.random_splits) + '.json' os.rename('timeline.json', timeline_file) diff --git a/benchmark/citation/gat.py b/benchmark/citation/gat.py index 4f8b2027be12..0ac29696a416 100644 --- a/benchmark/citation/gat.py +++ b/benchmark/citation/gat.py @@ -20,7 +20,8 @@ parser.add_argument('--heads', type=int, default=8) parser.add_argument('--output_heads', type=int, default=1) parser.add_argument('--inference', type=bool, default=False) -parser.add_argument('--profile', type=bool, default=False) # Currently support profile in inference +parser.add_argument('--profile', type=bool, + default=False) # Currently support profile in inference args = parser.parse_args() @@ -56,5 +57,6 @@ def forward(self, data): import os import pathlib profile_dir = str(pathlib.Path.cwd()) + '/' - timeline_file = profile_dir + 'profile-citation-GAT-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.json' + timeline_file = profile_dir + 'profile-citation-GAT-' + args.dataset + '-random_splits-' + str( + args.random_splits) + '.json' os.rename('timeline.json', timeline_file) diff --git a/benchmark/citation/gcn.py b/benchmark/citation/gcn.py index b84b91133e4b..d1f7683e1c28 100644 --- a/benchmark/citation/gcn.py +++ b/benchmark/citation/gcn.py @@ -18,7 +18,8 @@ parser.add_argument('--dropout', type=float, default=0.5) parser.add_argument('--normalize_features', type=bool, default=True) parser.add_argument('--inference', type=bool, default=False) -parser.add_argument('--profile', type=bool, default=False) # Currently support profile in inference +parser.add_argument('--profile', type=bool, + default=False) # Currently support profile in inference args = parser.parse_args() @@ -50,5 +51,6 @@ def forward(self, data): import os import pathlib profile_dir = str(pathlib.Path.cwd()) + '/' - timeline_file = profile_dir + 'profile-citation-GCN-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.json' + timeline_file = profile_dir + 'profile-citation-GCN-' + args.dataset + '-random_splits-' + str( + args.random_splits) + '.json' os.rename('timeline.json', timeline_file) diff --git a/benchmark/citation/sgc.py b/benchmark/citation/sgc.py index 936879a81824..5ad43c3ada59 100644 --- a/benchmark/citation/sgc.py +++ b/benchmark/citation/sgc.py @@ -17,7 +17,8 @@ parser.add_argument('--normalize_features', type=bool, default=False) parser.add_argument('--K', type=int, default=2) parser.add_argument('--inference', type=bool, default=False) -parser.add_argument('--profile', type=bool, default=False) # Currently support profile in inference +parser.add_argument('--profile', type=bool, + default=False) # Currently support profile in inference args = parser.parse_args() @@ -46,5 +47,6 @@ def forward(self, data): import os import pathlib profile_dir = str(pathlib.Path.cwd()) + '/' - timeline_file = profile_dir + 'profile-citation-SGC-' + args.dataset + '-random_splits-' + str(args.random_splits) + '.json' + timeline_file = profile_dir + 'profile-citation-SGC-' + args.dataset + '-random_splits-' + str( + args.random_splits) + '.json' os.rename('timeline.json', timeline_file) diff --git a/benchmark/citation/train_eval.py b/benchmark/citation/train_eval.py index 7f095b5e4022..2710109f87ef 100644 --- a/benchmark/citation/train_eval.py +++ b/benchmark/citation/train_eval.py @@ -4,12 +4,13 @@ import torch.nn.functional as F from torch import tensor from torch.optim import Adam -from torch.profiler import profile, ProfilerActivity +from torch.profiler import ProfilerActivity, profile from torch_geometric.utils import index_to_mask device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') -profile_sort = "self_cuda_time_total" if torch.cuda.is_available() else "self_cpu_time_total" +profile_sort = "self_cuda_time_total" if torch.cuda.is_available( +) else "self_cpu_time_total" def random_planetoid_splits(data, num_classes): @@ -35,6 +36,7 @@ def random_planetoid_splits(data, num_classes): return data + def trace_handler(p): output = p.key_averages().table(sort_by=profile_sort) print(output) @@ -43,8 +45,9 @@ def trace_handler(p): timeline_file = profile_dir + 'timeline' + '.json' p.export_chrome_trace(timeline_file) -def run(dataset, model, runs, epochs, lr, weight_decay, early_stopping, inference, profiling, - permute_masks=None, logger=None): + +def run(dataset, model, runs, epochs, lr, weight_decay, early_stopping, + inference, profiling, permute_masks=None, logger=None): val_losses, accs, durations = [], [], [] if not inference: for _ in range(runs): @@ -54,7 +57,8 @@ def run(dataset, model, runs, epochs, lr, weight_decay, early_stopping, inferenc data = data.to(device) model.to(device).reset_parameters() - optimizer = Adam(model.parameters(), lr=lr, weight_decay=weight_decay) + optimizer = Adam(model.parameters(), lr=lr, + weight_decay=weight_decay) if torch.cuda.is_available(): torch.cuda.synchronize() @@ -91,9 +95,11 @@ def run(dataset, model, runs, epochs, lr, weight_decay, early_stopping, inferenc val_losses.append(best_val_loss) accs.append(test_acc) durations.append(t_end - t_start) - loss, acc, duration = tensor(val_losses), tensor(accs), tensor(durations) + loss, acc, duration = tensor(val_losses), tensor(accs), tensor( + durations) - print(f'Val Loss: {float(loss.mean()):.4f}, ' + print( + f'Val Loss: {float(loss.mean()):.4f}, ' f'Test Accuracy: {float(acc.mean()):.3f} ± {float(acc.std()):.3f}, ' f'Duration: {float(duration.mean()):.3f}s') else: @@ -108,9 +114,10 @@ def run(dataset, model, runs, epochs, lr, weight_decay, early_stopping, inferenc for epoch in range(1, epochs + 1): if i == int(runs / 2) and epoch == int(epochs / 2): if profiling: - with profile(activities=[ - ProfilerActivity.CPU, ProfilerActivity.CUDA], - on_trace_ready=trace_handler) as p: + with profile( + activities=[ + ProfilerActivity.CPU, ProfilerActivity.CUDA + ], on_trace_ready=trace_handler) as p: test(model, data) p.step() else: @@ -124,10 +131,12 @@ def run(dataset, model, runs, epochs, lr, weight_decay, early_stopping, inferenc torch.cuda.synchronize() t_end = time.time() duration = t_end - t_start - print("End-to-End time: {} s".format(duration), flush=True) + print("End-to-End time: {} s".format(duration), + flush=True) else: test(model, data) + def train(model, optimizer, data): model.train() optimizer.zero_grad() @@ -155,6 +164,7 @@ def evaluate(model, data): return outs + def test(model, data): model.eval() with torch.no_grad(): diff --git a/examples/hetero/to_hetero_mag.py b/examples/hetero/to_hetero_mag.py index 69c2e97ecd74..bbad09c1aa57 100644 --- a/examples/hetero/to_hetero_mag.py +++ b/examples/hetero/to_hetero_mag.py @@ -5,8 +5,8 @@ import torch import torch.nn.functional as F from torch.nn import ReLU +from torch.profiler import ProfilerActivity, profile from tqdm import tqdm -from torch.profiler import profile, ProfilerActivity import torch_geometric.transforms as T from torch_geometric.datasets import OGB_MAG @@ -16,11 +16,13 @@ parser = argparse.ArgumentParser() parser.add_argument('--use_hgt_loader', action='store_true') parser.add_argument('--inference', type=bool, default=False) -parser.add_argument('--profile', type=bool, default=False) # Currently support profile in inference +parser.add_argument('--profile', type=bool, + default=False) # Currently support profile in inference args = parser.parse_args() device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') -profile_sort = "self_cuda_time_total" if torch.cuda.is_available() else "self_cpu_time_total" +profile_sort = "self_cuda_time_total" if torch.cuda.is_available( +) else "self_cpu_time_total" path = osp.join(osp.dirname(osp.realpath(__file__)), '../../data/OGB') transform = T.ToUndirected(merge=True) @@ -53,6 +55,7 @@ ]) model = to_hetero(model, data.metadata(), aggr='sum').to(device) + def trace_handler(p): output = p.key_averages().table(sort_by=profile_sort) print(output) @@ -61,6 +64,7 @@ def trace_handler(p): timeline_file = profile_dir + 'timeline-to-hetero-mag' + '.json' p.export_chrome_trace(timeline_file) + @torch.no_grad() def init_params(): # Initialize lazy parameters via forwarding a single batch to the model: @@ -104,6 +108,7 @@ def test(loader): return total_correct / total_examples + @torch.no_grad() def inference(loader): model.eval() @@ -112,6 +117,7 @@ def inference(loader): batch_size = batch['paper'].batch_size model(batch.x_dict, batch.edge_index_dict) + init_params() # Initialize parameters. if not args.inference: optimizer = torch.optim.Adam(model.parameters(), lr=0.01) @@ -124,11 +130,12 @@ def inference(loader): for epoch in range(1, 21): if epoch == 20: if args.profile: - with profile(activities=[ - ProfilerActivity.CPU, ProfilerActivity.CUDA], - on_trace_ready=trace_handler) as p: - inference(val_loader) - p.step() + with profile( + activities=[ + ProfilerActivity.CPU, ProfilerActivity.CUDA + ], on_trace_ready=trace_handler) as p: + inference(val_loader) + p.step() else: if torch.cuda.is_available(): torch.cuda.synchronize() From d5dfd35e22d400f5d8849bb275dd54443f53d3ea Mon Sep 17 00:00:00 2001 From: yanbing-j Date: Mon, 4 Jul 2022 09:27:46 +0800 Subject: [PATCH 07/24] Add inference for pna --- examples/pna.py | 64 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 56 insertions(+), 8 deletions(-) diff --git a/examples/pna.py b/examples/pna.py index 4697f49d7121..8f530c2fce39 100644 --- a/examples/pna.py +++ b/examples/pna.py @@ -1,15 +1,24 @@ import os.path as osp +import time +import argparse import torch import torch.nn.functional as F from torch.nn import Embedding, Linear, ModuleList, ReLU, Sequential from torch.optim.lr_scheduler import ReduceLROnPlateau +from torch.profiler import ProfilerActivity, profile from torch_geometric.datasets import ZINC from torch_geometric.loader import DataLoader from torch_geometric.nn import BatchNorm, PNAConv, global_add_pool from torch_geometric.utils import degree +parser = argparse.ArgumentParser() +parser.add_argument('--inference', type=bool, default=False) +parser.add_argument('--profile', type=bool, + default=False) # Currently support profile in inference +args = parser.parse_args() + path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'ZINC') train_dataset = ZINC(path, subset=True, split='train') val_dataset = ZINC(path, subset=True, split='val') @@ -68,10 +77,20 @@ def forward(self, x, edge_index, edge_attr, batch): device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') model = Net().to(device) +profile_sort = "self_cuda_time_total" if torch.cuda.is_available( +) else "self_cpu_time_total" optimizer = torch.optim.Adam(model.parameters(), lr=0.001) scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=20, min_lr=0.00001) +def trace_handler(p): + output = p.key_averages().table(sort_by=profile_sort) + print(output) + import pathlib + profile_dir = str(pathlib.Path.cwd()) + '/' + timeline_file = profile_dir + 'timeline-to-pna' + '.json' + p.export_chrome_trace(timeline_file) + def train(epoch): model.train() @@ -99,11 +118,40 @@ def test(loader): total_error += (out.squeeze() - data.y).abs().sum().item() return total_error / len(loader.dataset) - -for epoch in range(1, 301): - loss = train(epoch) - val_mae = test(val_loader) - test_mae = test(test_loader) - scheduler.step(val_mae) - print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}, Val: {val_mae:.4f}, ' - f'Test: {test_mae:.4f}') +@torch.no_grad() +def inference(loader): + model.eval() + for data in loader: + data = data.to(device) + model(data.x, data.edge_index, data.edge_attr, data.batch) + +if not args.inference: + for epoch in range(1, 301): + loss = train(epoch) + val_mae = test(val_loader) + test_mae = test(test_loader) + scheduler.step(val_mae) + print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}, Val: {val_mae:.4f}, ' + f'Test: {test_mae:.4f}') +else: + for epoch in range(1, 301): + if epoch == 300: + if args.profile: + with profile( + activities=[ + ProfilerActivity.CPU, ProfilerActivity.CUDA + ], on_trace_ready=trace_handler) as p: + inference(test_loader) + p.step() + else: + if torch.cuda.is_available(): + torch.cuda.synchronize() + t_start = time.time() + inference(test_loader) + if torch.cuda.is_available(): + torch.cuda.synchronize() + t_end = time.time() + duration = t_end - t_start + print("End-to-End time: {} s".format(duration), flush=True) + else: + inference(test_loader) From 5cb18c8ada5a07c69930ed9a2b8e58bbd0174fac Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 4 Jul 2022 01:30:56 +0000 Subject: [PATCH 08/24] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- examples/pna.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/examples/pna.py b/examples/pna.py index 8f530c2fce39..c81e97eb4af9 100644 --- a/examples/pna.py +++ b/examples/pna.py @@ -1,6 +1,6 @@ +import argparse import os.path as osp import time -import argparse import torch import torch.nn.functional as F @@ -83,6 +83,7 @@ def forward(self, x, edge_index, edge_attr, batch): scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=20, min_lr=0.00001) + def trace_handler(p): output = p.key_averages().table(sort_by=profile_sort) print(output) @@ -118,6 +119,7 @@ def test(loader): total_error += (out.squeeze() - data.y).abs().sum().item() return total_error / len(loader.dataset) + @torch.no_grad() def inference(loader): model.eval() @@ -125,6 +127,7 @@ def inference(loader): data = data.to(device) model(data.x, data.edge_index, data.edge_attr, data.batch) + if not args.inference: for epoch in range(1, 301): loss = train(epoch) @@ -132,15 +135,15 @@ def inference(loader): test_mae = test(test_loader) scheduler.step(val_mae) print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}, Val: {val_mae:.4f}, ' - f'Test: {test_mae:.4f}') + f'Test: {test_mae:.4f}') else: for epoch in range(1, 301): if epoch == 300: if args.profile: with profile( - activities=[ - ProfilerActivity.CPU, ProfilerActivity.CUDA - ], on_trace_ready=trace_handler) as p: + activities=[ + ProfilerActivity.CPU, ProfilerActivity.CUDA + ], on_trace_ready=trace_handler) as p: inference(test_loader) p.step() else: From c8d3b8c2f426942bf8f58abb7bd721592613f8e6 Mon Sep 17 00:00:00 2001 From: yanbing-j Date: Mon, 4 Jul 2022 15:53:24 +0800 Subject: [PATCH 09/24] Add inference for benchmark/points/edge_cnn --- benchmark/points/edge_cnn.py | 13 +++++- benchmark/points/train_eval.py | 83 +++++++++++++++++++++++++--------- 2 files changed, 74 insertions(+), 22 deletions(-) diff --git a/benchmark/points/edge_cnn.py b/benchmark/points/edge_cnn.py index 9c4a2fa82548..40902e4a351b 100644 --- a/benchmark/points/edge_cnn.py +++ b/benchmark/points/edge_cnn.py @@ -17,6 +17,9 @@ parser.add_argument('--lr_decay_factor', type=float, default=0.5) parser.add_argument('--lr_decay_step_size', type=int, default=50) parser.add_argument('--weight_decay', type=float, default=0) +parser.add_argument('--inference', type=bool, default=False) +parser.add_argument('--profile', type=bool, + default=False) # Currently support profile in inference args = parser.parse_args() @@ -54,5 +57,13 @@ def forward(self, pos, batch): train_dataset, test_dataset = get_dataset(num_points=1024) model = Net(train_dataset.num_classes) +print("edge_cnn", end=' ') run(train_dataset, test_dataset, model, args.epochs, args.batch_size, args.lr, - args.lr_decay_factor, args.lr_decay_step_size, args.weight_decay) + args.lr_decay_factor, args.lr_decay_step_size, args.weight_decay, args.inference, args.profile) + +if args.profile: + import os + import pathlib + profile_dir = str(pathlib.Path.cwd()) + '/' + timeline_file = profile_dir + 'profile-points-edge_cnn.json' + os.rename('timeline.json', timeline_file) diff --git a/benchmark/points/train_eval.py b/benchmark/points/train_eval.py index b19833c8eb33..6a7ddf835562 100644 --- a/benchmark/points/train_eval.py +++ b/benchmark/points/train_eval.py @@ -7,10 +7,19 @@ from torch_geometric.loader import DataLoader device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') +profile_sort = "self_cuda_time_total" if torch.cuda.is_available( +) else "self_cpu_time_total" +def trace_handler(p): + output = p.key_averages().table(sort_by=profile_sort) + print(output) + import pathlib + profile_dir = str(pathlib.Path.cwd()) + '/' + timeline_file = profile_dir + 'timeline' + '.json' + p.export_chrome_trace(timeline_file) def run(train_dataset, test_dataset, model, epochs, batch_size, lr, - lr_decay_factor, lr_decay_step_size, weight_decay): + lr_decay_factor, lr_decay_step_size, weight_decay, inference, profiling): model = model.to(device) optimizer = Adam(model.parameters(), lr=lr, weight_decay=weight_decay) @@ -18,26 +27,52 @@ def run(train_dataset, test_dataset, model, epochs, batch_size, lr, train_loader = DataLoader(train_dataset, batch_size, shuffle=True) test_loader = DataLoader(test_dataset, batch_size, shuffle=False) - for epoch in range(1, epochs + 1): - if torch.cuda.is_available(): - torch.cuda.synchronize() - - t_start = time.perf_counter() - - train(model, optimizer, train_loader, device) - test_acc = test(model, test_loader, device) - - if torch.cuda.is_available(): - torch.cuda.synchronize() - - t_end = time.perf_counter() - - print(f'Epoch: {epoch:03d}, Test: {test_acc:.4f}, ' - f'Duration: {t_end - t_start:.2f}') - - if epoch % lr_decay_step_size == 0: - for param_group in optimizer.param_groups: - param_group['lr'] = lr_decay_factor * param_group['lr'] + if not inference: + for epoch in range(1, epochs + 1): + if torch.cuda.is_available(): + torch.cuda.synchronize() + + t_start = time.perf_counter() + + train(model, optimizer, train_loader, device) + test_acc = test(model, test_loader, device) + + if torch.cuda.is_available(): + torch.cuda.synchronize() + + t_end = time.perf_counter() + + print(f'Epoch: {epoch:03d}, Test: {test_acc:.4f}, ' + f'Duration: {t_end - t_start:.2f}') + + if epoch % lr_decay_step_size == 0: + for param_group in optimizer.param_groups: + param_group['lr'] = lr_decay_factor * param_group['lr'] + else: + for epoch in range(1, epochs + 1): + if epoch == epochs: + if profiling: + with profile( + activities=[ + ProfilerActivity.CPU, ProfilerActivity.CUDA + ], on_trace_ready=trace_handler) as p: + inference(model, test_loader, device) + p.step() + else: + if torch.cuda.is_available(): + torch.cuda.synchronize() + t_start = time.time() + + inference(model, test_loader, device) + + if torch.cuda.is_available(): + torch.cuda.synchronize() + t_end = time.time() + duration = t_end - t_start + print("End-to-End time: {} s".format(duration), + flush=True) + else: + inference(model, test_loader, device) def train(model, optimizer, train_loader, device): @@ -63,3 +98,9 @@ def test(model, test_loader, device): test_acc = correct / len(test_loader.dataset) return test_acc + +def inference(model, test_loader, device): + model.eval() + for data in test_loader: + data = data.to(device) + model(data.pos, data.batch) From e563390cb2e481aa17ebdc688cb20652c40c712a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 4 Jul 2022 07:56:41 +0000 Subject: [PATCH 10/24] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- benchmark/points/edge_cnn.py | 3 ++- benchmark/points/train_eval.py | 17 ++++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/benchmark/points/edge_cnn.py b/benchmark/points/edge_cnn.py index 40902e4a351b..7431344ae133 100644 --- a/benchmark/points/edge_cnn.py +++ b/benchmark/points/edge_cnn.py @@ -59,7 +59,8 @@ def forward(self, pos, batch): model = Net(train_dataset.num_classes) print("edge_cnn", end=' ') run(train_dataset, test_dataset, model, args.epochs, args.batch_size, args.lr, - args.lr_decay_factor, args.lr_decay_step_size, args.weight_decay, args.inference, args.profile) + args.lr_decay_factor, args.lr_decay_step_size, args.weight_decay, + args.inference, args.profile) if args.profile: import os diff --git a/benchmark/points/train_eval.py b/benchmark/points/train_eval.py index 6a7ddf835562..30c52808c98e 100644 --- a/benchmark/points/train_eval.py +++ b/benchmark/points/train_eval.py @@ -10,6 +10,7 @@ profile_sort = "self_cuda_time_total" if torch.cuda.is_available( ) else "self_cpu_time_total" + def trace_handler(p): output = p.key_averages().table(sort_by=profile_sort) print(output) @@ -18,8 +19,10 @@ def trace_handler(p): timeline_file = profile_dir + 'timeline' + '.json' p.export_chrome_trace(timeline_file) + def run(train_dataset, test_dataset, model, epochs, batch_size, lr, - lr_decay_factor, lr_decay_step_size, weight_decay, inference, profiling): + lr_decay_factor, lr_decay_step_size, weight_decay, inference, + profiling): model = model.to(device) optimizer = Adam(model.parameters(), lr=lr, weight_decay=weight_decay) @@ -43,7 +46,7 @@ def run(train_dataset, test_dataset, model, epochs, batch_size, lr, t_end = time.perf_counter() print(f'Epoch: {epoch:03d}, Test: {test_acc:.4f}, ' - f'Duration: {t_end - t_start:.2f}') + f'Duration: {t_end - t_start:.2f}') if epoch % lr_decay_step_size == 0: for param_group in optimizer.param_groups: @@ -53,9 +56,9 @@ def run(train_dataset, test_dataset, model, epochs, batch_size, lr, if epoch == epochs: if profiling: with profile( - activities=[ - ProfilerActivity.CPU, ProfilerActivity.CUDA - ], on_trace_ready=trace_handler) as p: + activities=[ + ProfilerActivity.CPU, ProfilerActivity.CUDA + ], on_trace_ready=trace_handler) as p: inference(model, test_loader, device) p.step() else: @@ -69,8 +72,7 @@ def run(train_dataset, test_dataset, model, epochs, batch_size, lr, torch.cuda.synchronize() t_end = time.time() duration = t_end - t_start - print("End-to-End time: {} s".format(duration), - flush=True) + print("End-to-End time: {} s".format(duration), flush=True) else: inference(model, test_loader, device) @@ -99,6 +101,7 @@ def test(model, test_loader, device): return test_acc + def inference(model, test_loader, device): model.eval() for data in test_loader: From b129f9d15ee49b614e6a67c4967d81e978a90f8a Mon Sep 17 00:00:00 2001 From: yanbing-j Date: Mon, 4 Jul 2022 16:16:10 +0800 Subject: [PATCH 11/24] Fix error --- benchmark/points/train_eval.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/benchmark/points/train_eval.py b/benchmark/points/train_eval.py index 30c52808c98e..b17b4184b8ed 100644 --- a/benchmark/points/train_eval.py +++ b/benchmark/points/train_eval.py @@ -3,6 +3,7 @@ import torch import torch.nn.functional as F from torch.optim import Adam +from torch.profiler import ProfilerActivity, profile from torch_geometric.loader import DataLoader @@ -59,14 +60,14 @@ def run(train_dataset, test_dataset, model, epochs, batch_size, lr, activities=[ ProfilerActivity.CPU, ProfilerActivity.CUDA ], on_trace_ready=trace_handler) as p: - inference(model, test_loader, device) + inference_run(model, test_loader, device) p.step() else: if torch.cuda.is_available(): torch.cuda.synchronize() t_start = time.time() - inference(model, test_loader, device) + inference_run(model, test_loader, device) if torch.cuda.is_available(): torch.cuda.synchronize() @@ -74,7 +75,7 @@ def run(train_dataset, test_dataset, model, epochs, batch_size, lr, duration = t_end - t_start print("End-to-End time: {} s".format(duration), flush=True) else: - inference(model, test_loader, device) + inference_run(model, test_loader, device) def train(model, optimizer, train_loader, device): @@ -102,7 +103,7 @@ def test(model, test_loader, device): return test_acc -def inference(model, test_loader, device): +def inference_run(model, test_loader, device): model.eval() for data in test_loader: data = data.to(device) From b80cd4107565230c1e0e852068dd3f0d3cd472be Mon Sep 17 00:00:00 2001 From: yanbing-j Date: Mon, 11 Jul 2022 14:36:02 +0800 Subject: [PATCH 12/24] Update scripts --- benchmark/citation/appnp.py | 19 ++- benchmark/citation/arma.py | 29 ++--- benchmark/citation/cheb.py | 21 ++-- benchmark/citation/gat.py | 27 ++-- benchmark/citation/gcn.py | 21 ++-- benchmark/citation/inference.sh | 144 ++++++++++----------- benchmark/citation/sgc.py | 21 ++-- benchmark/citation/train_eval.py | 189 ++++++++++++++-------------- benchmark/points/edge_cnn.py | 13 +- benchmark/points/train_eval.py | 13 +- examples/hetero/to_hetero_mag.py | 18 +-- examples/pna.py | 18 +-- torch_geometric/profile/__init__.py | 3 + torch_geometric/profile/profile.py | 24 ++++ 14 files changed, 260 insertions(+), 300 deletions(-) diff --git a/benchmark/citation/appnp.py b/benchmark/citation/appnp.py index 4a9875921b2d..a6aa31357175 100644 --- a/benchmark/citation/appnp.py +++ b/benchmark/citation/appnp.py @@ -5,7 +5,8 @@ from citation import get_planetoid_dataset, random_planetoid_splits, run from torch.nn import Linear -from torch_geometric.nn import APPNP +from torch_geometric.nn import APPNP as Conv +from torch_geometric.profile import rename_profile_file parser = argparse.ArgumentParser() parser.add_argument('--dataset', type=str, required=True) @@ -20,9 +21,8 @@ parser.add_argument('--normalize_features', type=bool, default=True) parser.add_argument('--K', type=int, default=10) parser.add_argument('--alpha', type=float, default=0.1) -parser.add_argument('--inference', type=bool, default=False) -parser.add_argument('--profile', type=bool, - default=False) # Currently support profile in inference +parser.add_argument('--inference', action='store_true') +parser.add_argument('--profile', action='store_true') args = parser.parse_args() @@ -31,7 +31,7 @@ def __init__(self, dataset): super().__init__() self.lin1 = Linear(dataset.num_features, args.hidden) self.lin2 = Linear(args.hidden, dataset.num_classes) - self.prop1 = APPNP(args.K, args.alpha) + self.prop1 = Conv(args.K, args.alpha) def reset_parameters(self): self.lin1.reset_parameters() @@ -49,14 +49,9 @@ def forward(self, data): dataset = get_planetoid_dataset(args.dataset, args.normalize_features) permute_masks = random_planetoid_splits if args.random_splits else None -print("appnp-{}-{}:".format(args.dataset, args.random_splits), end=' ') run(dataset, Net(dataset), args.runs, args.epochs, args.lr, args.weight_decay, args.early_stopping, args.inference, args.profile, permute_masks) if args.profile: - import os - import pathlib - profile_dir = str(pathlib.Path.cwd()) + '/' - timeline_file = profile_dir + 'profile-citation-APPNP-' + args.dataset + '-random_splits-' + str( - args.random_splits) + '.json' - os.rename('timeline.json', timeline_file) + rename_profile_file('citation', Conv.__name__, args.dataset, + str(args.random_splits)) diff --git a/benchmark/citation/arma.py b/benchmark/citation/arma.py index 5c65e9e0d814..37a87a17cd60 100644 --- a/benchmark/citation/arma.py +++ b/benchmark/citation/arma.py @@ -4,7 +4,8 @@ import torch.nn.functional as F from citation import get_planetoid_dataset, random_planetoid_splits, run -from torch_geometric.nn import ARMAConv +from torch_geometric.nn import ARMAConv as Conv +from torch_geometric.profile import rename_profile_file parser = argparse.ArgumentParser() parser.add_argument('--dataset', type=str, required=True) @@ -21,21 +22,20 @@ parser.add_argument('--num_layers', type=int, default=1) parser.add_argument('--shared_weights', type=bool, default=False) parser.add_argument('--skip_dropout', type=float, default=0.75) -parser.add_argument('--inference', type=bool, default=False) -parser.add_argument('--profile', type=bool, - default=False) # Currently support profile in inference +parser.add_argument('--inference', action='store_true') +parser.add_argument('--profile', action='store_true') args = parser.parse_args() class Net(torch.nn.Module): def __init__(self, dataset): super().__init__() - self.conv1 = ARMAConv(dataset.num_features, args.hidden, - args.num_stacks, args.num_layers, - args.shared_weights, dropout=args.skip_dropout) - self.conv2 = ARMAConv(args.hidden, dataset.num_classes, - args.num_stacks, args.num_layers, - args.shared_weights, dropout=args.skip_dropout) + self.conv1 = Conv(dataset.num_features, args.hidden, args.num_stacks, + args.num_layers, args.shared_weights, + dropout=args.skip_dropout) + self.conv2 = Conv(args.hidden, dataset.num_classes, args.num_stacks, + args.num_layers, args.shared_weights, + dropout=args.skip_dropout) def reset_parameters(self): self.conv1.reset_parameters() @@ -51,14 +51,9 @@ def forward(self, data): dataset = get_planetoid_dataset(args.dataset, args.normalize_features) permute_masks = random_planetoid_splits if args.random_splits else None -print("arma-{}-{}:".format(args.dataset, args.random_splits), end=' ') run(dataset, Net(dataset), args.runs, args.epochs, args.lr, args.weight_decay, args.early_stopping, args.inference, args.profile, permute_masks) if args.profile: - import os - import pathlib - profile_dir = str(pathlib.Path.cwd()) + '/' - timeline_file = profile_dir + 'profile-citation-ARMA-' + args.dataset + '-random_splits-' + str( - args.random_splits) + '.json' - os.rename('timeline.json', timeline_file) + rename_profile_file('citation', Conv.__name__, args.dataset, + str(args.random_splits)) diff --git a/benchmark/citation/cheb.py b/benchmark/citation/cheb.py index b33c2fe2e96d..4038beab2c07 100644 --- a/benchmark/citation/cheb.py +++ b/benchmark/citation/cheb.py @@ -4,7 +4,8 @@ import torch.nn.functional as F from citation import get_planetoid_dataset, random_planetoid_splits, run -from torch_geometric.nn import ChebConv +from torch_geometric.nn import ChebConv as Conv +from torch_geometric.profile import rename_profile_file parser = argparse.ArgumentParser() parser.add_argument('--dataset', type=str, required=True) @@ -18,17 +19,16 @@ parser.add_argument('--dropout', type=float, default=0.5) parser.add_argument('--normalize_features', type=bool, default=True) parser.add_argument('--num_hops', type=int, default=3) -parser.add_argument('--inference', type=bool, default=False) -parser.add_argument('--profile', type=bool, - default=False) # Currently support profile in inference +parser.add_argument('--inference', action='store_true') +parser.add_argument('--profile', action='store_true') args = parser.parse_args() class Net(torch.nn.Module): def __init__(self, dataset): super().__init__() - self.conv1 = ChebConv(dataset.num_features, args.hidden, args.num_hops) - self.conv2 = ChebConv(args.hidden, dataset.num_classes, args.num_hops) + self.conv1 = Conv(dataset.num_features, args.hidden, args.num_hops) + self.conv2 = Conv(args.hidden, dataset.num_classes, args.num_hops) def reset_parameters(self): self.conv1.reset_parameters() @@ -44,14 +44,9 @@ def forward(self, data): dataset = get_planetoid_dataset(args.dataset, args.normalize_features) permute_masks = random_planetoid_splits if args.random_splits else None -print("cheby-{}-{}:".format(args.dataset, args.random_splits), end=' ') run(dataset, Net(dataset), args.runs, args.epochs, args.lr, args.weight_decay, args.early_stopping, args.inference, args.profile, permute_masks) if args.profile: - import os - import pathlib - profile_dir = str(pathlib.Path.cwd()) + '/' - timeline_file = profile_dir + 'profile-citation-CHEBY-' + args.dataset + '-random_splits-' + str( - args.random_splits) + '.json' - os.rename('timeline.json', timeline_file) + rename_profile_file('citation', Conv.__name__, args.dataset, + str(args.random_splits)) diff --git a/benchmark/citation/gat.py b/benchmark/citation/gat.py index 0ac29696a416..bd202c8f8184 100644 --- a/benchmark/citation/gat.py +++ b/benchmark/citation/gat.py @@ -4,7 +4,8 @@ import torch.nn.functional as F from citation import get_planetoid_dataset, random_planetoid_splits, run -from torch_geometric.nn import GATConv +from torch_geometric.nn import GATConv as Conv +from torch_geometric.profile import rename_profile_file parser = argparse.ArgumentParser() parser.add_argument('--dataset', type=str, required=True) @@ -19,20 +20,19 @@ parser.add_argument('--normalize_features', type=bool, default=True) parser.add_argument('--heads', type=int, default=8) parser.add_argument('--output_heads', type=int, default=1) -parser.add_argument('--inference', type=bool, default=False) -parser.add_argument('--profile', type=bool, - default=False) # Currently support profile in inference +parser.add_argument('--inference', action='store_true') +parser.add_argument('--profile', action='store_true') args = parser.parse_args() class Net(torch.nn.Module): def __init__(self, dataset): super().__init__() - self.conv1 = GATConv(dataset.num_features, args.hidden, - heads=args.heads, dropout=args.dropout) - self.conv2 = GATConv(args.hidden * args.heads, dataset.num_classes, - heads=args.output_heads, concat=False, - dropout=args.dropout) + self.conv1 = Conv(dataset.num_features, args.hidden, heads=args.heads, + dropout=args.dropout) + self.conv2 = Conv(args.hidden * args.heads, dataset.num_classes, + heads=args.output_heads, concat=False, + dropout=args.dropout) def reset_parameters(self): self.conv1.reset_parameters() @@ -49,14 +49,9 @@ def forward(self, data): dataset = get_planetoid_dataset(args.dataset, args.normalize_features) permute_masks = random_planetoid_splits if args.random_splits else None -print("gat-{}-{}:".format(args.dataset, args.random_splits), end=' ') run(dataset, Net(dataset), args.runs, args.epochs, args.lr, args.weight_decay, args.early_stopping, args.inference, args.profile, permute_masks) if args.profile: - import os - import pathlib - profile_dir = str(pathlib.Path.cwd()) + '/' - timeline_file = profile_dir + 'profile-citation-GAT-' + args.dataset + '-random_splits-' + str( - args.random_splits) + '.json' - os.rename('timeline.json', timeline_file) + rename_profile_file('citation', Conv.__name__, args.dataset, + str(args.random_splits)) diff --git a/benchmark/citation/gcn.py b/benchmark/citation/gcn.py index d1f7683e1c28..96695f46d187 100644 --- a/benchmark/citation/gcn.py +++ b/benchmark/citation/gcn.py @@ -4,7 +4,8 @@ import torch.nn.functional as F from citation import get_planetoid_dataset, random_planetoid_splits, run -from torch_geometric.nn import GCNConv +from torch_geometric.nn import GCNConv as Conv +from torch_geometric.profile import rename_profile_file parser = argparse.ArgumentParser() parser.add_argument('--dataset', type=str, required=True) @@ -17,17 +18,16 @@ parser.add_argument('--hidden', type=int, default=16) parser.add_argument('--dropout', type=float, default=0.5) parser.add_argument('--normalize_features', type=bool, default=True) -parser.add_argument('--inference', type=bool, default=False) -parser.add_argument('--profile', type=bool, - default=False) # Currently support profile in inference +parser.add_argument('--inference', action='store_true') +parser.add_argument('--profile', action='store_true') args = parser.parse_args() class Net(torch.nn.Module): def __init__(self, dataset): super().__init__() - self.conv1 = GCNConv(dataset.num_features, args.hidden) - self.conv2 = GCNConv(args.hidden, dataset.num_classes) + self.conv1 = Conv(dataset.num_features, args.hidden) + self.conv2 = Conv(args.hidden, dataset.num_classes) def reset_parameters(self): self.conv1.reset_parameters() @@ -43,14 +43,9 @@ def forward(self, data): dataset = get_planetoid_dataset(args.dataset, args.normalize_features) permute_masks = random_planetoid_splits if args.random_splits else None -print("gcn-{}-{}:".format(args.dataset, args.random_splits), end=' ') run(dataset, Net(dataset), args.runs, args.epochs, args.lr, args.weight_decay, args.early_stopping, args.inference, args.profile, permute_masks) if args.profile: - import os - import pathlib - profile_dir = str(pathlib.Path.cwd()) + '/' - timeline_file = profile_dir + 'profile-citation-GCN-' + args.dataset + '-random_splits-' + str( - args.random_splits) + '.json' - os.rename('timeline.json', timeline_file) + rename_profile_file('citation', Conv.__name__, args.dataset, + str(args.random_splits)) diff --git a/benchmark/citation/inference.sh b/benchmark/citation/inference.sh index 7c8180573bee..cb9f3e8f23c6 100755 --- a/benchmark/citation/inference.sh +++ b/benchmark/citation/inference.sh @@ -4,115 +4,115 @@ echo "Cora" echo "====" echo "GCN" -python gcn.py --dataset=Cora --inference=True -python gcn.py --dataset=Cora --random_splits=True --inference=True -python gcn.py --dataset=Cora --inference=True --profile=True -python gcn.py --dataset=Cora --random_splits=True --inference=True --profile=True +python gcn.py --dataset=Cora --inference +python gcn.py --dataset=Cora --random_splits=True --inference +python gcn.py --dataset=Cora --inference --profile +python gcn.py --dataset=Cora --random_splits=True --inference --profile echo "GAT" -python gat.py --dataset=Cora --inference=True -python gat.py --dataset=Cora --random_splits=True --inference=True -python gat.py --dataset=Cora --inference=True --profile=True -python gat.py --dataset=Cora --random_splits=True --inference=True --profile=True +python gat.py --dataset=Cora --inference +python gat.py --dataset=Cora --random_splits=True --inference +python gat.py --dataset=Cora --inference --profile +python gat.py --dataset=Cora --random_splits=True --inference --profile echo "Cheby" -python cheb.py --dataset=Cora --num_hops=3 --inference=True -python cheb.py --dataset=Cora --num_hops=3 --random_splits=True --inference=True -python cheb.py --dataset=Cora --num_hops=3 --inference=True --profile=True -python cheb.py --dataset=Cora --num_hops=3 --random_splits=True --inference=True --profile=True +python cheb.py --dataset=Cora --num_hops=3 --inference +python cheb.py --dataset=Cora --num_hops=3 --random_splits=True --inference +python cheb.py --dataset=Cora --num_hops=3 --inference --profile +python cheb.py --dataset=Cora --num_hops=3 --random_splits=True --inference --profile echo "SGC" -python sgc.py --dataset=Cora --K=3 --weight_decay=0.0005 --inference=True -python sgc.py --dataset=Cora --K=3 --weight_decay=0.0005 --random_splits=True --inference=True -python sgc.py --dataset=Cora --K=3 --weight_decay=0.0005 --inference=True --profile=True -python sgc.py --dataset=Cora --K=3 --weight_decay=0.0005 --random_splits=True --inference=True --profile=True +python sgc.py --dataset=Cora --K=3 --weight_decay=0.0005 --inference +python sgc.py --dataset=Cora --K=3 --weight_decay=0.0005 --random_splits=True --inference +python sgc.py --dataset=Cora --K=3 --weight_decay=0.0005 --inference --profile +python sgc.py --dataset=Cora --K=3 --weight_decay=0.0005 --random_splits=True --inference --profile echo "ARMA" -python arma.py --dataset=Cora --num_stacks=2 --num_layers=1 --shared_weights=True --inference=True -python arma.py --dataset=Cora --num_stacks=3 --num_layers=1 --shared_weights=True --random_splits=True --inference=True -python arma.py --dataset=Cora --num_stacks=2 --num_layers=1 --shared_weights=True --inference=True --profile=True -python arma.py --dataset=Cora --num_stacks=3 --num_layers=1 --shared_weights=True --random_splits=True --inference=True --profile=True +python arma.py --dataset=Cora --num_stacks=2 --num_layers=1 --shared_weights=True --inference +python arma.py --dataset=Cora --num_stacks=3 --num_layers=1 --shared_weights=True --random_splits=True --inference +python arma.py --dataset=Cora --num_stacks=2 --num_layers=1 --shared_weights=True --inference --profile +python arma.py --dataset=Cora --num_stacks=3 --num_layers=1 --shared_weights=True --random_splits=True --inference --profile echo "APPNP" -python appnp.py --dataset=Cora --alpha=0.1 --inference=True -python appnp.py --dataset=Cora --alpha=0.1 --random_splits=True --inference=True -python appnp.py --dataset=Cora --alpha=0.1 --inference=True --profile=True -python appnp.py --dataset=Cora --alpha=0.1 --random_splits=True --inference=True --profile=True +python appnp.py --dataset=Cora --alpha=0.1 --inference +python appnp.py --dataset=Cora --alpha=0.1 --random_splits=True --inference +python appnp.py --dataset=Cora --alpha=0.1 --inference --profile +python appnp.py --dataset=Cora --alpha=0.1 --random_splits=True --inference --profile echo "CiteSeer" echo "========" echo "GCN" -python gcn.py --dataset=CiteSeer --inference=True -python gcn.py --dataset=CiteSeer --random_splits=True --inference=True -python gcn.py --dataset=CiteSeer --inference=True --profile=True -python gcn.py --dataset=CiteSeer --random_splits=True --inference=True --profile=True +python gcn.py --dataset=CiteSeer --inference +python gcn.py --dataset=CiteSeer --random_splits=True --inference +python gcn.py --dataset=CiteSeer --inference --profile +python gcn.py --dataset=CiteSeer --random_splits=True --inference --profile echo "GAT" -python gat.py --dataset=CiteSeer --inference=True -python gat.py --dataset=CiteSeer --random_splits=True --inference=True -python gat.py --dataset=CiteSeer --inference=True --profile=True -python gat.py --dataset=CiteSeer --random_splits=True --inference=True --profile=True +python gat.py --dataset=CiteSeer --inference +python gat.py --dataset=CiteSeer --random_splits=True --inference +python gat.py --dataset=CiteSeer --inference --profile +python gat.py --dataset=CiteSeer --random_splits=True --inference --profile echo "Cheby" -python cheb.py --dataset=CiteSeer --num_hops=2 --inference=True -python cheb.py --dataset=CiteSeer --num_hops=3 --random_splits=True --inference=True -python cheb.py --dataset=CiteSeer --num_hops=2 --inference=True --profile=True -python cheb.py --dataset=CiteSeer --num_hops=3 --random_splits=True --inference=True --profile=True +python cheb.py --dataset=CiteSeer --num_hops=2 --inference +python cheb.py --dataset=CiteSeer --num_hops=3 --random_splits=True --inference +python cheb.py --dataset=CiteSeer --num_hops=2 --inference --profile +python cheb.py --dataset=CiteSeer --num_hops=3 --random_splits=True --inference --profile echo "SGC" -python sgc.py --dataset=CiteSeer --K=2 --weight_decay=0.005 --inference=True -python sgc.py --dataset=CiteSeer --K=2 --weight_decay=0.005 --random_splits=True --inference=True -python sgc.py --dataset=CiteSeer --K=2 --weight_decay=0.005 --inference=True --profile=True -python sgc.py --dataset=CiteSeer --K=2 --weight_decay=0.005 --random_splits=True --inference=True --profile=True +python sgc.py --dataset=CiteSeer --K=2 --weight_decay=0.005 --inference +python sgc.py --dataset=CiteSeer --K=2 --weight_decay=0.005 --random_splits=True --inference +python sgc.py --dataset=CiteSeer --K=2 --weight_decay=0.005 --inference --profile +python sgc.py --dataset=CiteSeer --K=2 --weight_decay=0.005 --random_splits=True --inference --profile echo "ARMA" -python arma.py --dataset=CiteSeer --num_stacks=3 --num_layers=1 --shared_weights=True --inference=True -python arma.py --dataset=CiteSeer --num_stacks=3 --num_layers=1 --shared_weights=True --random_splits=True --inference=True -python arma.py --dataset=CiteSeer --num_stacks=3 --num_layers=1 --shared_weights=True --inference=True --profile=True -python arma.py --dataset=CiteSeer --num_stacks=3 --num_layers=1 --shared_weights=True --random_splits=True --inference=True --profile=True +python arma.py --dataset=CiteSeer --num_stacks=3 --num_layers=1 --shared_weights=True --inference +python arma.py --dataset=CiteSeer --num_stacks=3 --num_layers=1 --shared_weights=True --random_splits=True --inference +python arma.py --dataset=CiteSeer --num_stacks=3 --num_layers=1 --shared_weights=True --inference --profile +python arma.py --dataset=CiteSeer --num_stacks=3 --num_layers=1 --shared_weights=True --random_splits=True --inference --profile echo "APPNP" -python appnp.py --dataset=CiteSeer --alpha=0.1 --inference=True -python appnp.py --dataset=CiteSeer --alpha=0.1 --random_splits=True --inference=True -python appnp.py --dataset=CiteSeer --alpha=0.1 --inference=True --profile=True -python appnp.py --dataset=CiteSeer --alpha=0.1 --random_splits=True --inference=True --profile=True +python appnp.py --dataset=CiteSeer --alpha=0.1 --inference +python appnp.py --dataset=CiteSeer --alpha=0.1 --random_splits=True --inference +python appnp.py --dataset=CiteSeer --alpha=0.1 --inference --profile +python appnp.py --dataset=CiteSeer --alpha=0.1 --random_splits=True --inference --profile echo "PubMed" echo "======" echo "GCN" -python gcn.py --dataset=PubMed --inference=True -python gcn.py --dataset=PubMed --random_splits=True --inference=True -python gcn.py --dataset=PubMed --inference=True --profile=True -python gcn.py --dataset=PubMed --random_splits=True --inference=True --profile=True +python gcn.py --dataset=PubMed --inference +python gcn.py --dataset=PubMed --random_splits=True --inference +python gcn.py --dataset=PubMed --inference --profile +python gcn.py --dataset=PubMed --random_splits=True --inference --profile echo "GAT" -python gat.py --dataset=PubMed --lr=0.01 --weight_decay=0.001 --output_heads=8 --inference=True -python gat.py --dataset=PubMed --lr=0.01 --weight_decay=0.001 --output_heads=8 --random_splits=True --inference=True -python gat.py --dataset=PubMed --lr=0.01 --weight_decay=0.001 --output_heads=8 --inference=True --profile=True -python gat.py --dataset=PubMed --lr=0.01 --weight_decay=0.001 --output_heads=8 --random_splits=True --inference=True --profile=True +python gat.py --dataset=PubMed --lr=0.01 --weight_decay=0.001 --output_heads=8 --inference +python gat.py --dataset=PubMed --lr=0.01 --weight_decay=0.001 --output_heads=8 --random_splits=True --inference +python gat.py --dataset=PubMed --lr=0.01 --weight_decay=0.001 --output_heads=8 --inference --profile +python gat.py --dataset=PubMed --lr=0.01 --weight_decay=0.001 --output_heads=8 --random_splits=True --inference --profile echo "Cheby" -python cheb.py --dataset=PubMed --num_hops=2 --inference=True -python cheb.py --dataset=PubMed --num_hops=2 --random_splits=True --inference=True -python cheb.py --dataset=PubMed --num_hops=2 --inference=True --profile=True -python cheb.py --dataset=PubMed --num_hops=2 --random_splits=True --inference=True --profile=True +python cheb.py --dataset=PubMed --num_hops=2 --inference +python cheb.py --dataset=PubMed --num_hops=2 --random_splits=True --inference +python cheb.py --dataset=PubMed --num_hops=2 --inference --profile +python cheb.py --dataset=PubMed --num_hops=2 --random_splits=True --inference --profile echo "SGC" -python sgc.py --dataset=PubMed --K=2 --weight_decay=0.0005 --inference=True -python sgc.py --dataset=PubMed --K=2 --weight_decay=0.0005 --random_splits=True --inference=True -python sgc.py --dataset=PubMed --K=2 --weight_decay=0.0005 --inference=True --profile=True -python sgc.py --dataset=PubMed --K=2 --weight_decay=0.0005 --random_splits=True --inference=True --profile=True +python sgc.py --dataset=PubMed --K=2 --weight_decay=0.0005 --inference +python sgc.py --dataset=PubMed --K=2 --weight_decay=0.0005 --random_splits=True --inference +python sgc.py --dataset=PubMed --K=2 --weight_decay=0.0005 --inference --profile +python sgc.py --dataset=PubMed --K=2 --weight_decay=0.0005 --random_splits=True --inference --profile echo "ARMA" -python arma.py --dataset=PubMed --num_stacks=2 --num_layers=1 --skip_dropout=0 --inference=True -python arma.py --dataset=PubMed --num_stacks=2 --num_layers=1 --skip_dropout=0.5 --random_splits=True --inference=True -python arma.py --dataset=PubMed --num_stacks=2 --num_layers=1 --skip_dropout=0 --inference=True --profile=True -python arma.py --dataset=PubMed --num_stacks=2 --num_layers=1 --skip_dropout=0.5 --random_splits=True --inference=True --profile=True +python arma.py --dataset=PubMed --num_stacks=2 --num_layers=1 --skip_dropout=0 --inference +python arma.py --dataset=PubMed --num_stacks=2 --num_layers=1 --skip_dropout=0.5 --random_splits=True --inference +python arma.py --dataset=PubMed --num_stacks=2 --num_layers=1 --skip_dropout=0 --inference --profile +python arma.py --dataset=PubMed --num_stacks=2 --num_layers=1 --skip_dropout=0.5 --random_splits=True --inference --profile echo "APPNP" -python appnp.py --dataset=PubMed --alpha=0.1 --inference=True -python appnp.py --dataset=PubMed --alpha=0.1 --random_splits=True --inference=True -python appnp.py --dataset=PubMed --alpha=0.1 --inference=True --profile=True -python appnp.py --dataset=PubMed --alpha=0.1 --random_splits=True --inference=True --profile=True +python appnp.py --dataset=PubMed --alpha=0.1 --inference +python appnp.py --dataset=PubMed --alpha=0.1 --random_splits=True --inference +python appnp.py --dataset=PubMed --alpha=0.1 --inference --profile +python appnp.py --dataset=PubMed --alpha=0.1 --random_splits=True --inference --profile diff --git a/benchmark/citation/sgc.py b/benchmark/citation/sgc.py index 5ad43c3ada59..a0e5e062f42c 100644 --- a/benchmark/citation/sgc.py +++ b/benchmark/citation/sgc.py @@ -4,7 +4,8 @@ import torch.nn.functional as F from citation import get_planetoid_dataset, random_planetoid_splits, run -from torch_geometric.nn import SGConv +from torch_geometric.nn import SGConv as Conv +from torch_geometric.profile import rename_profile_file parser = argparse.ArgumentParser() parser.add_argument('--dataset', type=str, required=True) @@ -16,17 +17,16 @@ parser.add_argument('--early_stopping', type=int, default=10) parser.add_argument('--normalize_features', type=bool, default=False) parser.add_argument('--K', type=int, default=2) -parser.add_argument('--inference', type=bool, default=False) -parser.add_argument('--profile', type=bool, - default=False) # Currently support profile in inference +parser.add_argument('--inference', action='store_true') +parser.add_argument('--profile', action='store_true') args = parser.parse_args() class Net(torch.nn.Module): def __init__(self, dataset): super().__init__() - self.conv1 = SGConv(dataset.num_features, dataset.num_classes, - K=args.K, cached=True) + self.conv1 = Conv(dataset.num_features, dataset.num_classes, K=args.K, + cached=True) def reset_parameters(self): self.conv1.reset_parameters() @@ -39,14 +39,9 @@ def forward(self, data): dataset = get_planetoid_dataset(args.dataset, args.normalize_features) permute_masks = random_planetoid_splits if args.random_splits else None -print("sgc-{}-{}:".format(args.dataset, args.random_splits), end=' ') run(dataset, Net(dataset), args.runs, args.epochs, args.lr, args.weight_decay, args.early_stopping, args.inference, args.profile, permute_masks) if args.profile: - import os - import pathlib - profile_dir = str(pathlib.Path.cwd()) + '/' - timeline_file = profile_dir + 'profile-citation-SGC-' + args.dataset + '-random_splits-' + str( - args.random_splits) + '.json' - os.rename('timeline.json', timeline_file) + rename_profile_file('citation', Conv.__name__, args.dataset, + str(args.random_splits)) diff --git a/benchmark/citation/train_eval.py b/benchmark/citation/train_eval.py index 2710109f87ef..9485a834e51c 100644 --- a/benchmark/citation/train_eval.py +++ b/benchmark/citation/train_eval.py @@ -6,11 +6,10 @@ from torch.optim import Adam from torch.profiler import ProfilerActivity, profile +from torch_geometric.profile import trace_handler from torch_geometric.utils import index_to_mask device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') -profile_sort = "self_cuda_time_total" if torch.cuda.is_available( -) else "self_cpu_time_total" def random_planetoid_splits(data, num_classes): @@ -37,104 +36,103 @@ def random_planetoid_splits(data, num_classes): return data -def trace_handler(p): - output = p.key_averages().table(sort_by=profile_sort) - print(output) - import pathlib - profile_dir = str(pathlib.Path.cwd()) + '/' - timeline_file = profile_dir + 'timeline' + '.json' - p.export_chrome_trace(timeline_file) +def run_train(dataset, model, runs, epochs, lr, weight_decay, early_stopping, + permute_masks=None, logger=None): + val_losses, accs, durations = [], [], [] + for _ in range(runs): + data = dataset[0] + if permute_masks is not None: + data = permute_masks(data, dataset.num_classes) + data = data.to(device) + + model.to(device).reset_parameters() + optimizer = Adam(model.parameters(), lr=lr, weight_decay=weight_decay) + + if torch.cuda.is_available(): + torch.cuda.synchronize() + + t_start = time.perf_counter() + + best_val_loss = float('inf') + test_acc = 0 + val_loss_history = [] + + for epoch in range(1, epochs + 1): + train(model, optimizer, data) + eval_info = evaluate(model, data) + eval_info['epoch'] = epoch + + if logger is not None: + logger(eval_info) + + if eval_info['val_loss'] < best_val_loss: + best_val_loss = eval_info['val_loss'] + test_acc = eval_info['test_acc'] + + val_loss_history.append(eval_info['val_loss']) + if early_stopping > 0 and epoch > epochs // 2: + tmp = tensor(val_loss_history[-(early_stopping + 1):-1]) + if eval_info['val_loss'] > tmp.mean().item(): + break + + if torch.cuda.is_available(): + torch.cuda.synchronize() + + t_end = time.perf_counter() + + val_losses.append(best_val_loss) + accs.append(test_acc) + durations.append(t_end - t_start) + loss, acc, duration = tensor(val_losses), tensor(accs), tensor(durations) + + print(f'Val Loss: {float(loss.mean()):.4f}, ' + f'Test Accuracy: {float(acc.mean()):.3f} ± {float(acc.std()):.3f}, ' + f'Duration: {float(duration.mean()):.3f}s') + + +def run_inference(dataset, model, runs, epochs, profiling, permute_masks=None, + logger=None): + for i in range(runs): + data = dataset[0] + if permute_masks is not None: + data = permute_masks(data, dataset.num_classes) + data = data.to(device) + + model.to(device).reset_parameters() + + for epoch in range(1, epochs + 1): + if i == runs - 1 and epoch == epochs: + if profiling: + with profile( + activities=[ + ProfilerActivity.CPU, ProfilerActivity.CUDA + ], on_trace_ready=trace_handler) as p: + inference(model, data) + p.step() + else: + if torch.cuda.is_available(): + torch.cuda.synchronize() + t_start = time.time() + + inference(model, data) + + if torch.cuda.is_available(): + torch.cuda.synchronize() + t_end = time.time() + duration = t_end - t_start + print("End-to-End time: {} s".format(duration), flush=True) + else: + inference(model, data) def run(dataset, model, runs, epochs, lr, weight_decay, early_stopping, inference, profiling, permute_masks=None, logger=None): - val_losses, accs, durations = [], [], [] if not inference: - for _ in range(runs): - data = dataset[0] - if permute_masks is not None: - data = permute_masks(data, dataset.num_classes) - data = data.to(device) - - model.to(device).reset_parameters() - optimizer = Adam(model.parameters(), lr=lr, - weight_decay=weight_decay) - - if torch.cuda.is_available(): - torch.cuda.synchronize() - - t_start = time.perf_counter() - - best_val_loss = float('inf') - test_acc = 0 - val_loss_history = [] - - for epoch in range(1, epochs + 1): - train(model, optimizer, data) - eval_info = evaluate(model, data) - eval_info['epoch'] = epoch - - if logger is not None: - logger(eval_info) - - if eval_info['val_loss'] < best_val_loss: - best_val_loss = eval_info['val_loss'] - test_acc = eval_info['test_acc'] - - val_loss_history.append(eval_info['val_loss']) - if early_stopping > 0 and epoch > epochs // 2: - tmp = tensor(val_loss_history[-(early_stopping + 1):-1]) - if eval_info['val_loss'] > tmp.mean().item(): - break - - if torch.cuda.is_available(): - torch.cuda.synchronize() - - t_end = time.perf_counter() - - val_losses.append(best_val_loss) - accs.append(test_acc) - durations.append(t_end - t_start) - loss, acc, duration = tensor(val_losses), tensor(accs), tensor( - durations) - - print( - f'Val Loss: {float(loss.mean()):.4f}, ' - f'Test Accuracy: {float(acc.mean()):.3f} ± {float(acc.std()):.3f}, ' - f'Duration: {float(duration.mean()):.3f}s') + run_train(dataset, model, runs, epochs, lr, weight_decay, + early_stopping, permute_masks, logger) else: - for i in range(runs): - data = dataset[0] - if permute_masks is not None: - data = permute_masks(data, dataset.num_classes) - data = data.to(device) - - model.to(device).reset_parameters() - - for epoch in range(1, epochs + 1): - if i == int(runs / 2) and epoch == int(epochs / 2): - if profiling: - with profile( - activities=[ - ProfilerActivity.CPU, ProfilerActivity.CUDA - ], on_trace_ready=trace_handler) as p: - test(model, data) - p.step() - else: - if torch.cuda.is_available(): - torch.cuda.synchronize() - t_start = time.time() - - test(model, data) - - if torch.cuda.is_available(): - torch.cuda.synchronize() - t_end = time.time() - duration = t_end - t_start - print("End-to-End time: {} s".format(duration), - flush=True) - else: - test(model, data) + run_inference(dataset, model, runs, epochs, profiling, permute_masks, + logger) def train(model, optimizer, data): @@ -165,7 +163,8 @@ def evaluate(model, data): return outs -def test(model, data): +@torch.no_grad() +def inference(model, data): model.eval() with torch.no_grad(): model(data) diff --git a/benchmark/points/edge_cnn.py b/benchmark/points/edge_cnn.py index 7431344ae133..1541216906fb 100644 --- a/benchmark/points/edge_cnn.py +++ b/benchmark/points/edge_cnn.py @@ -9,6 +9,7 @@ from torch.nn import Sequential as Seq from torch_geometric.nn import DynamicEdgeConv, global_max_pool +from torch_geometric.profile import rename_profile_file parser = argparse.ArgumentParser() parser.add_argument('--epochs', type=int, default=200) @@ -17,9 +18,8 @@ parser.add_argument('--lr_decay_factor', type=float, default=0.5) parser.add_argument('--lr_decay_step_size', type=int, default=50) parser.add_argument('--weight_decay', type=float, default=0) -parser.add_argument('--inference', type=bool, default=False) -parser.add_argument('--profile', type=bool, - default=False) # Currently support profile in inference +parser.add_argument('--inference', action='store_true') +parser.add_argument('--profile', action='store_true') args = parser.parse_args() @@ -57,14 +57,9 @@ def forward(self, pos, batch): train_dataset, test_dataset = get_dataset(num_points=1024) model = Net(train_dataset.num_classes) -print("edge_cnn", end=' ') run(train_dataset, test_dataset, model, args.epochs, args.batch_size, args.lr, args.lr_decay_factor, args.lr_decay_step_size, args.weight_decay, args.inference, args.profile) if args.profile: - import os - import pathlib - profile_dir = str(pathlib.Path.cwd()) + '/' - timeline_file = profile_dir + 'profile-points-edge_cnn.json' - os.rename('timeline.json', timeline_file) + rename_profile_file('points', 'DynamicEdgeConv') diff --git a/benchmark/points/train_eval.py b/benchmark/points/train_eval.py index b17b4184b8ed..abdd303e38dd 100644 --- a/benchmark/points/train_eval.py +++ b/benchmark/points/train_eval.py @@ -6,19 +6,9 @@ from torch.profiler import ProfilerActivity, profile from torch_geometric.loader import DataLoader +from torch_geometric.profile import trace_handler device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') -profile_sort = "self_cuda_time_total" if torch.cuda.is_available( -) else "self_cpu_time_total" - - -def trace_handler(p): - output = p.key_averages().table(sort_by=profile_sort) - print(output) - import pathlib - profile_dir = str(pathlib.Path.cwd()) + '/' - timeline_file = profile_dir + 'timeline' + '.json' - p.export_chrome_trace(timeline_file) def run(train_dataset, test_dataset, model, epochs, batch_size, lr, @@ -103,6 +93,7 @@ def test(model, test_loader, device): return test_acc +@torch.no_grad() def inference_run(model, test_loader, device): model.eval() for data in test_loader: diff --git a/examples/hetero/to_hetero_mag.py b/examples/hetero/to_hetero_mag.py index bbad09c1aa57..d0b0bd4a525b 100644 --- a/examples/hetero/to_hetero_mag.py +++ b/examples/hetero/to_hetero_mag.py @@ -12,17 +12,15 @@ from torch_geometric.datasets import OGB_MAG from torch_geometric.loader import HGTLoader, NeighborLoader from torch_geometric.nn import Linear, SAGEConv, Sequential, to_hetero +from torch_geometric.profile import trace_handler parser = argparse.ArgumentParser() parser.add_argument('--use_hgt_loader', action='store_true') -parser.add_argument('--inference', type=bool, default=False) -parser.add_argument('--profile', type=bool, - default=False) # Currently support profile in inference +parser.add_argument('--inference', action='store_true') +parser.add_argument('--profile', action='store_true') args = parser.parse_args() device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') -profile_sort = "self_cuda_time_total" if torch.cuda.is_available( -) else "self_cpu_time_total" path = osp.join(osp.dirname(osp.realpath(__file__)), '../../data/OGB') transform = T.ToUndirected(merge=True) @@ -56,15 +54,6 @@ model = to_hetero(model, data.metadata(), aggr='sum').to(device) -def trace_handler(p): - output = p.key_averages().table(sort_by=profile_sort) - print(output) - import pathlib - profile_dir = str(pathlib.Path.cwd()) + '/' - timeline_file = profile_dir + 'timeline-to-hetero-mag' + '.json' - p.export_chrome_trace(timeline_file) - - @torch.no_grad() def init_params(): # Initialize lazy parameters via forwarding a single batch to the model: @@ -114,7 +103,6 @@ def inference(loader): model.eval() for batch in tqdm(loader): batch = batch.to(device, 'edge_index') - batch_size = batch['paper'].batch_size model(batch.x_dict, batch.edge_index_dict) diff --git a/examples/pna.py b/examples/pna.py index c81e97eb4af9..a7ee23bd6367 100644 --- a/examples/pna.py +++ b/examples/pna.py @@ -11,12 +11,12 @@ from torch_geometric.datasets import ZINC from torch_geometric.loader import DataLoader from torch_geometric.nn import BatchNorm, PNAConv, global_add_pool +from torch_geometric.profile import rename_profile_file, trace_handler from torch_geometric.utils import degree parser = argparse.ArgumentParser() -parser.add_argument('--inference', type=bool, default=False) -parser.add_argument('--profile', type=bool, - default=False) # Currently support profile in inference +parser.add_argument('--inference', action='store_true') +parser.add_argument('--profile', action='store_true') args = parser.parse_args() path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'ZINC') @@ -77,22 +77,11 @@ def forward(self, x, edge_index, edge_attr, batch): device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') model = Net().to(device) -profile_sort = "self_cuda_time_total" if torch.cuda.is_available( -) else "self_cpu_time_total" optimizer = torch.optim.Adam(model.parameters(), lr=0.001) scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=20, min_lr=0.00001) -def trace_handler(p): - output = p.key_averages().table(sort_by=profile_sort) - print(output) - import pathlib - profile_dir = str(pathlib.Path.cwd()) + '/' - timeline_file = profile_dir + 'timeline-to-pna' + '.json' - p.export_chrome_trace(timeline_file) - - def train(epoch): model.train() @@ -146,6 +135,7 @@ def inference(loader): ], on_trace_ready=trace_handler) as p: inference(test_loader) p.step() + rename_profile_file('pna') else: if torch.cuda.is_available(): torch.cuda.synchronize() diff --git a/torch_geometric/profile/__init__.py b/torch_geometric/profile/__init__.py index 9856d4eca56f..7e0fce58c498 100644 --- a/torch_geometric/profile/__init__.py +++ b/torch_geometric/profile/__init__.py @@ -1,4 +1,5 @@ from .profile import profileit, timeit, get_stats_summary +from .profile import trace_handler, rename_profile_file from .utils import count_parameters from .utils import get_model_size from .utils import get_data_size @@ -10,6 +11,8 @@ 'profileit', 'timeit', 'get_stats_summary', + 'trace_handler', + 'rename_profile_file', 'count_parameters', 'get_model_size', 'get_data_size', diff --git a/torch_geometric/profile/profile.py b/torch_geometric/profile/profile.py index fa2477535332..f9c5ceb76eff 100644 --- a/torch_geometric/profile/profile.py +++ b/torch_geometric/profile/profile.py @@ -172,3 +172,27 @@ def std(values: List[float]): def mean(values: List[float]): return float(torch.tensor(values).mean()) + + +def trace_handler(p): + if torch.cuda.is_available(): + profile_sort = 'self_cuda_time_total' + else: + profile_sort = 'self_cpu_time_total' + output = p.key_averages().table(sort_by=profile_sort) + print(output) + import pathlib + profile_dir = str(pathlib.Path.cwd()) + '/' + timeline_file = profile_dir + 'timeline' + '.json' + p.export_chrome_trace(timeline_file) + + +def rename_profile_file(*args): + import os + import pathlib + profile_dir = str(pathlib.Path.cwd()) + '/' + timeline_file = profile_dir + 'profile' + for arg in args: + timeline_file += '-' + arg + timeline_file += '.json' + os.rename('timeline.json', timeline_file) From 9bd310a7e9e00c397f01ae05b4214e5eb04d73ed Mon Sep 17 00:00:00 2001 From: yanbing-j Date: Mon, 11 Jul 2022 16:01:56 +0800 Subject: [PATCH 13/24] Add profile test to increase code coverage --- test/profile/test_profile.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/test/profile/test_profile.py b/test/profile/test_profile.py index 2019a39506fd..06bcb0f908d0 100644 --- a/test/profile/test_profile.py +++ b/test/profile/test_profile.py @@ -1,8 +1,15 @@ import torch import torch.nn.functional as F +from torch.profiler import ProfilerActivity, profile from torch_geometric.nn import GraphSAGE -from torch_geometric.profile import get_stats_summary, profileit, timeit +from torch_geometric.profile import ( + get_stats_summary, + profileit, + rename_profile_file, + timeit, + trace_handler, +) from torch_geometric.testing import withCUDA @@ -41,7 +48,11 @@ def test(model, x, edge_index, y): assert stats.nvidia_smi_free_cuda > 0 assert stats.nvidia_smi_used_cuda > 0 - _, time = test(model, data.x, data.edge_index, data.y) + with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], + on_trace_ready=trace_handler) as p: + _, time = test(model, data.x, data.edge_index, data.y) + p.step() + assert time > 0 if epoch >= 2: # Warm-up @@ -56,3 +67,7 @@ def test(model, x, edge_index, y): assert stats_summary.max_active_cuda > 0 assert stats_summary.min_nvidia_smi_free_cuda > 0 assert stats_summary.max_nvidia_smi_used_cuda > 0 + + rename_profile_file('test_profile') + import os.path + assert os.path.exists('profile-test_profile.json') From 5e90e8192c71877bfaeff3934ab2b89dd76d3155 Mon Sep 17 00:00:00 2001 From: yanbing-j Date: Mon, 11 Jul 2022 16:33:58 +0800 Subject: [PATCH 14/24] Update script of points benchmark --- benchmark/points/train_eval.py | 102 ++++++++++++++++++--------------- 1 file changed, 57 insertions(+), 45 deletions(-) diff --git a/benchmark/points/train_eval.py b/benchmark/points/train_eval.py index abdd303e38dd..ca0cfa6fe6fc 100644 --- a/benchmark/points/train_eval.py +++ b/benchmark/points/train_eval.py @@ -11,61 +11,73 @@ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') -def run(train_dataset, test_dataset, model, epochs, batch_size, lr, - lr_decay_factor, lr_decay_step_size, weight_decay, inference, - profiling): - - model = model.to(device) +def run_train(train_dataset, test_dataset, model, epochs, batch_size, lr, + lr_decay_factor, lr_decay_step_size, weight_decay): optimizer = Adam(model.parameters(), lr=lr, weight_decay=weight_decay) - train_loader = DataLoader(train_dataset, batch_size, shuffle=True) test_loader = DataLoader(test_dataset, batch_size, shuffle=False) - if not inference: - for epoch in range(1, epochs + 1): - if torch.cuda.is_available(): - torch.cuda.synchronize() + for epoch in range(1, epochs + 1): + print("Epoch {} starts".format(epoch)) + if torch.cuda.is_available(): + torch.cuda.synchronize() - t_start = time.perf_counter() + t_start = time.perf_counter() - train(model, optimizer, train_loader, device) - test_acc = test(model, test_loader, device) + train(model, optimizer, train_loader, device) + test_acc = test(model, test_loader, device) - if torch.cuda.is_available(): - torch.cuda.synchronize() + if torch.cuda.is_available(): + torch.cuda.synchronize() - t_end = time.perf_counter() + t_end = time.perf_counter() - print(f'Epoch: {epoch:03d}, Test: {test_acc:.4f}, ' - f'Duration: {t_end - t_start:.2f}') + print(f'Epoch: {epoch:03d}, Test: {test_acc:.4f}, ' + f'Duration: {t_end - t_start:.2f}') - if epoch % lr_decay_step_size == 0: - for param_group in optimizer.param_groups: - param_group['lr'] = lr_decay_factor * param_group['lr'] - else: - for epoch in range(1, epochs + 1): - if epoch == epochs: - if profiling: - with profile( - activities=[ - ProfilerActivity.CPU, ProfilerActivity.CUDA - ], on_trace_ready=trace_handler) as p: - inference_run(model, test_loader, device) - p.step() - else: - if torch.cuda.is_available(): - torch.cuda.synchronize() - t_start = time.time() - - inference_run(model, test_loader, device) - - if torch.cuda.is_available(): - torch.cuda.synchronize() - t_end = time.time() - duration = t_end - t_start - print("End-to-End time: {} s".format(duration), flush=True) + if epoch % lr_decay_step_size == 0: + for param_group in optimizer.param_groups: + param_group['lr'] = lr_decay_factor * param_group['lr'] + + +def run_inference(test_dataset, model, epochs, batch_size, profiling): + model = model.to(device) + test_loader = DataLoader(test_dataset, batch_size, shuffle=False) + + for epoch in range(1, epochs + 1): + print("Epoch {} starts".format(epoch)) + if epoch == epochs: + if profiling: + with profile( + activities=[ + ProfilerActivity.CPU, ProfilerActivity.CUDA + ], on_trace_ready=trace_handler) as p: + inference(model, test_loader, device) + p.step() else: - inference_run(model, test_loader, device) + if torch.cuda.is_available(): + torch.cuda.synchronize() + t_start = time.time() + + inference(model, test_loader, device) + + if torch.cuda.is_available(): + torch.cuda.synchronize() + t_end = time.time() + duration = t_end - t_start + print("End-to-End time: {} s".format(duration), flush=True) + else: + inference(model, test_loader, device) + + +def run(train_dataset, test_dataset, model, epochs, batch_size, lr, + lr_decay_factor, lr_decay_step_size, weight_decay, inference, + profiling): + if not inference: + run_train(train_dataset, test_dataset, model, epochs, batch_size, lr, + lr_decay_factor, lr_decay_step_size, weight_decay) + else: + run_inference(test_dataset, model, epochs, batch_size, profiling) def train(model, optimizer, train_loader, device): @@ -94,7 +106,7 @@ def test(model, test_loader, device): @torch.no_grad() -def inference_run(model, test_loader, device): +def inference(model, test_loader, device): model.eval() for data in test_loader: data = data.to(device) From f888faff2734c51aa3a8e68907523c9a6e5fca6c Mon Sep 17 00:00:00 2001 From: yanbing-j Date: Tue, 12 Jul 2022 19:45:48 +0800 Subject: [PATCH 15/24] Update script for missing rename --- examples/hetero/to_hetero_mag.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/hetero/to_hetero_mag.py b/examples/hetero/to_hetero_mag.py index d0b0bd4a525b..aa002c961ce0 100644 --- a/examples/hetero/to_hetero_mag.py +++ b/examples/hetero/to_hetero_mag.py @@ -12,7 +12,7 @@ from torch_geometric.datasets import OGB_MAG from torch_geometric.loader import HGTLoader, NeighborLoader from torch_geometric.nn import Linear, SAGEConv, Sequential, to_hetero -from torch_geometric.profile import trace_handler +from torch_geometric.profile import rename_profile_file, trace_handler parser = argparse.ArgumentParser() parser.add_argument('--use_hgt_loader', action='store_true') @@ -124,6 +124,7 @@ def inference(loader): ], on_trace_ready=trace_handler) as p: inference(val_loader) p.step() + rename_profile_file('to_hetero_mag') else: if torch.cuda.is_available(): torch.cuda.synchronize() From 55c5c42a13da47fa26e96b2f1f118822f90eb76d Mon Sep 17 00:00:00 2001 From: yanbing-j Date: Tue, 12 Jul 2022 21:17:14 +0800 Subject: [PATCH 16/24] Update scripts according to the comments --- benchmark/citation/train_eval.py | 55 +++++++++++++++--------------- torch_geometric/profile/profile.py | 5 ++- 2 files changed, 29 insertions(+), 31 deletions(-) diff --git a/benchmark/citation/train_eval.py b/benchmark/citation/train_eval.py index 9485a834e51c..88a2b6c9e96e 100644 --- a/benchmark/citation/train_eval.py +++ b/benchmark/citation/train_eval.py @@ -94,36 +94,36 @@ def run_inference(dataset, model, runs, epochs, profiling, permute_masks=None, logger=None): for i in range(runs): data = dataset[0] - if permute_masks is not None: - data = permute_masks(data, dataset.num_classes) - data = data.to(device) - - model.to(device).reset_parameters() - - for epoch in range(1, epochs + 1): - if i == runs - 1 and epoch == epochs: - if profiling: - with profile( - activities=[ - ProfilerActivity.CPU, ProfilerActivity.CUDA - ], on_trace_ready=trace_handler) as p: + if permute_masks is not None: + data = permute_masks(data, dataset.num_classes) + data = data.to(device) + + model.to(device).reset_parameters() + + for epoch in range(1, epochs + 1): + if i == runs - 1 and epoch == epochs: + if profiling: + with profile( + activities=[ + ProfilerActivity.CPU, ProfilerActivity.CUDA + ], on_trace_ready=trace_handler) as p: + inference(model, data) + p.step() + else: + if torch.cuda.is_available(): + torch.cuda.synchronize() + t_start = time.time() + inference(model, data) - p.step() - else: - if torch.cuda.is_available(): - torch.cuda.synchronize() - t_start = time.time() + if torch.cuda.is_available(): + torch.cuda.synchronize() + t_end = time.time() + duration = t_end - t_start + print("End-to-End time: {} s".format(duration), flush=True) + else: inference(model, data) - if torch.cuda.is_available(): - torch.cuda.synchronize() - t_end = time.time() - duration = t_end - t_start - print("End-to-End time: {} s".format(duration), flush=True) - else: - inference(model, data) - def run(dataset, model, runs, epochs, lr, weight_decay, early_stopping, inference, profiling, permute_masks=None, logger=None): @@ -166,5 +166,4 @@ def evaluate(model, data): @torch.no_grad() def inference(model, data): model.eval() - with torch.no_grad(): - model(data) + model(data) diff --git a/torch_geometric/profile/profile.py b/torch_geometric/profile/profile.py index f9c5ceb76eff..a889cfacdc2c 100644 --- a/torch_geometric/profile/profile.py +++ b/torch_geometric/profile/profile.py @@ -1,3 +1,5 @@ +import os +import pathlib from typing import Any, List, NamedTuple, Tuple import torch @@ -181,15 +183,12 @@ def trace_handler(p): profile_sort = 'self_cpu_time_total' output = p.key_averages().table(sort_by=profile_sort) print(output) - import pathlib profile_dir = str(pathlib.Path.cwd()) + '/' timeline_file = profile_dir + 'timeline' + '.json' p.export_chrome_trace(timeline_file) def rename_profile_file(*args): - import os - import pathlib profile_dir = str(pathlib.Path.cwd()) + '/' timeline_file = profile_dir + 'profile' for arg in args: From 00bca650030adf6ffbc0103fdbb5de59e6534873 Mon Sep 17 00:00:00 2001 From: yanbing-j Date: Tue, 12 Jul 2022 22:12:31 +0800 Subject: [PATCH 17/24] Add CPU test for profile --- benchmark/citation/inference.sh | 72 ++++++++++++++++----------------- test/profile/test_profile.py | 24 ++++++++--- 2 files changed, 54 insertions(+), 42 deletions(-) diff --git a/benchmark/citation/inference.sh b/benchmark/citation/inference.sh index cb9f3e8f23c6..5d425663bb8c 100755 --- a/benchmark/citation/inference.sh +++ b/benchmark/citation/inference.sh @@ -5,114 +5,114 @@ echo "====" echo "GCN" python gcn.py --dataset=Cora --inference -python gcn.py --dataset=Cora --random_splits=True --inference +python gcn.py --dataset=Cora --random_splits --inference python gcn.py --dataset=Cora --inference --profile -python gcn.py --dataset=Cora --random_splits=True --inference --profile +python gcn.py --dataset=Cora --random_splits --inference --profile echo "GAT" python gat.py --dataset=Cora --inference -python gat.py --dataset=Cora --random_splits=True --inference +python gat.py --dataset=Cora --random_splits --inference python gat.py --dataset=Cora --inference --profile -python gat.py --dataset=Cora --random_splits=True --inference --profile +python gat.py --dataset=Cora --random_splits --inference --profile echo "Cheby" python cheb.py --dataset=Cora --num_hops=3 --inference -python cheb.py --dataset=Cora --num_hops=3 --random_splits=True --inference +python cheb.py --dataset=Cora --num_hops=3 --random_splits --inference python cheb.py --dataset=Cora --num_hops=3 --inference --profile -python cheb.py --dataset=Cora --num_hops=3 --random_splits=True --inference --profile +python cheb.py --dataset=Cora --num_hops=3 --random_splits --inference --profile echo "SGC" python sgc.py --dataset=Cora --K=3 --weight_decay=0.0005 --inference -python sgc.py --dataset=Cora --K=3 --weight_decay=0.0005 --random_splits=True --inference +python sgc.py --dataset=Cora --K=3 --weight_decay=0.0005 --random_splits --inference python sgc.py --dataset=Cora --K=3 --weight_decay=0.0005 --inference --profile -python sgc.py --dataset=Cora --K=3 --weight_decay=0.0005 --random_splits=True --inference --profile +python sgc.py --dataset=Cora --K=3 --weight_decay=0.0005 --random_splits --inference --profile echo "ARMA" python arma.py --dataset=Cora --num_stacks=2 --num_layers=1 --shared_weights=True --inference -python arma.py --dataset=Cora --num_stacks=3 --num_layers=1 --shared_weights=True --random_splits=True --inference +python arma.py --dataset=Cora --num_stacks=3 --num_layers=1 --shared_weights=True --random_splits --inference python arma.py --dataset=Cora --num_stacks=2 --num_layers=1 --shared_weights=True --inference --profile -python arma.py --dataset=Cora --num_stacks=3 --num_layers=1 --shared_weights=True --random_splits=True --inference --profile +python arma.py --dataset=Cora --num_stacks=3 --num_layers=1 --shared_weights=True --random_splits --inference --profile echo "APPNP" python appnp.py --dataset=Cora --alpha=0.1 --inference -python appnp.py --dataset=Cora --alpha=0.1 --random_splits=True --inference +python appnp.py --dataset=Cora --alpha=0.1 --random_splits --inference python appnp.py --dataset=Cora --alpha=0.1 --inference --profile -python appnp.py --dataset=Cora --alpha=0.1 --random_splits=True --inference --profile +python appnp.py --dataset=Cora --alpha=0.1 --random_splits --inference --profile echo "CiteSeer" echo "========" echo "GCN" python gcn.py --dataset=CiteSeer --inference -python gcn.py --dataset=CiteSeer --random_splits=True --inference +python gcn.py --dataset=CiteSeer --random_splits --inference python gcn.py --dataset=CiteSeer --inference --profile -python gcn.py --dataset=CiteSeer --random_splits=True --inference --profile +python gcn.py --dataset=CiteSeer --random_splits --inference --profile echo "GAT" python gat.py --dataset=CiteSeer --inference -python gat.py --dataset=CiteSeer --random_splits=True --inference +python gat.py --dataset=CiteSeer --random_splits --inference python gat.py --dataset=CiteSeer --inference --profile -python gat.py --dataset=CiteSeer --random_splits=True --inference --profile +python gat.py --dataset=CiteSeer --random_splits --inference --profile echo "Cheby" python cheb.py --dataset=CiteSeer --num_hops=2 --inference -python cheb.py --dataset=CiteSeer --num_hops=3 --random_splits=True --inference +python cheb.py --dataset=CiteSeer --num_hops=3 --random_splits --inference python cheb.py --dataset=CiteSeer --num_hops=2 --inference --profile -python cheb.py --dataset=CiteSeer --num_hops=3 --random_splits=True --inference --profile +python cheb.py --dataset=CiteSeer --num_hops=3 --random_splits --inference --profile echo "SGC" python sgc.py --dataset=CiteSeer --K=2 --weight_decay=0.005 --inference -python sgc.py --dataset=CiteSeer --K=2 --weight_decay=0.005 --random_splits=True --inference +python sgc.py --dataset=CiteSeer --K=2 --weight_decay=0.005 --random_splits --inference python sgc.py --dataset=CiteSeer --K=2 --weight_decay=0.005 --inference --profile -python sgc.py --dataset=CiteSeer --K=2 --weight_decay=0.005 --random_splits=True --inference --profile +python sgc.py --dataset=CiteSeer --K=2 --weight_decay=0.005 --random_splits --inference --profile echo "ARMA" python arma.py --dataset=CiteSeer --num_stacks=3 --num_layers=1 --shared_weights=True --inference -python arma.py --dataset=CiteSeer --num_stacks=3 --num_layers=1 --shared_weights=True --random_splits=True --inference +python arma.py --dataset=CiteSeer --num_stacks=3 --num_layers=1 --shared_weights=True --random_splits --inference python arma.py --dataset=CiteSeer --num_stacks=3 --num_layers=1 --shared_weights=True --inference --profile -python arma.py --dataset=CiteSeer --num_stacks=3 --num_layers=1 --shared_weights=True --random_splits=True --inference --profile +python arma.py --dataset=CiteSeer --num_stacks=3 --num_layers=1 --shared_weights=True --random_splits --inference --profile echo "APPNP" python appnp.py --dataset=CiteSeer --alpha=0.1 --inference -python appnp.py --dataset=CiteSeer --alpha=0.1 --random_splits=True --inference +python appnp.py --dataset=CiteSeer --alpha=0.1 --random_splits --inference python appnp.py --dataset=CiteSeer --alpha=0.1 --inference --profile -python appnp.py --dataset=CiteSeer --alpha=0.1 --random_splits=True --inference --profile +python appnp.py --dataset=CiteSeer --alpha=0.1 --random_splits --inference --profile echo "PubMed" echo "======" echo "GCN" python gcn.py --dataset=PubMed --inference -python gcn.py --dataset=PubMed --random_splits=True --inference +python gcn.py --dataset=PubMed --random_splits --inference python gcn.py --dataset=PubMed --inference --profile -python gcn.py --dataset=PubMed --random_splits=True --inference --profile +python gcn.py --dataset=PubMed --random_splits --inference --profile echo "GAT" python gat.py --dataset=PubMed --lr=0.01 --weight_decay=0.001 --output_heads=8 --inference -python gat.py --dataset=PubMed --lr=0.01 --weight_decay=0.001 --output_heads=8 --random_splits=True --inference +python gat.py --dataset=PubMed --lr=0.01 --weight_decay=0.001 --output_heads=8 --random_splits --inference python gat.py --dataset=PubMed --lr=0.01 --weight_decay=0.001 --output_heads=8 --inference --profile -python gat.py --dataset=PubMed --lr=0.01 --weight_decay=0.001 --output_heads=8 --random_splits=True --inference --profile +python gat.py --dataset=PubMed --lr=0.01 --weight_decay=0.001 --output_heads=8 --random_splits --inference --profile echo "Cheby" python cheb.py --dataset=PubMed --num_hops=2 --inference -python cheb.py --dataset=PubMed --num_hops=2 --random_splits=True --inference +python cheb.py --dataset=PubMed --num_hops=2 --random_splits --inference python cheb.py --dataset=PubMed --num_hops=2 --inference --profile -python cheb.py --dataset=PubMed --num_hops=2 --random_splits=True --inference --profile +python cheb.py --dataset=PubMed --num_hops=2 --random_splits --inference --profile echo "SGC" python sgc.py --dataset=PubMed --K=2 --weight_decay=0.0005 --inference -python sgc.py --dataset=PubMed --K=2 --weight_decay=0.0005 --random_splits=True --inference +python sgc.py --dataset=PubMed --K=2 --weight_decay=0.0005 --random_splits --inference python sgc.py --dataset=PubMed --K=2 --weight_decay=0.0005 --inference --profile -python sgc.py --dataset=PubMed --K=2 --weight_decay=0.0005 --random_splits=True --inference --profile +python sgc.py --dataset=PubMed --K=2 --weight_decay=0.0005 --random_splits --inference --profile echo "ARMA" python arma.py --dataset=PubMed --num_stacks=2 --num_layers=1 --skip_dropout=0 --inference -python arma.py --dataset=PubMed --num_stacks=2 --num_layers=1 --skip_dropout=0.5 --random_splits=True --inference +python arma.py --dataset=PubMed --num_stacks=2 --num_layers=1 --skip_dropout=0.5 --random_splits --inference python arma.py --dataset=PubMed --num_stacks=2 --num_layers=1 --skip_dropout=0 --inference --profile -python arma.py --dataset=PubMed --num_stacks=2 --num_layers=1 --skip_dropout=0.5 --random_splits=True --inference --profile +python arma.py --dataset=PubMed --num_stacks=2 --num_layers=1 --skip_dropout=0.5 --random_splits --inference --profile echo "APPNP" python appnp.py --dataset=PubMed --alpha=0.1 --inference -python appnp.py --dataset=PubMed --alpha=0.1 --random_splits=True --inference +python appnp.py --dataset=PubMed --alpha=0.1 --random_splits --inference python appnp.py --dataset=PubMed --alpha=0.1 --inference --profile -python appnp.py --dataset=PubMed --alpha=0.1 --random_splits=True --inference --profile +python appnp.py --dataset=PubMed --alpha=0.1 --random_splits --inference --profile diff --git a/test/profile/test_profile.py b/test/profile/test_profile.py index 06bcb0f908d0..7fbd63422459 100644 --- a/test/profile/test_profile.py +++ b/test/profile/test_profile.py @@ -1,3 +1,5 @@ +import os.path + import torch import torch.nn.functional as F from torch.profiler import ProfilerActivity, profile @@ -48,11 +50,7 @@ def test(model, x, edge_index, y): assert stats.nvidia_smi_free_cuda > 0 assert stats.nvidia_smi_used_cuda > 0 - with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], - on_trace_ready=trace_handler) as p: - _, time = test(model, data.x, data.edge_index, data.y) - p.step() - + _, time = test(model, data.x, data.edge_index, data.y) assert time > 0 if epoch >= 2: # Warm-up @@ -68,6 +66,20 @@ def test(model, x, edge_index, y): assert stats_summary.min_nvidia_smi_free_cuda > 0 assert stats_summary.max_nvidia_smi_used_cuda > 0 + +def test_trace_handler(get_dataset): + dataset = get_dataset(name='PubMed') + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + data = dataset[0].to(device) + model = GraphSAGE(dataset.num_features, hidden_channels=64, num_layers=3, + out_channels=dataset.num_classes).to(device) + model.eval() + + for epoch in range(3): + print("epoch ", epoch) + with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], + on_trace_ready=trace_handler) as p: + model(data.x, data.edge_index) + p.step() rename_profile_file('test_profile') - import os.path assert os.path.exists('profile-test_profile.json') From cef334d6278c1144217f2a35abb259f7762c7a45 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Wed, 13 Jul 2022 12:59:01 +0000 Subject: [PATCH 18/24] update --- benchmark/citation/appnp.py | 6 +++--- benchmark/citation/arma.py | 16 ++++++++-------- benchmark/citation/cheb.py | 8 ++++---- benchmark/citation/gat.py | 14 +++++++------- benchmark/citation/gcn.py | 8 ++++---- benchmark/citation/sgc.py | 8 ++++---- 6 files changed, 30 insertions(+), 30 deletions(-) diff --git a/benchmark/citation/appnp.py b/benchmark/citation/appnp.py index f7d7d63ac6a5..03573cacbccf 100644 --- a/benchmark/citation/appnp.py +++ b/benchmark/citation/appnp.py @@ -5,7 +5,7 @@ from citation import get_planetoid_dataset, random_planetoid_splits, run from torch.nn import Linear -from torch_geometric.nn import APPNP as Conv +from torch_geometric.nn import APPNP from torch_geometric.profile import rename_profile_file parser = argparse.ArgumentParser() @@ -31,7 +31,7 @@ def __init__(self, dataset): super().__init__() self.lin1 = Linear(dataset.num_features, args.hidden) self.lin2 = Linear(args.hidden, dataset.num_classes) - self.prop1 = Conv(args.K, args.alpha) + self.prop1 = APPNP(args.K, args.alpha) def reset_parameters(self): self.lin1.reset_parameters() @@ -53,5 +53,5 @@ def forward(self, data): args.early_stopping, args.inference, args.profile, permute_masks) if args.profile: - rename_profile_file('citation', Conv.__name__, args.dataset, + rename_profile_file('citation', APPNP.__name__, args.dataset, str(args.random_splits)) diff --git a/benchmark/citation/arma.py b/benchmark/citation/arma.py index 482013be9878..65cc9029fe5f 100644 --- a/benchmark/citation/arma.py +++ b/benchmark/citation/arma.py @@ -4,7 +4,7 @@ import torch.nn.functional as F from citation import get_planetoid_dataset, random_planetoid_splits, run -from torch_geometric.nn import ARMAConv as Conv +from torch_geometric.nn import ARMAConv from torch_geometric.profile import rename_profile_file parser = argparse.ArgumentParser() @@ -30,12 +30,12 @@ class Net(torch.nn.Module): def __init__(self, dataset): super().__init__() - self.conv1 = Conv(dataset.num_features, args.hidden, args.num_stacks, - args.num_layers, args.shared_weights, - dropout=args.skip_dropout) - self.conv2 = Conv(args.hidden, dataset.num_classes, args.num_stacks, - args.num_layers, args.shared_weights, - dropout=args.skip_dropout) + self.conv1 = ARMAConv(dataset.num_features, args.hidden, + args.num_stacks, args.num_layers, + args.shared_weights, dropout=args.skip_dropout) + self.conv2 = ARMAConv(args.hidden, dataset.num_classes, + args.num_stacks, args.num_layers, + args.shared_weights, dropout=args.skip_dropout) def reset_parameters(self): self.conv1.reset_parameters() @@ -55,5 +55,5 @@ def forward(self, data): args.early_stopping, args.inference, args.profile, permute_masks) if args.profile: - rename_profile_file('citation', Conv.__name__, args.dataset, + rename_profile_file('citation', ARMAConv.__name__, args.dataset, str(args.random_splits)) diff --git a/benchmark/citation/cheb.py b/benchmark/citation/cheb.py index e258b76aab84..79f0182adc2c 100644 --- a/benchmark/citation/cheb.py +++ b/benchmark/citation/cheb.py @@ -4,7 +4,7 @@ import torch.nn.functional as F from citation import get_planetoid_dataset, random_planetoid_splits, run -from torch_geometric.nn import ChebConv as Conv +from torch_geometric.nn import ChebConv from torch_geometric.profile import rename_profile_file parser = argparse.ArgumentParser() @@ -27,8 +27,8 @@ class Net(torch.nn.Module): def __init__(self, dataset): super().__init__() - self.conv1 = Conv(dataset.num_features, args.hidden, args.num_hops) - self.conv2 = Conv(args.hidden, dataset.num_classes, args.num_hops) + self.conv1 = ChebConv(dataset.num_features, args.hidden, args.num_hops) + self.conv2 = ChebConv(args.hidden, dataset.num_classes, args.num_hops) def reset_parameters(self): self.conv1.reset_parameters() @@ -48,5 +48,5 @@ def forward(self, data): args.early_stopping, args.inference, args.profile, permute_masks) if args.profile: - rename_profile_file('citation', Conv.__name__, args.dataset, + rename_profile_file('citation', ChebConv.__name__, args.dataset, str(args.random_splits)) diff --git a/benchmark/citation/gat.py b/benchmark/citation/gat.py index 55b178c1bbae..6d171370c269 100644 --- a/benchmark/citation/gat.py +++ b/benchmark/citation/gat.py @@ -4,7 +4,7 @@ import torch.nn.functional as F from citation import get_planetoid_dataset, random_planetoid_splits, run -from torch_geometric.nn import GATConv as Conv +from torch_geometric.nn import GATConv from torch_geometric.profile import rename_profile_file parser = argparse.ArgumentParser() @@ -28,11 +28,11 @@ class Net(torch.nn.Module): def __init__(self, dataset): super().__init__() - self.conv1 = Conv(dataset.num_features, args.hidden, heads=args.heads, - dropout=args.dropout) - self.conv2 = Conv(args.hidden * args.heads, dataset.num_classes, - heads=args.output_heads, concat=False, - dropout=args.dropout) + self.conv1 = GATConv(dataset.num_features, args.hidden, + heads=args.heads, dropout=args.dropout) + self.conv2 = GATConv(args.hidden * args.heads, dataset.num_classes, + heads=args.output_heads, concat=False, + dropout=args.dropout) def reset_parameters(self): self.conv1.reset_parameters() @@ -53,5 +53,5 @@ def forward(self, data): args.early_stopping, args.inference, args.profile, permute_masks) if args.profile: - rename_profile_file('citation', Conv.__name__, args.dataset, + rename_profile_file('citation', GATConv.__name__, args.dataset, str(args.random_splits)) diff --git a/benchmark/citation/gcn.py b/benchmark/citation/gcn.py index ae1eed53bd1a..b42b531a2fdf 100644 --- a/benchmark/citation/gcn.py +++ b/benchmark/citation/gcn.py @@ -4,7 +4,7 @@ import torch.nn.functional as F from citation import get_planetoid_dataset, random_planetoid_splits, run -from torch_geometric.nn import GCNConv as Conv +from torch_geometric.nn import GCNConv from torch_geometric.profile import rename_profile_file parser = argparse.ArgumentParser() @@ -26,8 +26,8 @@ class Net(torch.nn.Module): def __init__(self, dataset): super().__init__() - self.conv1 = Conv(dataset.num_features, args.hidden) - self.conv2 = Conv(args.hidden, dataset.num_classes) + self.conv1 = GCNConv(dataset.num_features, args.hidden) + self.conv2 = GCNConv(args.hidden, dataset.num_classes) def reset_parameters(self): self.conv1.reset_parameters() @@ -47,5 +47,5 @@ def forward(self, data): args.early_stopping, args.inference, args.profile, permute_masks) if args.profile: - rename_profile_file('citation', Conv.__name__, args.dataset, + rename_profile_file('citation', GCNConv.__name__, args.dataset, str(args.random_splits)) diff --git a/benchmark/citation/sgc.py b/benchmark/citation/sgc.py index 1acec1fc1d0f..633ffb208d25 100644 --- a/benchmark/citation/sgc.py +++ b/benchmark/citation/sgc.py @@ -4,7 +4,7 @@ import torch.nn.functional as F from citation import get_planetoid_dataset, random_planetoid_splits, run -from torch_geometric.nn import SGConv as Conv +from torch_geometric.nn import SGConv from torch_geometric.profile import rename_profile_file parser = argparse.ArgumentParser() @@ -25,8 +25,8 @@ class Net(torch.nn.Module): def __init__(self, dataset): super().__init__() - self.conv1 = Conv(dataset.num_features, dataset.num_classes, K=args.K, - cached=True) + self.conv1 = SGConv(dataset.num_features, dataset.num_classes, + K=args.K, cached=True) def reset_parameters(self): self.conv1.reset_parameters() @@ -43,5 +43,5 @@ def forward(self, data): args.early_stopping, args.inference, args.profile, permute_masks) if args.profile: - rename_profile_file('citation', Conv.__name__, args.dataset, + rename_profile_file('citation', SGConv.__name__, args.dataset, str(args.random_splits)) From e07d068fd79b4d3a0449427ab691de71e05d441b Mon Sep 17 00:00:00 2001 From: rusty1s Date: Wed, 13 Jul 2022 13:04:59 +0000 Subject: [PATCH 19/24] update --- benchmark/citation/train_eval.py | 60 +++++++++++++++----------------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/benchmark/citation/train_eval.py b/benchmark/citation/train_eval.py index 88a2b6c9e96e..15a16fbc0b36 100644 --- a/benchmark/citation/train_eval.py +++ b/benchmark/citation/train_eval.py @@ -90,39 +90,36 @@ def run_train(dataset, model, runs, epochs, lr, weight_decay, early_stopping, f'Duration: {float(duration.mean()):.3f}s') -def run_inference(dataset, model, runs, epochs, profiling, permute_masks=None, +@torch.no_grad() +def run_inference(dataset, model, epochs, profiling, permute_masks=None, logger=None): - for i in range(runs): - data = dataset[0] - if permute_masks is not None: - data = permute_masks(data, dataset.num_classes) - data = data.to(device) + data = dataset[0] + if permute_masks is not None: + data = permute_masks(data, dataset.num_classes) + data = data.to(device) - model.to(device).reset_parameters() + model.to(device).reset_parameters() - for epoch in range(1, epochs + 1): - if i == runs - 1 and epoch == epochs: - if profiling: - with profile( - activities=[ - ProfilerActivity.CPU, ProfilerActivity.CUDA - ], on_trace_ready=trace_handler) as p: - inference(model, data) - p.step() - else: - if torch.cuda.is_available(): - torch.cuda.synchronize() - t_start = time.time() - - inference(model, data) - - if torch.cuda.is_available(): - torch.cuda.synchronize() - t_end = time.time() - duration = t_end - t_start - print("End-to-End time: {} s".format(duration), flush=True) - else: - inference(model, data) + for epoch in range(1, epochs + 1): + if epoch == epochs: + if torch.cuda.is_available(): + torch.cuda.synchronize() + t_start = time.time() + + inference(model, data) + + if epoch == epochs: + if torch.cuda.is_available(): + torch.cuda.synchronize() + t_end = time.time() + duration = t_end - t_start + print(f'End-to-End Inference Time: {duration:.8f}s', flush=True) + + if profiling: + with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], + on_trace_ready=trace_handler) as p: + inference(model, data) + p.step() def run(dataset, model, runs, epochs, lr, weight_decay, early_stopping, @@ -131,8 +128,7 @@ def run(dataset, model, runs, epochs, lr, weight_decay, early_stopping, run_train(dataset, model, runs, epochs, lr, weight_decay, early_stopping, permute_masks, logger) else: - run_inference(dataset, model, runs, epochs, profiling, permute_masks, - logger) + run_inference(dataset, model, epochs, profiling, permute_masks, logger) def train(model, optimizer, data): From b1a4620283f7489c2c16c07992655f649955784d Mon Sep 17 00:00:00 2001 From: rusty1s Date: Wed, 13 Jul 2022 13:08:50 +0000 Subject: [PATCH 20/24] update --- benchmark/points/edge_cnn.py | 2 +- benchmark/points/mpnn.py | 9 ++++++++- benchmark/points/point_cnn.py | 9 ++++++++- benchmark/points/point_net.py | 9 ++++++++- benchmark/points/spline_cnn.py | 9 ++++++++- 5 files changed, 33 insertions(+), 5 deletions(-) diff --git a/benchmark/points/edge_cnn.py b/benchmark/points/edge_cnn.py index 1541216906fb..6aed1ae71a17 100644 --- a/benchmark/points/edge_cnn.py +++ b/benchmark/points/edge_cnn.py @@ -62,4 +62,4 @@ def forward(self, pos, batch): args.inference, args.profile) if args.profile: - rename_profile_file('points', 'DynamicEdgeConv') + rename_profile_file('points', DynamicEdgeConv.__name__) diff --git a/benchmark/points/mpnn.py b/benchmark/points/mpnn.py index f9be424dc5f9..8bf4633d3003 100644 --- a/benchmark/points/mpnn.py +++ b/benchmark/points/mpnn.py @@ -9,6 +9,7 @@ from torch.nn import Sequential as Seq from torch_geometric.nn import NNConv, fps, global_mean_pool, radius_graph +from torch_geometric.profile import rename_profile_file parser = argparse.ArgumentParser() parser.add_argument('--epochs', type=int, default=200) @@ -17,6 +18,8 @@ parser.add_argument('--lr_decay_factor', type=float, default=0.5) parser.add_argument('--lr_decay_step_size', type=int, default=50) parser.add_argument('--weight_decay', type=float, default=0) +parser.add_argument('--inference', action='store_true') +parser.add_argument('--profile', action='store_true') args = parser.parse_args() @@ -72,4 +75,8 @@ def forward(self, pos, batch): train_dataset, test_dataset = get_dataset(num_points=1024) model = Net(train_dataset.num_classes) run(train_dataset, test_dataset, model, args.epochs, args.batch_size, args.lr, - args.lr_decay_factor, args.lr_decay_step_size, args.weight_decay) + args.lr_decay_factor, args.lr_decay_step_size, args.weight_decay, + args.inference, args.profile) + +if args.profile: + rename_profile_file('points', NNConv.__name__) diff --git a/benchmark/points/point_cnn.py b/benchmark/points/point_cnn.py index 3746945234ec..59501cc9a63a 100644 --- a/benchmark/points/point_cnn.py +++ b/benchmark/points/point_cnn.py @@ -7,6 +7,7 @@ from torch.nn import Linear as Lin from torch_geometric.nn import XConv, fps, global_mean_pool +from torch_geometric.profile import rename_profile_file parser = argparse.ArgumentParser() parser.add_argument('--epochs', type=int, default=200) @@ -15,6 +16,8 @@ parser.add_argument('--lr_decay_factor', type=float, default=0.5) parser.add_argument('--lr_decay_step_size', type=int, default=50) parser.add_argument('--weight_decay', type=float, default=0) +parser.add_argument('--inference', action='store_true') +parser.add_argument('--profile', action='store_true') args = parser.parse_args() @@ -60,4 +63,8 @@ def forward(self, pos, batch): train_dataset, test_dataset = get_dataset(num_points=1024) model = Net(train_dataset.num_classes) run(train_dataset, test_dataset, model, args.epochs, args.batch_size, args.lr, - args.lr_decay_factor, args.lr_decay_step_size, args.weight_decay) + args.lr_decay_factor, args.lr_decay_step_size, args.weight_decay, + args.inference, args.profile) + +if args.profile: + rename_profile_file('points', XConv.__name__) diff --git a/benchmark/points/point_net.py b/benchmark/points/point_net.py index 2dc5fcf51da7..9ee7546e0f29 100644 --- a/benchmark/points/point_net.py +++ b/benchmark/points/point_net.py @@ -9,6 +9,7 @@ from torch.nn import Sequential as Seq from torch_geometric.nn import PointConv, fps, global_max_pool, radius_graph +from torch_geometric.profile import rename_profile_file parser = argparse.ArgumentParser() parser.add_argument('--epochs', type=int, default=200) @@ -17,6 +18,8 @@ parser.add_argument('--lr_decay_factor', type=float, default=0.5) parser.add_argument('--lr_decay_step_size', type=int, default=50) parser.add_argument('--weight_decay', type=float, default=0) +parser.add_argument('--inference', action='store_true') +parser.add_argument('--profile', action='store_true') args = parser.parse_args() @@ -68,4 +71,8 @@ def forward(self, pos, batch): train_dataset, test_dataset = get_dataset(num_points=1024) model = Net(train_dataset.num_classes) run(train_dataset, test_dataset, model, args.epochs, args.batch_size, args.lr, - args.lr_decay_factor, args.lr_decay_step_size, args.weight_decay) + args.lr_decay_factor, args.lr_decay_step_size, args.weight_decay, + args.inference, args.profile) + +if args.profile: + rename_profile_file('points', PointConv.__name__) diff --git a/benchmark/points/spline_cnn.py b/benchmark/points/spline_cnn.py index 481556a84d83..383195a2d871 100644 --- a/benchmark/points/spline_cnn.py +++ b/benchmark/points/spline_cnn.py @@ -7,6 +7,7 @@ from torch.nn import Linear as Lin from torch_geometric.nn import SplineConv, fps, global_mean_pool, radius_graph +from torch_geometric.profile import rename_profile_file parser = argparse.ArgumentParser() parser.add_argument('--epochs', type=int, default=200) @@ -15,6 +16,8 @@ parser.add_argument('--lr_decay_factor', type=float, default=0.5) parser.add_argument('--lr_decay_step_size', type=int, default=50) parser.add_argument('--weight_decay', type=float, default=0) +parser.add_argument('--inference', action='store_true') +parser.add_argument('--profile', action='store_true') args = parser.parse_args() @@ -69,4 +72,8 @@ def forward(self, pos, batch): train_dataset, test_dataset = get_dataset(num_points=1024) model = Net(train_dataset.num_classes) run(train_dataset, test_dataset, model, args.epochs, args.batch_size, args.lr, - args.lr_decay_factor, args.lr_decay_step_size, args.weight_decay) + args.lr_decay_factor, args.lr_decay_step_size, args.weight_decay, + args.inference, args.profile) + +if args.profile: + rename_profile_file('points', SplineConv.__name__) From e6ffafe6720052b3219f14186cfae89e6f2dcad3 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Wed, 13 Jul 2022 13:12:21 +0000 Subject: [PATCH 21/24] update --- benchmark/points/train_eval.py | 43 +++++++++++++++++----------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/benchmark/points/train_eval.py b/benchmark/points/train_eval.py index ca0cfa6fe6fc..4fee844987fd 100644 --- a/benchmark/points/train_eval.py +++ b/benchmark/points/train_eval.py @@ -13,12 +13,13 @@ def run_train(train_dataset, test_dataset, model, epochs, batch_size, lr, lr_decay_factor, lr_decay_step_size, weight_decay): + model = model.to(device) optimizer = Adam(model.parameters(), lr=lr, weight_decay=weight_decay) + train_loader = DataLoader(train_dataset, batch_size, shuffle=True) test_loader = DataLoader(test_dataset, batch_size, shuffle=False) for epoch in range(1, epochs + 1): - print("Epoch {} starts".format(epoch)) if torch.cuda.is_available(): torch.cuda.synchronize() @@ -40,34 +41,31 @@ def run_train(train_dataset, test_dataset, model, epochs, batch_size, lr, param_group['lr'] = lr_decay_factor * param_group['lr'] +@torch.no_grad() def run_inference(test_dataset, model, epochs, batch_size, profiling): model = model.to(device) test_loader = DataLoader(test_dataset, batch_size, shuffle=False) for epoch in range(1, epochs + 1): - print("Epoch {} starts".format(epoch)) if epoch == epochs: - if profiling: - with profile( - activities=[ - ProfilerActivity.CPU, ProfilerActivity.CUDA - ], on_trace_ready=trace_handler) as p: - inference(model, test_loader, device) - p.step() - else: - if torch.cuda.is_available(): - torch.cuda.synchronize() - t_start = time.time() - - inference(model, test_loader, device) - - if torch.cuda.is_available(): - torch.cuda.synchronize() - t_end = time.time() - duration = t_end - t_start - print("End-to-End time: {} s".format(duration), flush=True) - else: + if torch.cuda.is_available(): + torch.cuda.synchronize() + t_start = time.time() + + inference(model, test_loader, device) + + if epoch == epochs: + if torch.cuda.is_available(): + torch.cuda.synchronize() + t_end = time.time() + duration = t_end - t_start + print(f'End-to-End Inference Time: {duration:.8f}s', flush=True) + + if profiling: + with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], + on_trace_ready=trace_handler) as p: inference(model, test_loader, device) + p.step() def run(train_dataset, test_dataset, model, epochs, batch_size, lr, @@ -92,6 +90,7 @@ def train(model, optimizer, train_loader, device): optimizer.step() +@torch.no_grad() def test(model, test_loader, device): model.eval() From fea0f4301c084a440d91a7e86fb7c84c85b50e24 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Wed, 13 Jul 2022 13:13:32 +0000 Subject: [PATCH 22/24] reset --- examples/hetero/to_hetero_mag.py | 47 +++------------------------ examples/pna.py | 55 ++++---------------------------- 2 files changed, 12 insertions(+), 90 deletions(-) diff --git a/examples/hetero/to_hetero_mag.py b/examples/hetero/to_hetero_mag.py index aa002c961ce0..6605038c9af3 100644 --- a/examples/hetero/to_hetero_mag.py +++ b/examples/hetero/to_hetero_mag.py @@ -1,23 +1,18 @@ import argparse import os.path as osp -import time import torch import torch.nn.functional as F from torch.nn import ReLU -from torch.profiler import ProfilerActivity, profile from tqdm import tqdm import torch_geometric.transforms as T from torch_geometric.datasets import OGB_MAG from torch_geometric.loader import HGTLoader, NeighborLoader from torch_geometric.nn import Linear, SAGEConv, Sequential, to_hetero -from torch_geometric.profile import rename_profile_file, trace_handler parser = argparse.ArgumentParser() parser.add_argument('--use_hgt_loader', action='store_true') -parser.add_argument('--inference', action='store_true') -parser.add_argument('--profile', action='store_true') args = parser.parse_args() device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') @@ -98,42 +93,10 @@ def test(loader): return total_correct / total_examples -@torch.no_grad() -def inference(loader): - model.eval() - for batch in tqdm(loader): - batch = batch.to(device, 'edge_index') - model(batch.x_dict, batch.edge_index_dict) - - init_params() # Initialize parameters. -if not args.inference: - optimizer = torch.optim.Adam(model.parameters(), lr=0.01) +optimizer = torch.optim.Adam(model.parameters(), lr=0.01) - for epoch in range(1, 21): - loss = train() - val_acc = test(val_loader) - print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}, Val: {val_acc:.4f}') -else: - for epoch in range(1, 21): - if epoch == 20: - if args.profile: - with profile( - activities=[ - ProfilerActivity.CPU, ProfilerActivity.CUDA - ], on_trace_ready=trace_handler) as p: - inference(val_loader) - p.step() - rename_profile_file('to_hetero_mag') - else: - if torch.cuda.is_available(): - torch.cuda.synchronize() - t_start = time.time() - inference(val_loader) - if torch.cuda.is_available(): - torch.cuda.synchronize() - t_end = time.time() - duration = t_end - t_start - print("End-to-End time: {} s".format(duration), flush=True) - else: - inference(val_loader) +for epoch in range(1, 21): + loss = train() + val_acc = test(val_loader) + print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}, Val: {val_acc:.4f}') diff --git a/examples/pna.py b/examples/pna.py index a7ee23bd6367..4697f49d7121 100644 --- a/examples/pna.py +++ b/examples/pna.py @@ -1,24 +1,15 @@ -import argparse import os.path as osp -import time import torch import torch.nn.functional as F from torch.nn import Embedding, Linear, ModuleList, ReLU, Sequential from torch.optim.lr_scheduler import ReduceLROnPlateau -from torch.profiler import ProfilerActivity, profile from torch_geometric.datasets import ZINC from torch_geometric.loader import DataLoader from torch_geometric.nn import BatchNorm, PNAConv, global_add_pool -from torch_geometric.profile import rename_profile_file, trace_handler from torch_geometric.utils import degree -parser = argparse.ArgumentParser() -parser.add_argument('--inference', action='store_true') -parser.add_argument('--profile', action='store_true') -args = parser.parse_args() - path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'ZINC') train_dataset = ZINC(path, subset=True, split='train') val_dataset = ZINC(path, subset=True, split='val') @@ -109,42 +100,10 @@ def test(loader): return total_error / len(loader.dataset) -@torch.no_grad() -def inference(loader): - model.eval() - for data in loader: - data = data.to(device) - model(data.x, data.edge_index, data.edge_attr, data.batch) - - -if not args.inference: - for epoch in range(1, 301): - loss = train(epoch) - val_mae = test(val_loader) - test_mae = test(test_loader) - scheduler.step(val_mae) - print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}, Val: {val_mae:.4f}, ' - f'Test: {test_mae:.4f}') -else: - for epoch in range(1, 301): - if epoch == 300: - if args.profile: - with profile( - activities=[ - ProfilerActivity.CPU, ProfilerActivity.CUDA - ], on_trace_ready=trace_handler) as p: - inference(test_loader) - p.step() - rename_profile_file('pna') - else: - if torch.cuda.is_available(): - torch.cuda.synchronize() - t_start = time.time() - inference(test_loader) - if torch.cuda.is_available(): - torch.cuda.synchronize() - t_end = time.time() - duration = t_end - t_start - print("End-to-End time: {} s".format(duration), flush=True) - else: - inference(test_loader) +for epoch in range(1, 301): + loss = train(epoch) + val_mae = test(val_loader) + test_mae = test(test_loader) + scheduler.step(val_mae) + print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}, Val: {val_mae:.4f}, ' + f'Test: {test_mae:.4f}') From fd39e2691fd9dd37fe2650129fdbe4daed5aadf5 Mon Sep 17 00:00:00 2001 From: rusty1s Date: Wed, 13 Jul 2022 13:19:43 +0000 Subject: [PATCH 23/24] update --- test/profile/test_profile.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/profile/test_profile.py b/test/profile/test_profile.py index 7fbd63422459..8e1d5e197f2f 100644 --- a/test/profile/test_profile.py +++ b/test/profile/test_profile.py @@ -12,10 +12,11 @@ timeit, trace_handler, ) -from torch_geometric.testing import withCUDA +from torch_geometric.testing import onlyFullTest, withCUDA @withCUDA +@onlyFullTest def test_profile(get_dataset): dataset = get_dataset(name='PubMed') data = dataset[0].cuda() @@ -67,6 +68,7 @@ def test(model, x, edge_index, y): assert stats_summary.max_nvidia_smi_used_cuda > 0 +@onlyFullTest def test_trace_handler(get_dataset): dataset = get_dataset(name='PubMed') device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') @@ -76,7 +78,6 @@ def test_trace_handler(get_dataset): model.eval() for epoch in range(3): - print("epoch ", epoch) with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], on_trace_ready=trace_handler) as p: model(data.x, data.edge_index) From 7fd675be387f86efea6d439e65a82390bb26b62a Mon Sep 17 00:00:00 2001 From: rusty1s Date: Wed, 13 Jul 2022 13:22:07 +0000 Subject: [PATCH 24/24] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e5c350e81fb8..5902d685449e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ## [2.0.5] - 2022-MM-DD ### Added +- Added inference benchmarks ([#4892](https://github.com/pyg-team/pytorch_geometric/pull/4892)) - Added `unbatch_edge_index` functionality for splitting an `edge_index` tensor according to a `batch` vector ([#4903](https://github.com/pyg-team/pytorch_geometric/pull/4903)) - Added node-wise normalization mode in `LayerNorm` ([#4944](https://github.com/pyg-team/pytorch_geometric/pull/4944)) - Added support for `normalization_resolver` ([#4926](https://github.com/pyg-team/pytorch_geometric/pull/4926), [#4951](https://github.com/pyg-team/pytorch_geometric/pull/4951), [#4958](https://github.com/pyg-team/pytorch_geometric/pull/4958), [#4959](https://github.com/pyg-team/pytorch_geometric/pull/4959))