pyg-team · mszarma · Jul 21, 2022 · Jul 4, 2022 · Jul 4, 2022 · Jul 7, 2022
diff --git a/benchmark/inference/edgeconv.py b/benchmark/inference/edgeconv.py
@@ -0,0 +1,39 @@
+import torch
+import torch.nn.functional as F
+from torch.nn import Linear as Lin
+from torch.nn import ReLU
+from torch.nn import Sequential as Seq
+from tqdm import tqdm
+
+from torch_geometric.nn import EdgeConv
+
+
+class EdgeConvNet(torch.nn.Module):
+    """EdgeConv network used by the inference benchmark.
+
+    The network consists of an input layer, ``num_layers - 2`` hidden layers
+    and an output layer. Every layer is an :class:`EdgeConv` that wraps its
+    own ``Linear -> ReLU -> Linear`` MLP.
+
+    The input and the output layer are always created, so values of
+    ``num_layers`` below 2 still produce a two-layer network.
+
+    Args:
+        input_channels: Width of the input node features.
+        hidden_channels: Width of the hidden representations.
+        out_channels: Width of the output of the last layer.
+        num_layers: Requested number of layers (see the note above).
+    """
+    def __init__(self, input_channels, hidden_channels, out_channels,
+                 num_layers):
+        super().__init__()
+
+        def make_mlp(in_width, out_width):
+            # The first Linear takes twice the layer's input width because
+            # EdgeConv applies the MLP to a pair of concatenated node
+            # feature vectors (see the PyG EdgeConv documentation).
+            return Seq(Lin(2 * in_width, hidden_channels), ReLU(),
+                       Lin(hidden_channels, out_width))
+
+        self.convs = torch.nn.ModuleList()
+        self.convs.append(EdgeConv(make_mlp(input_channels, hidden_channels)))
+        for _ in range(num_layers - 2):
+            # A fresh MLP per hidden layer: reusing one Sequential instance
+            # would make all hidden layers share a single set of weights.
+            self.convs.append(
+                EdgeConv(make_mlp(hidden_channels, hidden_channels)))
+        self.convs.append(EdgeConv(make_mlp(hidden_channels, out_channels)))
+
+    def forward(self, x, edge_index):
+        """Apply all layers in order, with an in-place ReLU between them.
+
+        No activation is applied after the last layer.
+        """
+        for i, conv in enumerate(self.convs):
+            x = conv(x, edge_index)
+            if i < len(self.convs) - 1:
+                x = x.relu_()
+        return x
+
+    @torch.no_grad()
+    def inference(self, subgraph_loader, device):
+        """Run a gradient-free forward pass over every batch of the loader.
+
+        Each batch is moved to ``device`` first. The output is sliced to its
+        first ``batch.batch_size`` rows (presumably the seed nodes of the
+        batch -- confirm against the loader) and is neither stored nor
+        returned; the method exists to be timed.
+        """
+        for batch in tqdm(subgraph_loader):
+            batch = batch.to(device)
+            batch_size = batch.batch_size
+            out = self(batch.x, batch.edge_index)[:batch_size]
diff --git a/benchmark/inference/gat.py b/benchmark/inference/gat.py
@@ -0,0 +1,50 @@
+import torch
+import torch.nn.functional as F
+from torch.nn import Linear
+from tqdm import tqdm
+
+from torch_geometric.nn import GATConv
+
+
+class GATBlock(torch.nn.Module):
+    """A single :class:`GATConv` layer, followed by ELU unless it is last.
+
+    Args:
+        in_channels: Width of the incoming node features.
+        out_channels: Per-head output width passed to ``GATConv``.
+        heads: Number of attention heads passed to ``GATConv``.
+        last_layer: If ``True``, ``forward`` returns the convolution output
+            without applying ELU.
+        **conv_kwargs: Extra keyword arguments forwarded to ``GATConv``.
+    """
+    def __init__(self, in_channels, out_channels, heads, last_layer=False,
+                 **conv_kwargs):
+        super().__init__()
+
+        self.conv = GATConv(in_channels, out_channels, heads, **conv_kwargs)
+        # The skip projection is sized for the block output, but it is not
+        # applied in `forward` yet (see the TODO there).
+        skip_width = out_channels if last_layer else out_channels * heads
+        self.skip = Linear(in_channels, skip_width)
+        self.last_layer = last_layer
+
+    def forward(self, x, edge_index):
+        """Return the convolution output, ELU-activated unless last layer."""
+        out = self.conv(x, edge_index)
+        # TODO: how to use skip connection with NeighborLoader?
+        # out = out + self.skip(?)
+        if self.last_layer:
+            return out
+        return F.elu(out)
+
+
+class GATNet(torch.nn.Module):
+    """Stack of :class:`GATBlock` layers used by the inference benchmark.
+
+    The first and the last block are always created, with
+    ``num_layers - 2`` blocks in between. The last block is built with
+    ``last_layer=True`` and ``concat=False``.
+
+    Args:
+        in_channels: Width of the input node features.
+        hidden_channels: Per-head width of the hidden blocks.
+        out_channels: Per-head width handed to the last block.
+        heads: Number of attention heads of every block.
+        num_layers: Requested number of blocks; values below 2 still yield
+            two blocks.
+    """
+    def __init__(self, in_channels, hidden_channels, out_channels, heads,
+                 num_layers):
+        super().__init__()
+
+        # Input width of every block after the first one.
+        wide = hidden_channels * heads
+
+        blocks = [GATBlock(in_channels, hidden_channels, heads)]
+        blocks.extend(
+            GATBlock(wide, hidden_channels, heads)
+            for _ in range(num_layers - 2))
+        blocks.append(
+            GATBlock(wide, out_channels, heads, last_layer=True,
+                     concat=False))
+        self.layers = torch.nn.ModuleList(blocks)
+
+    def forward(self, x, edge_index):
+        """Feed ``x`` through every block in order and return the result."""
+        for block in self.layers:
+            x = block(x, edge_index)
+        return x
+
+    @torch.no_grad()
+    def inference(self, subgraph_loader, device):
+        """Run a gradient-free forward pass over every batch of the loader.
+
+        The output of each batch is sliced to its first
+        ``batch.batch_size`` rows and is neither stored nor returned.
+        """
+        for batch in tqdm(subgraph_loader):
+            batch = batch.to(device)
+            num_rows = batch.batch_size
+            out = self(batch.x, batch.edge_index)[:num_rows]
diff --git a/benchmark/inference/gcn.py b/benchmark/inference/gcn.py
@@ -0,0 +1,30 @@
+import torch
+import torch.nn.functional as F
+from tqdm import tqdm
+
+from torch_geometric.nn import GCNConv
+
+
+class GCN(torch.nn.Module):
+    """Stack of :class:`GCNConv` layers used by the inference benchmark.
+
+    Args:
+        in_channels: Width of the input node features.
+        hidden_channels: Width of the hidden representations.
+        out_channels: Width of the output of the last layer.
+        num_layers: Requested number of layers. The first and the last layer
+            are always created, so values below 2 still yield two layers.
+    """
+    def __init__(self, in_channels, hidden_channels, out_channels, num_layers):
+        super().__init__()
+
+        # Consecutive entries are the (input, output) widths of each layer.
+        # A negative multiplier yields an empty list, matching an empty range.
+        widths = ([in_channels, hidden_channels] +
+                  [hidden_channels] * (num_layers - 2) + [out_channels])
+        self.convs = torch.nn.ModuleList(
+            [GCNConv(w_in, w_out)
+             for w_in, w_out in zip(widths[:-1], widths[1:])])
+
+    def forward(self, x, edge_index):
+        """Apply all layers, with ReLU after every layer except the last."""
+        last = len(self.convs) - 1
+        for idx, conv in enumerate(self.convs):
+            x = conv(x, edge_index)
+            if idx != last:
+                x = F.relu(x)
+        return x
+
+    @torch.no_grad()
+    def inference(self, subgraph_loader, device):
+        """Run a gradient-free forward pass over every batch of the loader.
+
+        The output of each batch is sliced to its first
+        ``batch.batch_size`` rows and is neither stored nor returned.
+        """
+        for batch in tqdm(subgraph_loader):
+            batch = batch.to(device)
+            num_rows = batch.batch_size
+            out = self(batch.x, batch.edge_index)[:num_rows]
diff --git a/benchmark/inference/graphsage.py b/benchmark/inference/graphsage.py
@@ -0,0 +1,42 @@
+import torch
+from tqdm import tqdm
+
+from torch_geometric.nn import SAGEConv, to_hetero
+
+
+class SAGE_HETERO:
+    """Holder for a heterogeneous GraphSAGE model used by the benchmark.
+
+    This is a plain object, not a ``torch.nn.Module``. The actual model is
+    only built once :meth:`create_hetero` is called with the graph metadata;
+    until then ``self.model`` is ``None``.
+
+    Args:
+        hidden_channels: Width of the hidden representations.
+        output_channels: Width of the output of the last layer.
+        num_layers: Requested number of layers.
+    """
+    def __init__(self, hidden_channels, output_channels, num_layers) -> None:
+        self.model = None
+        self.hidden_channels = hidden_channels
+        self.output_channels = output_channels
+        self.num_layers = num_layers
+
+    def create_hetero(self, metadata):
+        """Build the model and convert it with ``to_hetero`` (sum aggr)."""
+        model = SAGE_FOR_HETERO(self.hidden_channels, self.output_channels,
+                                self.num_layers)
+        self.model = to_hetero(model, metadata, aggr='sum')
+
+    # Disable gradient tracking, as the `inference` methods of the
+    # homogeneous benchmark models do, so that the timed forward passes do
+    # not record autograd state.
+    @torch.no_grad()
+    def inference(self, loader, device):
+        """Run a forward pass over every batch of ``loader`` in eval mode.
+
+        :meth:`create_hetero` must have been called before. The node type
+        ``'paper'`` is hard-coded: its output is sliced to the first
+        ``batch['paper'].batch_size`` rows. The result is neither stored nor
+        returned.
+        """
+        self.model.eval()
+        for batch in tqdm(loader):
+            batch = batch.to(device)
+            batch_size = batch['paper'].batch_size
+            out = self.model(batch.x_dict,
+                             batch.edge_index_dict)['paper'][:batch_size]
+
+
+class SAGE_FOR_HETERO(torch.nn.Module):
+    """Homogeneous GraphSAGE template that is converted with ``to_hetero``.
+
+    Every :class:`SAGEConv` is created with ``(-1, -1)`` input channels.
+    The first and the last layer are always created, so values of
+    ``num_layers`` below 2 still yield two layers.
+
+    Args:
+        hidden_channels: Output width of every layer except the last.
+        out_channels: Output width of the last layer.
+        num_layers: Requested number of layers.
+    """
+    def __init__(self, hidden_channels, out_channels, num_layers):
+        super().__init__()
+        # Output width per layer; a negative multiplier gives an empty list.
+        out_widths = ([hidden_channels] +
+                      [hidden_channels] * (num_layers - 2) + [out_channels])
+        self.convs = torch.nn.ModuleList(
+            [SAGEConv((-1, -1), width) for width in out_widths])
+
+    def forward(self, x, edge_index):
+        """Apply all layers, with in-place ReLU after all but the last."""
+        last = len(self.convs) - 1
+        for idx, conv in enumerate(self.convs):
+            x = conv(x, edge_index)
+            if idx < last:
+                x = x.relu_()
+        return x
@@ -0,0 +1,128 @@
+import argparse
+import copy
+from timeit import default_timer
+
+import torch
+from ogb.nodeproppred import PygNodePropPredDataset
+from utils import get_dataset, get_degree, get_model
+
+from torch_geometric.loader import NeighborLoader
+
+# Maps each dataset name to the model names that `run` benchmarks on it.
+# Any other dataset/model combination is reported and skipped by `run`.
+supported_sets = {
+    'ogbn-mag': ['rgat', 'rgcn'],
+    'reddit': ['edge_conv', 'gat', 'gcn', 'pna_conv'],
+    'ogbn-products': ['edge_conv', 'gat', 'gcn', 'pna_conv'],
+}
+
+
+def run(args: argparse.Namespace) -> None:
+    """Benchmark inference time over every requested configuration.
+
+    Sweeps over ``args.datasets`` x ``args.models`` x
+    ``args.eval_batch_sizes`` x ``args.num_layers`` x
+    ``args.num_hidden_channels``. For each combination a model is built
+    with ``get_model`` and the wall-clock time of one ``model.inference``
+    call is printed. Dataset/model pairs that are not listed in
+    ``supported_sets`` are reported and skipped.
+
+    Args:
+        args: Parsed command line options, as produced by the argument
+            parser in the ``__main__`` section of this script.
+    """
+    print('BENCHMARK STARTS')
+    if args.pure_gnn_mode:
+        print('PURE GNN MODE ACTIVATED')
+    for dataset_name in args.datasets:
+        print(f'Dataset: {dataset_name}')
+        # 'ogbn-mag' is the only dataset handled through the heterogeneous
+        # code path ('paper' node type, x_dict, metadata).
+        is_hetero = dataset_name == 'ogbn-mag'
+
+        dataset = get_dataset(
+            dataset_name, args.root, PygNodePropPredDataset
+            if dataset_name == 'ogbn-products' else None)
+
+        mask = ('paper', None) if is_hetero else None
+
+        data = dataset[0].to(args.device)
+        inputs_channels = (data.x_dict['paper'].size(-1)
+                           if is_hetero else dataset.num_features)
+
+        for model_name in args.models:
+            if model_name not in supported_sets[dataset_name]:
+                print(f'Configuration of {dataset_name} + {model_name} '
+                      f'not supported. Skipping.')
+                continue
+            print(f'Evaluation bench for {model_name}:')
+            if model_name == 'pna_conv':
+                # PNA needs degree statistics, gathered with a dedicated
+                # fixed-batch-size loader.
+                loader = NeighborLoader(
+                    copy.copy(data),
+                    num_neighbors=[-1],
+                    input_nodes=mask,
+                    batch_size=1024,
+                    shuffle=False,
+                    num_workers=args.num_workers,
+                )
+                degree = get_degree(loader)
+
+            for batch_size in args.eval_batch_sizes:
+                subgraph_loader = NeighborLoader(
+                    copy.copy(data),
+                    num_neighbors=[-1],
+                    input_nodes=mask,
+                    batch_size=batch_size,
+                    shuffle=False,
+                    num_workers=args.num_workers,
+                )
+                subgraph_loader.data.n_id = torch.arange(data.num_nodes)
+
+                # In pure GNN mode the batches are collected once, before
+                # any timing, so the timed region only iterates over
+                # already materialized batches. A separate name is used so
+                # that the loader itself is never overwritten.
+                if args.pure_gnn_mode:
+                    eval_batches = []
+                    for i, batch in enumerate(subgraph_loader):
+                        if i == args.prebatched_samples:
+                            break
+                        eval_batches.append(batch)
+                else:
+                    eval_batches = subgraph_loader
+
+                for layers in args.num_layers:
+                    for hidden_channels in args.num_hidden_channels:
+                        print(
+                            '-----------------------------------------------')
+                        print(f'Batch size={batch_size}, '
+                              f'Layers amount={layers}, '
+                              f'Hidden features size={hidden_channels}')
+                        params = {
+                            'inputs_channels': inputs_channels,
+                            'hidden_channels': hidden_channels,
+                            'output_channels': dataset.num_classes,
+                            'num_heads': args.num_heads,
+                            'num_layers': layers,
+                        }
+                        if model_name == 'pna_conv':
+                            params['degree'] = degree
+
+                        model = get_model(
+                            model_name, params,
+                            metadata=data.metadata() if is_hetero else None)
+
+                        start = default_timer()
+                        model.inference(eval_batches, args.device)
+                        stop = default_timer()
+                        print(f'Inference time={stop-start:.3f}\n')
+
+
+if __name__ == '__main__':
+    # Command line interface of the benchmark. Options declared with
+    # nargs='+' accept one or more values, and `run` sweeps over all of them.
+    argparser = argparse.ArgumentParser('GNN inference benchmark')
+
+    argparser.add_argument('--device', default='cpu', type=str)
+    argparser.add_argument(
+        '--pure-gnn-mode', action='store_true',
+        help='turn on pure gnn efficiency bench - firstly prepare batches')
+    # Spelled with hyphens like every other flag. The original underscore
+    # spelling is kept as an alias so existing invocations keep working;
+    # both store into `args.prebatched_samples`.
+    argparser.add_argument('--prebatched-samples', '--prebatched_samples',
+                           default=3, type=int,
+                           help='number of preloaded batches in pure_gnn mode')
+    argparser.add_argument('--datasets', nargs='+',
+                           default=['ogbn-mag', 'ogbn-products',
+                                    'reddit'], type=str)
+    argparser.add_argument(
+        '--models', nargs='+',
+        default=['edge_conv', 'gat', 'gcn', 'pna_conv', 'rgat',
+                 'rgcn'], type=str)
+    argparser.add_argument('--root', default='../../data', type=str)
+    argparser.add_argument('--eval-batch-sizes', nargs='+',
+                           default=[512, 1024, 2048, 4096, 8192], type=int)
+    argparser.add_argument('--num-layers', nargs='+', default=[1, 2, 3],
+                           type=int)
+    argparser.add_argument('--num-hidden-channels', nargs='+',
+                           default=[64, 128, 256], type=int)
+    argparser.add_argument(
+        '--num-heads', default=3, type=int,
+        help='number of hidden attention heads, applies only for gat and rgat')
+    argparser.add_argument('--num-workers', default=2, type=int)
+
+    args = argparser.parse_args()
+
+    run(args)
diff --git a/benchmark/inference/pna.py b/benchmark/inference/pna.py
@@ -0,0 +1,37 @@
+import torch
+from tqdm import tqdm
+
+from torch_geometric.nn import PNAConv
+
+
+class PNANet(torch.nn.Module):
+    """Stack of :class:`PNAConv` layers used by the inference benchmark.
+
+    Every layer uses the same aggregators (mean, min, max, std) and scalers
+    (identity, amplification, attenuation).
+
+    Args:
+        input_channels: Width of the input node features.
+        hidden_channels: Width of the hidden representations.
+        out_channels: Width of the output of the last layer.
+        num_layers: Requested number of layers. The first and the last layer
+            are always created, so values below 2 still yield two layers.
+        degree: Passed unchanged to every ``PNAConv`` (presumably the degree
+            histogram produced by ``get_degree`` -- confirm with caller).
+    """
+    def __init__(self, input_channels, hidden_channels, out_channels,
+                 num_layers, degree):
+        super().__init__()
+        self.aggregators = ['mean', 'min', 'max', 'std']
+        self.scalers = ['identity', 'amplification', 'attenuation']
+
+        # Consecutive entries are the (input, output) widths of each layer.
+        # A negative multiplier yields an empty list, matching an empty range.
+        widths = ([input_channels, hidden_channels] +
+                  [hidden_channels] * (num_layers - 2) + [out_channels])
+        self.convs = torch.nn.ModuleList([
+            PNAConv(w_in, w_out, self.aggregators, self.scalers, degree)
+            for w_in, w_out in zip(widths[:-1], widths[1:])
+        ])
+
+    def forward(self, x, edge_index):
+        """Apply all layers, with in-place ReLU after all but the last."""
+        last = len(self.convs) - 1
+        for idx, conv in enumerate(self.convs):
+            x = conv(x, edge_index)
+            if idx < last:
+                x = x.relu_()
+        return x
+
+    @torch.no_grad()
+    def inference(self, subgraph_loader, device):
+        """Run a gradient-free forward pass over every batch of the loader.
+
+        The output of each batch is sliced to its first
+        ``batch.batch_size`` rows and is neither stored nor returned.
+        """
+        for batch in tqdm(subgraph_loader):
+            batch = batch.to(device)
+            num_rows = batch.batch_size
+            out = self(batch.x, batch.edge_index)[:num_rows]
diff --git a/benchmark/inference/rgat.py b/benchmark/inference/rgat.py
@@ -0,0 +1,50 @@
+import torch
+import torch.nn.functional as F
+from tqdm import tqdm
+
+from torch_geometric.nn import GATConv, to_hetero
+
+
+class GAT_HETERO:
+    """Holder for a heterogeneous GAT model used by the benchmark.
+
+    This is a plain object, not a ``torch.nn.Module``. The actual model is
+    only built once :meth:`create_hetero` is called with the graph metadata;
+    until then ``self.model`` is ``None``.
+
+    Args:
+        hidden_channels: Per-head width of the hidden layers.
+        output_channels: Per-head width of the last layer.
+        num_layers: Requested number of layers.
+        num_heads: Number of attention heads of every layer.
+    """
+    def __init__(self, hidden_channels, output_channels, num_layers,
+                 num_heads) -> None:
+        self.model = None
+        self.hidden_channels = hidden_channels
+        self.output_channels = output_channels
+        self.num_layers = num_layers
+        self.num_heads = num_heads
+
+    def create_hetero(self, metadata):
+        """Build the model and convert it with ``to_hetero`` (sum aggr)."""
+        model = GAT_FOR_HETERO(self.hidden_channels, self.output_channels,
+                               self.num_layers, self.num_heads)
+        self.model = to_hetero(model, metadata, aggr='sum')
+
+    # Disable gradient tracking, as the `inference` methods of the
+    # homogeneous benchmark models do, so that the timed forward passes do
+    # not record autograd state.
+    @torch.no_grad()
+    def inference(self, loader, device):
+        """Run a forward pass over every batch of ``loader`` in eval mode.
+
+        :meth:`create_hetero` must have been called before. The node type
+        ``'paper'`` is hard-coded: its output is sliced to the first
+        ``batch['paper'].batch_size`` rows. The result is neither stored nor
+        returned.
+        """
+        self.model.eval()
+        for batch in tqdm(loader):
+            batch = batch.to(device)
+            batch_size = batch['paper'].batch_size
+            out = self.model(batch.x_dict,
+                             batch.edge_index_dict)['paper'][:batch_size]
+
+
+class GAT_FOR_HETERO(torch.nn.Module):
+    """Homogeneous GAT template that is converted with ``to_hetero``.
+
+    Every :class:`GATConv` is created with ``(-1, -1)`` input channels and
+    ``add_self_loops=False``. The first and the last layer are always
+    created, so values of ``num_layers`` below 2 still yield two layers.
+
+    Args:
+        hidden_channels: Per-head output width of all but the last layer.
+        out_channels: Per-head output width of the last layer.
+        num_layers: Requested number of layers.
+        heads: Number of attention heads of every layer.
+    """
+    def __init__(self, hidden_channels, out_channels, num_layers, heads):
+        super().__init__()
+        # Per-head output width of each layer; a negative multiplier gives
+        # an empty list.
+        # NOTE(review): unlike GATNet, the last layer is not created with
+        # concat=False; if GATConv concatenates heads by default, the output
+        # width is heads * out_channels -- confirm this is intended.
+        out_widths = ([hidden_channels] +
+                      [hidden_channels] * (num_layers - 2) + [out_channels])
+        self.convs = torch.nn.ModuleList([
+            GATConv((-1, -1), width, heads=heads, add_self_loops=False)
+            for width in out_widths
+        ])
+
+    def forward(self, x, edge_index):
+        """Apply all layers, with in-place ReLU after all but the last."""
+        last = len(self.convs) - 1
+        for idx, conv in enumerate(self.convs):
+            x = conv(x, edge_index)
+            if idx < last:
+                x = x.relu_()
+        return x