Skip to content

Commit

Permalink
[GraphBolt][CUDA] Refactor gb.unique_and_compact, add async_op.
Browse files Browse the repository at this point in the history
  • Loading branch information
mfbalin committed Sep 9, 2024
1 parent e8022e9 commit b010268
Showing 1 changed file with 32 additions and 20 deletions.
52 changes: 32 additions & 20 deletions python/dgl/graphbolt/internal/sample_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,15 @@ def unique_and_compact(
],
rank: int = 0,
world_size: int = 1,
async_op: bool = False,
):
"""
Compact a list of nodes tensor. The `rank` and `world_size` parameters are
relevant when using Cooperative Minibatching, which was initially proposed
in `Deep Graph Library PR#4337 <https://github.com/dmlc/dgl/pull/4337>`__ and
was later first fully described in
`Cooperative Minibatching in Graph Neural Networks
<https://arxiv.org/abs/2310.12403>`__
<https://arxiv.org/abs/2310.12403>`__.
Cooperation between the GPUs eliminates duplicate work performed across the
GPUs due to the overlapping sampled k-hop neighborhoods of seed nodes when
performing GNN minibatching.
Expand All @@ -48,6 +49,9 @@ def unique_and_compact(
The rank of the current process.
world_size : int
The number of processes.
async_op : bool
Boolean indicating whether the call is asynchronous. If so, the result
can be obtained by calling wait on the returned future.
Returns
-------
Expand All @@ -63,27 +67,35 @@ def unique_and_compact(
"""
is_heterogeneous = isinstance(nodes, dict)

def unique_and_compact_per_type(nodes):
nums = [node.size(0) for node in nodes]
nodes = torch.cat(nodes)
empty_tensor = nodes.new_empty(0)
unique, compacted, _, offsets = torch.ops.graphbolt.unique_and_compact(
nodes, empty_tensor, empty_tensor, rank, world_size
)
compacted = compacted.split(nums)
return unique, list(compacted), offsets

if not is_heterogeneous:

Check warning on line 70 in python/dgl/graphbolt/internal/sample_utils.py

View workflow job for this annotation

GitHub Actions / lintrunner

UFMT format

Run `lintrunner -a` to apply this patch.
homo_ntype = 'a'
nodes = {homo_ntype: nodes}

nums = {}
concat_nodes, empties = [], []
for ntype, nodes_of_type in nodes.items():
nums[ntype] = [node.size(0) for node in nodes_of_type]
concat_nodes.append(torch.cat(nodes_of_type))
empties.append(concat_nodes[-1].new_empty(0))
unique_fn = (
torch.ops.graphbolt.unique_and_compact_batched_async
if async_op
else torch.ops.graphbolt.unique_and_compact_batched
)
results = unique_fn(concat_nodes, empties, empties, rank, world_size)
unique, compacted, offsets = {}, {}, {}
for ntype, result in zip(nodes.keys(), results):
(
unique[ntype],
concat_compacted,
_,
offsets[ntype],
) = result
compacted[ntype] = list(concat_compacted.split(nums[ntype]))
if is_heterogeneous:
unique, compacted, offsets = {}, {}, {}
for ntype, nodes_of_type in nodes.items():
(
unique[ntype],
compacted[ntype],
offsets[ntype],
) = unique_and_compact_per_type(nodes_of_type)
return unique, compacted, offsets
else:
return unique_and_compact_per_type(nodes)
return unique[homo_ntype], compacted[homo_ntype], offsets[homo_ntype]


def compact_temporal_nodes(nodes, nodes_timestamp):
Expand Down Expand Up @@ -161,7 +173,7 @@ def unique_and_compact_csc_formats(
`Deep Graph Library PR#4337 <https://github.com/dmlc/dgl/pull/4337>`__
and was later first fully described in
`Cooperative Minibatching in Graph Neural Networks
<https://arxiv.org/abs/2310.12403>`__
<https://arxiv.org/abs/2310.12403>`__.
Cooperation between the GPUs eliminates duplicate work performed across the
GPUs due to the overlapping sampled k-hop neighborhoods of seed nodes when
performing GNN minibatching.
Expand Down

0 comments on commit b010268

Please sign in to comment.