From 41d82490f326aaf62c3e6c580e45796613546f4d Mon Sep 17 00:00:00 2001 From: Wenkai Du Date: Wed, 11 Dec 2024 11:35:10 -0800 Subject: [PATCH] Control flushing by NCCL_NET_FORCE_FLUSH and RCCL_NET_HDP_FLUSH --- src/graph/paths.cc | 4 ++-- src/transport/net.cc | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/graph/paths.cc b/src/graph/paths.cc index de78d804d..67c937fae 100644 --- a/src/graph/paths.cc +++ b/src/graph/paths.cc @@ -450,7 +450,7 @@ ncclResult_t ncclTopoCheckGdr(struct ncclTopoSystem* system, int64_t busId, int6 } // Set to 0 to disable the flush on Hopper when using GDR -NCCL_PARAM(NetForceFlush, "NET_FORCE_FLUSH", 0); +NCCL_PARAM(NetForceFlush, "NET_FORCE_FLUSH", 1); // Determine whether we need to flush the GDR recv buffers ncclResult_t ncclTopoNeedFlush(struct ncclTopoSystem* system, int64_t busId, int* flush) { @@ -458,7 +458,7 @@ ncclResult_t ncclTopoNeedFlush(struct ncclTopoSystem* system, int64_t busId, int NCCLCHECK(ncclTopoIdToIndex(system, GPU, busId, &g)); struct ncclTopoNode* gpu = system->nodes[GPU].nodes+g; #if defined(__HIP_PLATFORM_AMD__) || defined(__HIPCC__) - *flush = 1; + *flush = ncclParamNetForceFlush(); #else // Flush is required on Ampere and earlier *flush = gpu->gpu.cudaCompCap < 90 ? 1 : ncclParamNetForceFlush(); diff --git a/src/transport/net.cc b/src/transport/net.cc index 47b1c565b..bcd0da656 100644 --- a/src/transport/net.cc +++ b/src/transport/net.cc @@ -1354,6 +1354,8 @@ static ncclResult_t sendProxyProgress(struct ncclProxyState* proxyState, struct return ncclSuccess; } +RCCL_PARAM(NetHdpFlush, "NET_HDP_FLUSH", 1); + static ncclResult_t recvProxyProgress(struct ncclProxyState* proxyState, struct ncclProxyArgs* args) { #if defined(ENABLE_NPKIT) && defined(ENABLE_NPKIT_NET_COLLECT_POLL_CNT) g_npkit_net_poll_cnt++; @@ -1543,8 +1545,10 @@ static ncclResult_t recvProxyProgress(struct ncclProxyState* proxyState, struct if (totalSize > 0 && p == NCCL_PROTO_SIMPLE && needFlush) { // GDRCOPY support struct recvNetResources* resources = (struct recvNetResources*) (subGroup->connection->transportResources); - if (resources->curr_hdp_reg) *resources->curr_hdp_reg = 0x1; - __sync_synchronize(); + if (rcclParamNetHdpFlush() && resources->curr_hdp_reg) { + *resources->curr_hdp_reg = 0x1; + __sync_synchronize(); + } if (resources->gdcFlush) { #if defined (__x86_64__) // Force a PCI-E read from GPU memory