diff --git a/src/init.cc b/src/init.cc index 1c6b14f04..134d3eb0d 100644 --- a/src/init.cc +++ b/src/init.cc @@ -122,7 +122,7 @@ static constexpr int64_t defaultEnableMscclpp = 0; RCCL_PARAM(MscclppEnabled, "MSCCLPP_ENABLE", defaultEnableMscclpp); // GDRCOPY support: Off by default -NCCL_PARAM(GdrCopyEnable, "GDRCOPY_ENABLE", 0); +NCCL_PARAM(GdrCopyEnable, "GDRCOPY_ENABLE", 1); // GDRCOPY support gdr_t ncclGdrCopy = NULL; diff --git a/src/transport/net.cc b/src/transport/net.cc index ed7fd780c..ca2ddce5c 100644 --- a/src/transport/net.cc +++ b/src/transport/net.cc @@ -1558,7 +1558,12 @@ static ncclResult_t recvProxyProgress(struct ncclProxyState* proxyState, struct if (resources->gdcFlush) { #if defined (__x86_64__) // Force a PCI-E read from GPU memory + static bool once = true; asm volatile ("mov (%0), %%eax" :: "l"(resources->gdcFlush) : "%eax"); + if (once) { + once = false; + INFO(NCCL_INIT, "%s: issued GDC flush", __func__); + } #else WARN("NET: GDR Flush only supported on x86_64"); return ncclInternalError;