Skip to content

Commit

Permalink
Add NIC write path HDP flush
Browse files: browse the repository at this point in the history
  • Loading branch information
wenkaidu committed Nov 28, 2024
1 parent a00b620 commit c41b9d6
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions src/transport/net.cc
Original file line number | Diff line number | Diff line change
Expand Up @@ -249,7 +249,11 @@ static ncclResult_t recvSetup(struct ncclComm* comm, struct ncclTopoGraph* graph
NCCLCHECK(ncclTopoCheckGdr(comm->topo, myInfo->busId, netId, 0, &req.useGdr));

// Determine whether we need to flush the GDR buffer on recv or not
if (req.useGdr) NCCLCHECK(ncclTopoNeedFlush(comm->topo, myInfo->busId, &req.needFlush));
if (req.useGdr) {
NCCLCHECK(ncclTopoNeedFlush(comm->topo, myInfo->busId, &req.needFlush));
CUDACHECK(hipDeviceGetAttribute((int*)&req.curr_hdp_reg, hipDeviceAttributeHdpMemFlushCntl, myInfo->cudaDev));
recv->conn.curr_hdp_reg = req.curr_hdp_reg;
}

// We don't support PXN on receive yet
tpProxyRank = comm->topParentRanks[myInfo->rank];
Expand Down Expand Up @@ -1586,8 +1590,9 @@ static ncclResult_t recvProxyProgress(struct ncclProxyState* proxyState, struct
sub->transmitted += args->sliceSteps;
for (uint64_t step=sub->transmitted-args->sliceSteps; step<sub->transmitted; step++) ncclProfilingRecord(args, s+i, step, ncclProxyProfileRecvGPUWait);
if (step < sub->nsteps) {
__sync_synchronize();
struct recvNetResources* resources = (struct recvNetResources*) (sub->connection->transportResources);
*resources->curr_hdp_reg = 0x1;
__sync_synchronize();
volatile uint64_t* recvTail = resources->gdcSync ? resources->gdcSync : &resources->recvMem->tail;
if (sub->reg) {
// We may have added more net steps, but reg operations only have a single step w.r.t. the GPU.
Expand Down

0 comments on commit c41b9d6

Please sign in to comment.