diff --git a/lib/Dialect/TritonGPU/Transforms/Pipeliner/MatmulLoopPipeline.cpp b/lib/Dialect/TritonGPU/Transforms/Pipeliner/MatmulLoopPipeline.cpp index ad0ce1fa1ccf..b3d34dc22e96 100644 --- a/lib/Dialect/TritonGPU/Transforms/Pipeliner/MatmulLoopPipeline.cpp +++ b/lib/Dialect/TritonGPU/Transforms/Pipeliner/MatmulLoopPipeline.cpp @@ -115,12 +115,16 @@ createAsyncCopy(scf::ForOp &forOp, tt::LoadOp loadOp, Value alloc, alloc.replaceAllUsesWith(viewLoad.getResult()); alloc.erase(); } else { + SmallVector allocsToErase; for (Operation *user : loadOp->getUsers()) { if (auto alloc = dyn_cast(user)) { alloc.replaceAllUsesWith(viewLoad.getResult()); - alloc.erase(); + allocsToErase.push_back(alloc); } } + for (auto alloc : allocsToErase) { + alloc.erase(); + } auto sharedLoad = builder.create(loc, loadOp.getType(), viewLoad); loadOp->replaceAllUsesWith(sharedLoad->getResults());