From e4fe7a6c790556791c3850096c337ce57d5aa023 Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Wed, 19 Jun 2024 17:28:43 +0200
Subject: [PATCH] Prezero RefPosition memory

---
 src/coreclr/jit/lsra.cpp      | 54 ++++++++++++++++++++---------------
 src/coreclr/jit/lsra.h        | 47 ++++++++++++------------------
 src/coreclr/jit/lsraarm64.cpp | 10 +++----
 src/coreclr/jit/lsrabuild.cpp | 28 +++++++++++++-----
 src/coreclr/jit/lsraxarch.cpp | 18 ++++++------
 5 files changed, 85 insertions(+), 72 deletions(-)

diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp
index 9018933d5e5bc..8cb05159d27b1 100644
--- a/src/coreclr/jit/lsra.cpp
+++ b/src/coreclr/jit/lsra.cpp
@@ -801,7 +801,9 @@ LinearScan::LinearScan(Compiler* theCompiler)
     : compiler(theCompiler)
     , intervals(theCompiler->getAllocator(CMK_LSRA_Interval))
     , allocationPassComplete(false)
-    , refPositions(theCompiler->getAllocator(CMK_LSRA_RefPosition))
+    , allRefPositionsHead(nullptr)
+    , allRefPositionsTail(nullptr)
+    , allRefPositionsTailSlot(&allRefPositionsHead)
     , killHead(nullptr)
     , killTail(&killHead)
     , listNodePool(theCompiler)
@@ -827,6 +829,8 @@ LinearScan::LinearScan(Compiler* theCompiler)
 
     firstColdLoc = MaxLocation;
 
+    currentRefPositionsBuffer = currentRefPositionsBufferEnd = nullptr;
+
 #ifdef DEBUG
     maxNodeLocation = 0;
     consecutiveRegistersLocation = 0;
@@ -2457,8 +2461,8 @@ void LinearScan::checkLastUses(BasicBlock* block)
     VARSET_TP computedLive(VarSetOps::MakeCopy(compiler, block->bbLiveOut));
     bool foundDiff = false;
 
-    RefPositionReverseIterator currentRefPosition = refPositions.rbegin();
-    for (; currentRefPosition->refType != RefTypeBB; currentRefPosition++)
+    RefPosition* currentRefPosition = getAllRefPositionsTail();
+    for (; currentRefPosition->refType != RefTypeBB; currentRefPosition = currentRefPosition->prevAllRefPosition)
     {
         // We should never see ParamDefs or ZeroInits within a basic block.
         assert(currentRefPosition->refType != RefTypeParamDef && currentRefPosition->refType != RefTypeZeroInit);
@@ -2517,7 +2521,7 @@ void LinearScan::checkLastUses(BasicBlock* block)
             }
         }
 
-        assert(currentRefPosition != refPositions.rend());
+        assert(currentRefPosition != nullptr);
     }
 
     VARSET_TP liveInNotComputedLive(VarSetOps::Diff(compiler, block->bbLiveIn, computedLive));
@@ -4862,9 +4866,9 @@ void LinearScan::dumpRefPositions(const char* str)
     printf("------------\n");
     printf("REFPOSITIONS %s: \n", str);
     printf("------------\n");
-    for (RefPosition& refPos : refPositions)
+    for (RefPosition* refPos = allRefPositionsHead; refPos != nullptr; refPos = refPos->nextAllRefPosition)
     {
-        refPos.dump(this);
+        refPos->dump(this);
     }
 }
 #endif // DEBUG
@@ -5049,8 +5053,10 @@ void LinearScan::allocateRegistersMinimal()
 
     bool handledBlockEnd = false;
 
-    for (RefPosition& currentRefPosition : refPositions)
+    for (RefPosition* pCurrentRefPosition = allRefPositionsHead; pCurrentRefPosition != nullptr; pCurrentRefPosition = pCurrentRefPosition->nextAllRefPosition)
     {
+        RefPosition& currentRefPosition = *pCurrentRefPosition;
+
         // TODO: Can we combine this with the freeing of registers below? It might
         // mess with the dump, since this was previously being done before the call below
         // to dumpRegRecords.
@@ -5733,8 +5739,9 @@ void LinearScan::allocateRegisters()
 
     bool handledBlockEnd = false;
 
-    for (RefPosition& currentRefPosition : refPositions)
+    for (RefPosition* pCurrentRefPosition = allRefPositionsHead; pCurrentRefPosition != nullptr; pCurrentRefPosition = pCurrentRefPosition->nextAllRefPosition)
     {
+        RefPosition& currentRefPosition = *pCurrentRefPosition;
         RefPosition* nextRefPosition = currentRefPosition.nextRefPosition;
 
         // TODO: Can we combine this with the freeing of registers below? It might
@@ -8018,12 +8025,12 @@ void LinearScan::resolveRegisters()
     }
 
     // handle incoming arguments and special temps
-    RefPositionIterator currentRefPosition = refPositions.begin();
+    RefPosition* currentRefPosition = allRefPositionsHead;
 
     if (localVarsEnregistered)
     {
         VarToRegMap entryVarToRegMap = inVarToRegMaps[compiler->fgFirstBB->bbNum];
-        for (; currentRefPosition != refPositions.end(); ++currentRefPosition)
+        for (; currentRefPosition != nullptr; currentRefPosition = currentRefPosition->nextAllRefPosition)
         {
             if (currentRefPosition->refType != RefTypeParamDef && currentRefPosition->refType != RefTypeZeroInit)
             {
@@ -8050,7 +8057,7 @@ void LinearScan::resolveRegisters()
     }
     else
     {
-        assert(currentRefPosition == refPositions.end() ||
+        assert(currentRefPosition == nullptr ||
                (currentRefPosition->refType != RefTypeParamDef && currentRefPosition->refType != RefTypeZeroInit));
     }
 
@@ -8071,7 +8078,7 @@ void LinearScan::resolveRegisters()
     }
 
     // Handle the DummyDefs, updating the incoming var location.
-    for (; currentRefPosition != refPositions.end(); ++currentRefPosition)
+    for (; currentRefPosition != nullptr; currentRefPosition = currentRefPosition->nextAllRefPosition)
     {
         if (currentRefPosition->refType != RefTypeDummyDef)
         {
@@ -8097,12 +8104,12 @@ void LinearScan::resolveRegisters()
     }
 
     // The next RefPosition should be for the block. Move past it.
-    assert(currentRefPosition != refPositions.end());
+    assert(currentRefPosition != nullptr);
     assert(currentRefPosition->refType == RefTypeBB);
-    ++currentRefPosition;
+    currentRefPosition = currentRefPosition->nextAllRefPosition;
 
     // Handle the RefPositions for the block
-    for (; currentRefPosition != refPositions.end(); ++currentRefPosition)
+    for (; currentRefPosition != nullptr; currentRefPosition = currentRefPosition->nextAllRefPosition)
     {
         if (currentRefPosition->refType == RefTypeBB || currentRefPosition->refType == RefTypeDummyDef)
         {
@@ -10170,7 +10177,7 @@ void LinearScan::dumpLsraStats(FILE* file)
     fprintf(file, "Total Reg Cand Vars: %d\n", regCandidateVarCount);
     fprintf(file, "Total number of Intervals: %d\n",
             static_cast<int>((intervals.size() == 0 ? 0 : (intervals.size() - 1))));
-    fprintf(file, "Total number of RefPositions: %d\n", static_cast<int>(refPositions.size() - 1));
+    fprintf(file, "Total number of RefPositions: %d\n", static_cast<int>(numRefPositions));
 
     // compute total number of spill temps created
     unsigned numSpillTemps = 0;
@@ -10877,7 +10884,7 @@ void LinearScan::TupleStyleDump(LsraTupleDumpMode mode)
     // currentRefPosition is not used for LSRA_DUMP_PRE
     // We keep separate iterators for defs, so that we can print them
     // on the lhs of the dump
-    RefPositionIterator currentRefPosition = refPositions.begin();
+    RefPosition* currentRefPosition = allRefPositionsHead;
 
     switch (mode)
     {
@@ -10898,7 +10905,7 @@ void LinearScan::TupleStyleDump(LsraTupleDumpMode mode)
     if (mode != LSRA_DUMP_PRE)
     {
         printf("Incoming Parameters: ");
-        for (; currentRefPosition != refPositions.end(); ++currentRefPosition)
+        for (; currentRefPosition != nullptr; currentRefPosition = currentRefPosition->nextAllRefPosition)
         {
             if (currentRefPosition->refType == RefTypeBB)
             {
@@ -10944,7 +10951,7 @@ void LinearScan::TupleStyleDump(LsraTupleDumpMode mode)
     {
         bool printedBlockHeader = false;
         // We should find the boundary RefPositions in the order of exposed uses, dummy defs, and the blocks
-        for (; currentRefPosition != refPositions.end(); ++currentRefPosition)
+        for (; currentRefPosition != nullptr; currentRefPosition = currentRefPosition->nextAllRefPosition)
         {
             Interval* interval = nullptr;
             if (currentRefPosition->isIntervalRef())
@@ -11036,7 +11043,7 @@ void LinearScan::TupleStyleDump(LsraTupleDumpMode mode)
             // and combining the fixed regs with their associated def or use
             bool killPrinted = false;
             RefPosition* lastFixedRegRefPos = nullptr;
-            for (; currentRefPosition != refPositions.end(); ++currentRefPosition)
+            for (; currentRefPosition != nullptr; currentRefPosition = currentRefPosition->nextAllRefPosition)
             {
                 if (!(currentRefPosition->nodeLocation == tree->gtSeqNum ||
                       currentRefPosition->nodeLocation == tree->gtSeqNum + 1))
@@ -11443,10 +11450,10 @@ void LinearScan::dumpRegRecordHeader()
     maxNodeLocation = (maxNodeLocation == 0) ? 1 : maxNodeLocation; // corner case of a method with an infinite loop
                                                                     // without any GenTree nodes
     assert(maxNodeLocation >= 1);
-    assert(refPositions.size() >= 1);
+    assert(numRefPositions >= 1);
     int treeIdWidth = 9; /* '[XXXXX] '*/
     int nodeLocationWidth = (int)log10((double)maxNodeLocation) + 1;
-    int refPositionWidth = (int)log10((double)refPositions.size()) + 1;
+    int refPositionWidth = (int)log10((double)numRefPositions) + 1;
     int refTypeInfoWidth = 4 /*TYPE*/ + 2 /* last-use and delayed */ + 1 /* space */;
     int locationAndRPNumWidth = nodeLocationWidth + 2 /* .# */ + refPositionWidth + 1 /* space */;
     int shortRefPositionDumpWidth = locationAndRPNumWidth + regColumnWidth + 1 /* space */ + refTypeInfoWidth;
@@ -11971,8 +11978,9 @@ void LinearScan::verifyFinalAllocation()
     BasicBlock* currentBlock = nullptr;
     GenTree* firstBlockEndResolutionNode = nullptr;
     LsraLocation currentLocation = MinLocation;
-    for (RefPosition& currentRefPosition : refPositions)
+    for (RefPosition* pCurrentRefPosition = allRefPositionsHead; pCurrentRefPosition != nullptr; pCurrentRefPosition = pCurrentRefPosition->nextAllRefPosition)
     {
+        RefPosition& currentRefPosition = *pCurrentRefPosition;
         Interval* interval = nullptr;
         RegRecord* regRecord = nullptr;
         regNumber regNum = REG_NA;
diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h
index 5d49673ffc4be..eca5c31265716 100644
--- a/src/coreclr/jit/lsra.h
+++ b/src/coreclr/jit/lsra.h
@@ -444,9 +444,6 @@ inline bool RefTypeIsDef(RefType refType)
 typedef regNumberSmall* VarToRegMap;
 
 typedef jitstd::list<Interval> IntervalList;
-typedef jitstd::list<RefPosition> RefPositionList;
-typedef jitstd::list<RefPosition>::iterator RefPositionIterator;
-typedef jitstd::list<RefPosition>::reverse_iterator RefPositionReverseIterator;
 
 class Referenceable
 {
@@ -1704,13 +1701,20 @@ class LinearScan : public LinearScanInterface
         return enregisterLocalVars;
     }
 
-    // Ordered list of RefPositions
-    RefPositionList refPositions;
+    char* currentRefPositionsBuffer;
+    char* currentRefPositionsBufferEnd;
+
+    RefPosition* allRefPositionsHead;
+    RefPosition* allRefPositionsTail;
+    RefPosition** allRefPositionsTailSlot;
 
     // Head of linked list of RefTypeKill ref positions
     RefPosition* killHead;
     // Tail slot of linked list of RefTypeKill ref positions
     RefPosition** killTail;
 
+#ifdef DEBUG
+    unsigned numRefPositions = 0;
+#endif
     // Per-block variable location mappings: an array indexed by block number that yields a
     // pointer to an array of regNumber, one per variable.
@@ -1909,6 +1913,11 @@ class LinearScan : public LinearScanInterface
         regsBusyUntilKill = RBM_NONE;
     }
 
+    RefPosition* getAllRefPositionsTail()
+    {
+        return allRefPositionsTail;
+    }
+
     bool conflictingFixedRegReference(regNumber regNum, RefPosition* refPosition);
 
     // This method should not be used and is here to retain old behavior.
@@ -2463,11 +2472,14 @@ class RefPosition
 
     Referenceable* referent;
 
-    // nextRefPosition is the next in code order.
+    // nextRefPosition is the next RP in code order associated with the referent.
     // Note that in either case there is no need for these to be doubly linked, as they
     // are only traversed in the forward direction, and are not moved.
     RefPosition* nextRefPosition;
 
+    RefPosition* nextAllRefPosition;
+    RefPosition* prevAllRefPosition;
+
     // The remaining fields are common to both options
     union {
@@ -2595,33 +2607,12 @@ class RefPosition
                 LsraLocation nodeLocation,
                 GenTree*     treeNode,
                 RefType      refType DEBUG_ARG(GenTree* buildNode))
-        : referent(nullptr)
-        , nextRefPosition(nullptr)
-        , treeNode(treeNode)
-        , registerAssignment(RBM_NONE)
+        : treeNode(treeNode)
         , bbNum(bbNum)
        , nodeLocation(nodeLocation)
         , refType(refType)
-        , multiRegIdx(0)
-#ifdef TARGET_ARM64
-        , needsConsecutive(false)
-        , regCount(0)
-#endif
-        , lastUse(false)
-        , reload(false)
-        , spillAfter(false)
-        , singleDefSpill(false)
-        , writeThru(false)
-        , copyReg(false)
-        , moveReg(false)
-        , isPhysRegRef(false)
-        , isFixedRegRef(false)
-        , isLocalDefUse(false)
-        , delayRegFree(false)
-        , outOfOrder(false)
 #ifdef DEBUG
         , minRegCandidateCount(1)
-        , rpNum(0)
         , buildNode(buildNode)
 #endif
     {
     }
diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp
index 9b8ce0559864f..0ba82f3940610 100644
--- a/src/coreclr/jit/lsraarm64.cpp
+++ b/src/coreclr/jit/lsraarm64.cpp
@@ -2074,12 +2074,12 @@ int LinearScan::BuildConsecutiveRegistersForUse(GenTree* treeNode, GenTree* rmwN
         for (GenTreeFieldList::Use& use : treeNode->AsFieldList()->Uses())
         {
             RefPosition* restoreRefPos = nullptr;
-            RefPositionIterator prevRefPos = refPositions.backPosition();
+            RefPosition* prevRefPos = allRefPositionsTail;
             currRefPos = BuildUse(use.GetNode(), RBM_NONE, 0);
 
             // Check if restore RefPositions were created
-            RefPositionIterator tailRefPos = refPositions.backPosition();
+            RefPosition* tailRefPos = allRefPositionsTail;
             assert(tailRefPos == currRefPos);
             prevRefPos++;
             if (prevRefPos != tailRefPos)
@@ -2161,15 +2161,15 @@ int LinearScan::BuildConsecutiveRegistersForUse(GenTree* treeNode, GenTree* rmwN
     }
     else
     {
-        RefPositionIterator refPositionMark = refPositions.backPosition();
+        RefPosition* refPositionMark = allRefPositionsTail;
         int refPositionsAdded = BuildOperandUses(treeNode);
 
         if (rmwNode != nullptr)
         {
             // Check all the newly created RefPositions for delay free
-            RefPositionIterator iter = refPositionMark;
+            RefPosition* iter = refPositionMark;
 
-            for (iter++; iter != refPositions.end(); iter++)
+            for (iter = iter->nextAllRefPosition; iter != nullptr; iter = iter->nextAllRefPosition)
             {
                 RefPosition* refPositionAdded = &(*iter);
diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp
index 5e0473ca906a4..d4dcfc421eea0 100644
--- a/src/coreclr/jit/lsrabuild.cpp
+++ b/src/coreclr/jit/lsrabuild.cpp
@@ -178,14 +178,28 @@ Interval* LinearScan::newInterval(RegisterType theRegisterType)
 //
 RefPosition* LinearScan::newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType)
 {
-    refPositions.emplace_back(curBBNum, nodeLocation, treeNode, refType DEBUG_ARG(currBuildNode));
-    RefPosition* newRP = &refPositions.back();
+    if (currentRefPositionsBuffer == currentRefPositionsBufferEnd)
+    {
+        currentRefPositionsBuffer = new (compiler, CMK_LSRA_RefPosition) char[64 * sizeof(RefPosition)];
+        currentRefPositionsBufferEnd = currentRefPositionsBuffer + 64 * sizeof(RefPosition);
+        memset(currentRefPositionsBuffer, 0, 64 * sizeof(RefPosition));
+    }
+
+    assert(currentRefPositionsBuffer + sizeof(RefPosition) <= currentRefPositionsBufferEnd);
+
+    RefPosition* newRP = new (currentRefPositionsBuffer, jitstd::placement_t()) RefPosition(curBBNum, nodeLocation, treeNode, refType DEBUG_ARG(currBuildNode));
+    currentRefPositionsBuffer += sizeof(RefPosition);
+
+    newRP->prevAllRefPosition = allRefPositionsTail;
+    *allRefPositionsTailSlot = allRefPositionsTail = newRP;
+    allRefPositionsTailSlot = &newRP->nextAllRefPosition;
+
 #ifdef DEBUG
     // Reset currBuildNode so we do not set it for subsequent refpositions belonging
     // to the same treeNode and hence, avoid printing it for every refposition inside
     // the allocation table.
     currBuildNode = nullptr;
-    newRP->rpNum = static_cast<unsigned>(refPositions.size() - 1);
+    newRP->rpNum = numRefPositions++;
     if (!enregisterLocalVars)
     {
         assert(!((refType == RefTypeParamDef) || (refType == RefTypeZeroInit) || (refType == RefTypeDummyDef) ||
@@ -1768,7 +1782,7 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc
     // If we are constraining the registers for allocation, we will modify all the RefPositions
     // we've built for this node after we've created them. In order to do that, we'll remember
     // the last RefPosition prior to those created for this node.
-    RefPositionIterator refPositionMark = refPositions.backPosition();
+    RefPosition* refPositionMark = allRefPositionsTail;
     int oldDefListCount = defList.Count();
     currBuildNode = tree;
 #endif // DEBUG
@@ -1793,8 +1807,8 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc
     // First, we count them.
     unsigned minRegCount = 0;
 
-    RefPositionIterator iter = refPositionMark;
-    for (iter++; iter != refPositions.end(); iter++)
+    RefPosition* iter = refPositionMark;
+    for (iter = iter->nextAllRefPosition; iter != nullptr; iter = iter->nextAllRefPosition)
     {
         RefPosition* newRefPosition = &(*iter);
         if (newRefPosition->isIntervalRef())
@@ -1838,7 +1852,7 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc
         // add one less than the maximum number of registers args to 'minRegCount'.
         minRegCount += MAX_REG_ARG - 1;
     }
-    for (refPositionMark++; refPositionMark != refPositions.end(); refPositionMark++)
+    for (refPositionMark = refPositionMark->nextAllRefPosition; refPositionMark != nullptr; refPositionMark = refPositionMark->nextAllRefPosition)
     {
         RefPosition* newRefPosition = &(*refPositionMark);
         unsigned minRegCountForRef = minRegCount;
diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp
index 5a966679f6cf0..46ea43d3f3f40 100644
--- a/src/coreclr/jit/lsraxarch.cpp
+++ b/src/coreclr/jit/lsraxarch.cpp
@@ -901,8 +901,8 @@ int LinearScan::BuildSelect(GenTreeOp* select)
     GenTree* trueVal = select->gtOp1;
     GenTree* falseVal = select->gtOp2;
 
-    RefPositionIterator op1UsesPrev = refPositions.backPosition();
-    assert(op1UsesPrev != refPositions.end());
+    RefPosition* op1UsesPrev = getAllRefPositionsTail();
+    assert(op1UsesPrev != nullptr);
 
     RefPosition* uncontainedTrueRP = nullptr;
     if (trueVal->isContained())
@@ -915,7 +915,7 @@ int LinearScan::BuildSelect(GenTreeOp* select)
         srcCount++;
     }
 
-    RefPositionIterator op2UsesPrev = refPositions.backPosition();
+    RefPosition* op2UsesPrev = getAllRefPositionsTail();
 
     RefPosition* uncontainedFalseRP = nullptr;
     if (falseVal->isContained())
@@ -959,19 +959,19 @@ int LinearScan::BuildSelect(GenTreeOp* select)
     // intervals for the ref positions we built above. It marks one of the uses
     // as delay freed when it finds interference (almost never).
     //
-    RefPositionIterator op1Use = op1UsesPrev;
+    RefPosition* op1Use = op1UsesPrev;
     while (op1Use != op2UsesPrev)
     {
-        ++op1Use;
+        op1Use = op1Use->nextAllRefPosition;
 
         if (op1Use->refType != RefTypeUse)
         {
             continue;
         }
 
-        RefPositionIterator op2Use = op2UsesPrev;
-        ++op2Use;
-        while (op2Use != refPositions.end())
+        RefPosition* op2Use = op2UsesPrev;
+        op2Use = op2Use->nextAllRefPosition;
+        while (op2Use != nullptr)
         {
             if (op2Use->refType == RefTypeUse)
             {
@@ -981,7 +981,7 @@ int LinearScan::BuildSelect(GenTreeOp* select)
                 break;
             }
 
-            ++op2Use;
+            op2Use = op2Use->nextAllRefPosition;
         }
     }
 }
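
Not part of the patch: the hunks in newRefPositionRaw above replace the jitstd::list<RefPosition> container with 64-element chunks that are zeroed once up front, placement-new construction into the next free slot, and an intrusive "all RefPositions" list appended through a tail-slot pointer. The standalone C++ sketch below restates that pattern with hypothetical names (NodeAllocator, Node, newNode, nextAll/prevAll) so the chunking and tail-slot linking can be read outside the JIT sources; it illustrates the technique only and is not the JIT's implementation (the real code allocates from the compiler's arena and never frees individual chunks).

#include <cassert>
#include <cstddef>
#include <cstdio>
#include <cstring>
#include <new>

// Stand-in for RefPosition: an intrusively linked node whose fields default to zero.
struct Node
{
    int   payload;
    Node* nextAll;
    Node* prevAll;
};

class NodeAllocator
{
    static constexpr size_t ChunkCount = 64; // nodes carved out of each pre-zeroed chunk

    char*  bufferCur = nullptr; // next free byte in the current chunk
    char*  bufferEnd = nullptr; // one past the end of the current chunk
    Node*  head      = nullptr;
    Node*  tail      = nullptr;
    Node** tailSlot  = &head;   // link to patch when appending the next node

public:
    Node* newNode(int payload)
    {
        if (bufferCur == bufferEnd)
        {
            // Grab a fresh chunk and zero it once. Chunks are intentionally never freed
            // in this sketch; the JIT's arena allocator releases them in bulk instead.
            bufferCur = static_cast<char*>(::operator new(ChunkCount * sizeof(Node)));
            bufferEnd = bufferCur + ChunkCount * sizeof(Node);
            memset(bufferCur, 0, ChunkCount * sizeof(Node));
        }

        assert(bufferCur + sizeof(Node) <= bufferEnd);

        // Placement-new into the pre-zeroed slot, then bump the cursor. Value-initialization
        // formally zeroes the members here; the patch instead relies on the chunk memset and
        // a constructor that only writes the non-zero fields.
        Node* node = new (bufferCur) Node();
        bufferCur += sizeof(Node);
        node->payload = payload;

        // Append through the tail slot, mirroring allRefPositionsTailSlot in the patch.
        node->prevAll = tail;
        *tailSlot     = node;
        tail          = node;
        tailSlot      = &node->nextAll;
        return node;
    }

    Node* first() const
    {
        return head;
    }
};

int main()
{
    NodeAllocator alloc;
    for (int i = 1; i <= 100; i++)
    {
        alloc.newNode(i);
    }

    // Forward traversal over every node ever allocated, as the LSRA loops now do
    // with nextAllRefPosition.
    long sum = 0;
    for (Node* n = alloc.first(); n != nullptr; n = n->nextAll)
    {
        sum += n->payload;
    }
    printf("sum = %ld\n", sum); // prints 5050
    return 0;
}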