From e4fe7a6c790556791c3850096c337ce57d5aa023 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 19 Jun 2024 17:28:43 +0200 Subject: [PATCH 1/2] Prezero RefPosition memory --- src/coreclr/jit/lsra.cpp | 54 ++++++++++++++++++++--------------- src/coreclr/jit/lsra.h | 47 ++++++++++++------------------ src/coreclr/jit/lsraarm64.cpp | 10 +++---- src/coreclr/jit/lsrabuild.cpp | 28 +++++++++++++----- src/coreclr/jit/lsraxarch.cpp | 18 ++++++------ 5 files changed, 85 insertions(+), 72 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 9018933d5e5bc..8cb05159d27b1 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -801,7 +801,9 @@ LinearScan::LinearScan(Compiler* theCompiler) : compiler(theCompiler) , intervals(theCompiler->getAllocator(CMK_LSRA_Interval)) , allocationPassComplete(false) - , refPositions(theCompiler->getAllocator(CMK_LSRA_RefPosition)) + , allRefPositionsHead(nullptr) + , allRefPositionsTail(nullptr) + , allRefPositionsTailSlot(&allRefPositionsHead) , killHead(nullptr) , killTail(&killHead) , listNodePool(theCompiler) @@ -827,6 +829,8 @@ LinearScan::LinearScan(Compiler* theCompiler) firstColdLoc = MaxLocation; + currentRefPositionsBuffer = currentRefPositionsBufferEnd = nullptr; + #ifdef DEBUG maxNodeLocation = 0; consecutiveRegistersLocation = 0; @@ -2457,8 +2461,8 @@ void LinearScan::checkLastUses(BasicBlock* block) VARSET_TP computedLive(VarSetOps::MakeCopy(compiler, block->bbLiveOut)); bool foundDiff = false; - RefPositionReverseIterator currentRefPosition = refPositions.rbegin(); - for (; currentRefPosition->refType != RefTypeBB; currentRefPosition++) + RefPosition* currentRefPosition = getAllRefPositionsTail(); + for (; currentRefPosition->refType != RefTypeBB; currentRefPosition = currentRefPosition->prevAllRefPosition) { // We should never see ParamDefs or ZeroInits within a basic block. 
assert(currentRefPosition->refType != RefTypeParamDef && currentRefPosition->refType != RefTypeZeroInit); @@ -2517,7 +2521,7 @@ void LinearScan::checkLastUses(BasicBlock* block) } } - assert(currentRefPosition != refPositions.rend()); + assert(currentRefPosition != nullptr); } VARSET_TP liveInNotComputedLive(VarSetOps::Diff(compiler, block->bbLiveIn, computedLive)); @@ -4862,9 +4866,9 @@ void LinearScan::dumpRefPositions(const char* str) printf("------------\n"); printf("REFPOSITIONS %s: \n", str); printf("------------\n"); - for (RefPosition& refPos : refPositions) + for (RefPosition* refPos = allRefPositionsHead; refPos != nullptr; refPos = refPos->nextAllRefPosition) { - refPos.dump(this); + refPos->dump(this); } } #endif // DEBUG @@ -5049,8 +5053,10 @@ void LinearScan::allocateRegistersMinimal() bool handledBlockEnd = false; - for (RefPosition& currentRefPosition : refPositions) + for (RefPosition* pCurrentRefPosition = allRefPositionsHead; pCurrentRefPosition != nullptr; pCurrentRefPosition = pCurrentRefPosition->nextAllRefPosition) { + RefPosition& currentRefPosition = *pCurrentRefPosition; + // TODO: Can we combine this with the freeing of registers below? It might // mess with the dump, since this was previously being done before the call below // to dumpRegRecords. @@ -5733,8 +5739,9 @@ void LinearScan::allocateRegisters() bool handledBlockEnd = false; - for (RefPosition& currentRefPosition : refPositions) + for (RefPosition* pCurrentRefPosition = allRefPositionsHead; pCurrentRefPosition != nullptr; pCurrentRefPosition = pCurrentRefPosition->nextAllRefPosition) { + RefPosition& currentRefPosition = *pCurrentRefPosition; RefPosition* nextRefPosition = currentRefPosition.nextRefPosition; // TODO: Can we combine this with the freeing of registers below? 
It might @@ -8018,12 +8025,12 @@ void LinearScan::resolveRegisters() } // handle incoming arguments and special temps - RefPositionIterator currentRefPosition = refPositions.begin(); + RefPosition* currentRefPosition = allRefPositionsHead; if (localVarsEnregistered) { VarToRegMap entryVarToRegMap = inVarToRegMaps[compiler->fgFirstBB->bbNum]; - for (; currentRefPosition != refPositions.end(); ++currentRefPosition) + for (; currentRefPosition != nullptr; currentRefPosition = currentRefPosition->nextAllRefPosition) { if (currentRefPosition->refType != RefTypeParamDef && currentRefPosition->refType != RefTypeZeroInit) { @@ -8050,7 +8057,7 @@ void LinearScan::resolveRegisters() } else { - assert(currentRefPosition == refPositions.end() || + assert(currentRefPosition == nullptr || (currentRefPosition->refType != RefTypeParamDef && currentRefPosition->refType != RefTypeZeroInit)); } @@ -8071,7 +8078,7 @@ void LinearScan::resolveRegisters() } // Handle the DummyDefs, updating the incoming var location. - for (; currentRefPosition != refPositions.end(); ++currentRefPosition) + for (; currentRefPosition != nullptr; currentRefPosition = currentRefPosition->nextAllRefPosition) { if (currentRefPosition->refType != RefTypeDummyDef) { @@ -8097,12 +8104,12 @@ void LinearScan::resolveRegisters() } // The next RefPosition should be for the block. Move past it. 
- assert(currentRefPosition != refPositions.end()); + assert(currentRefPosition != nullptr); assert(currentRefPosition->refType == RefTypeBB); - ++currentRefPosition; + currentRefPosition = currentRefPosition->nextAllRefPosition; // Handle the RefPositions for the block - for (; currentRefPosition != refPositions.end(); ++currentRefPosition) + for (; currentRefPosition != nullptr; currentRefPosition = currentRefPosition->nextAllRefPosition) { if (currentRefPosition->refType == RefTypeBB || currentRefPosition->refType == RefTypeDummyDef) { @@ -10170,7 +10177,7 @@ void LinearScan::dumpLsraStats(FILE* file) fprintf(file, "Total Reg Cand Vars: %d\n", regCandidateVarCount); fprintf(file, "Total number of Intervals: %d\n", static_cast((intervals.size() == 0 ? 0 : (intervals.size() - 1)))); - fprintf(file, "Total number of RefPositions: %d\n", static_cast(refPositions.size() - 1)); + fprintf(file, "Total number of RefPositions: %d\n", static_cast(numRefPositions)); // compute total number of spill temps created unsigned numSpillTemps = 0; @@ -10877,7 +10884,7 @@ void LinearScan::TupleStyleDump(LsraTupleDumpMode mode) // currentRefPosition is not used for LSRA_DUMP_PRE // We keep separate iterators for defs, so that we can print them // on the lhs of the dump - RefPositionIterator currentRefPosition = refPositions.begin(); + RefPosition* currentRefPosition = allRefPositionsHead; switch (mode) { @@ -10898,7 +10905,7 @@ void LinearScan::TupleStyleDump(LsraTupleDumpMode mode) if (mode != LSRA_DUMP_PRE) { printf("Incoming Parameters: "); - for (; currentRefPosition != refPositions.end(); ++currentRefPosition) + for (; currentRefPosition != nullptr; currentRefPosition = currentRefPosition->nextAllRefPosition) { if (currentRefPosition->refType == RefTypeBB) { @@ -10944,7 +10951,7 @@ void LinearScan::TupleStyleDump(LsraTupleDumpMode mode) { bool printedBlockHeader = false; // We should find the boundary RefPositions in the order of exposed uses, dummy defs, and the blocks - for 
(; currentRefPosition != refPositions.end(); ++currentRefPosition) + for (; currentRefPosition != nullptr; currentRefPosition = currentRefPosition->nextAllRefPosition) { Interval* interval = nullptr; if (currentRefPosition->isIntervalRef()) @@ -11036,7 +11043,7 @@ void LinearScan::TupleStyleDump(LsraTupleDumpMode mode) // and combining the fixed regs with their associated def or use bool killPrinted = false; RefPosition* lastFixedRegRefPos = nullptr; - for (; currentRefPosition != refPositions.end(); ++currentRefPosition) + for (; currentRefPosition != nullptr; currentRefPosition = currentRefPosition->nextAllRefPosition) { if (!(currentRefPosition->nodeLocation == tree->gtSeqNum || currentRefPosition->nodeLocation == tree->gtSeqNum + 1)) @@ -11443,10 +11450,10 @@ void LinearScan::dumpRegRecordHeader() maxNodeLocation = (maxNodeLocation == 0) ? 1 : maxNodeLocation; // corner case of a method with an infinite loop // without any GenTree nodes assert(maxNodeLocation >= 1); - assert(refPositions.size() >= 1); + assert(numRefPositions >= 1); int treeIdWidth = 9; /* '[XXXXX] '*/ int nodeLocationWidth = (int)log10((double)maxNodeLocation) + 1; - int refPositionWidth = (int)log10((double)refPositions.size()) + 1; + int refPositionWidth = (int)log10((double)numRefPositions) + 1; int refTypeInfoWidth = 4 /*TYPE*/ + 2 /* last-use and delayed */ + 1 /* space */; int locationAndRPNumWidth = nodeLocationWidth + 2 /* .# */ + refPositionWidth + 1 /* space */; int shortRefPositionDumpWidth = locationAndRPNumWidth + regColumnWidth + 1 /* space */ + refTypeInfoWidth; @@ -11971,8 +11978,9 @@ void LinearScan::verifyFinalAllocation() BasicBlock* currentBlock = nullptr; GenTree* firstBlockEndResolutionNode = nullptr; LsraLocation currentLocation = MinLocation; - for (RefPosition& currentRefPosition : refPositions) + for (RefPosition* pCurrentRefPosition = allRefPositionsHead; pCurrentRefPosition != nullptr; pCurrentRefPosition = pCurrentRefPosition->nextAllRefPosition) { + RefPosition& 
currentRefPosition = *pCurrentRefPosition; Interval* interval = nullptr; RegRecord* regRecord = nullptr; regNumber regNum = REG_NA; diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 5d49673ffc4be..eca5c31265716 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -444,9 +444,6 @@ inline bool RefTypeIsDef(RefType refType) typedef regNumberSmall* VarToRegMap; typedef jitstd::list IntervalList; -typedef jitstd::list RefPositionList; -typedef jitstd::list::iterator RefPositionIterator; -typedef jitstd::list::reverse_iterator RefPositionReverseIterator; class Referenceable { @@ -1704,13 +1701,20 @@ class LinearScan : public LinearScanInterface return enregisterLocalVars; } - // Ordered list of RefPositions - RefPositionList refPositions; + char* currentRefPositionsBuffer; + char* currentRefPositionsBufferEnd; + + RefPosition* allRefPositionsHead; + RefPosition* allRefPositionsTail; + RefPosition** allRefPositionsTailSlot; // Head of linked list of RefTypeKill ref positions RefPosition* killHead; // Tail slot of linked list of RefTypeKill ref positions RefPosition** killTail; +#ifdef DEBUG + unsigned numRefPositions = 0; +#endif // Per-block variable location mappings: an array indexed by block number that yields a // pointer to an array of regNumber, one per variable. @@ -1909,6 +1913,11 @@ class LinearScan : public LinearScanInterface regsBusyUntilKill = RBM_NONE; } + RefPosition* getAllRefPositionsTail() + { + return allRefPositionsTail; + } + bool conflictingFixedRegReference(regNumber regNum, RefPosition* refPosition); // This method should not be used and is here to retain old behavior. @@ -2463,11 +2472,14 @@ class RefPosition Referenceable* referent; - // nextRefPosition is the next in code order. + // nextRefPosition is the next RP in code order associated with the referent. // Note that in either case there is no need for these to be doubly linked, as they // are only traversed in the forward direction, and are not moved. 
RefPosition* nextRefPosition; + RefPosition* nextAllRefPosition; + RefPosition* prevAllRefPosition; + // The remaining fields are common to both options union { @@ -2595,33 +2607,12 @@ class RefPosition LsraLocation nodeLocation, GenTree* treeNode, RefType refType DEBUG_ARG(GenTree* buildNode)) - : referent(nullptr) - , nextRefPosition(nullptr) - , treeNode(treeNode) - , registerAssignment(RBM_NONE) + : treeNode(treeNode) , bbNum(bbNum) , nodeLocation(nodeLocation) , refType(refType) - , multiRegIdx(0) -#ifdef TARGET_ARM64 - , needsConsecutive(false) - , regCount(0) -#endif - , lastUse(false) - , reload(false) - , spillAfter(false) - , singleDefSpill(false) - , writeThru(false) - , copyReg(false) - , moveReg(false) - , isPhysRegRef(false) - , isFixedRegRef(false) - , isLocalDefUse(false) - , delayRegFree(false) - , outOfOrder(false) #ifdef DEBUG , minRegCandidateCount(1) - , rpNum(0) , buildNode(buildNode) #endif { diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 9b8ce0559864f..0ba82f3940610 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -2074,12 +2074,12 @@ int LinearScan::BuildConsecutiveRegistersForUse(GenTree* treeNode, GenTree* rmwN for (GenTreeFieldList::Use& use : treeNode->AsFieldList()->Uses()) { RefPosition* restoreRefPos = nullptr; - RefPositionIterator prevRefPos = refPositions.backPosition(); + RefPosition* prevRefPos = allRefPositionsTail; currRefPos = BuildUse(use.GetNode(), RBM_NONE, 0); // Check if restore RefPositions were created - RefPositionIterator tailRefPos = refPositions.backPosition(); + RefPosition* tailRefPos = allRefPositionsTail; assert(tailRefPos == currRefPos); - prevRefPos++; + prevRefPos = prevRefPos->nextAllRefPosition; if (prevRefPos != tailRefPos) @@ -2161,15 +2161,15 @@ int LinearScan::BuildConsecutiveRegistersForUse(GenTree* treeNode, GenTree* rmwN } else { - RefPositionIterator refPositionMark = refPositions.backPosition(); + RefPosition* refPositionMark = allRefPositionsTail; int refPositionsAdded = 
BuildOperandUses(treeNode); if (rmwNode != nullptr) { // Check all the newly created RefPositions for delay free - RefPositionIterator iter = refPositionMark; + RefPosition* iter = refPositionMark; - for (iter++; iter != refPositions.end(); iter++) + for (iter = iter->nextAllRefPosition; iter != nullptr; iter = iter->nextAllRefPosition) { RefPosition* refPositionAdded = &(*iter); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 5e0473ca906a4..d4dcfc421eea0 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -178,14 +178,28 @@ Interval* LinearScan::newInterval(RegisterType theRegisterType) // RefPosition* LinearScan::newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType) { - refPositions.emplace_back(curBBNum, nodeLocation, treeNode, refType DEBUG_ARG(currBuildNode)); - RefPosition* newRP = &refPositions.back(); + if (currentRefPositionsBuffer == currentRefPositionsBufferEnd) + { + currentRefPositionsBuffer = new (compiler, CMK_LSRA_RefPosition) char[64 * sizeof(RefPosition)]; + currentRefPositionsBufferEnd = currentRefPositionsBuffer + 64 * sizeof(RefPosition); + memset(currentRefPositionsBuffer, 0, 64 * sizeof(RefPosition)); + } + + assert(currentRefPositionsBuffer + sizeof(RefPosition) <= currentRefPositionsBufferEnd); + + RefPosition* newRP = new (currentRefPositionsBuffer, jitstd::placement_t()) RefPosition(curBBNum, nodeLocation, treeNode, refType DEBUG_ARG(currBuildNode)); + currentRefPositionsBuffer += sizeof(RefPosition); + + newRP->prevAllRefPosition = allRefPositionsTail; + *allRefPositionsTailSlot = allRefPositionsTail = newRP; + allRefPositionsTailSlot = &newRP->nextAllRefPosition; + #ifdef DEBUG // Reset currBuildNode so we do not set it for subsequent refpositions belonging // to the same treeNode and hence, avoid printing it for every refposition inside // the allocation table. 
currBuildNode = nullptr; - newRP->rpNum = static_cast(refPositions.size() - 1); + newRP->rpNum = numRefPositions++; if (!enregisterLocalVars) { assert(!((refType == RefTypeParamDef) || (refType == RefTypeZeroInit) || (refType == RefTypeDummyDef) || @@ -1768,7 +1782,7 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc // If we are constraining the registers for allocation, we will modify all the RefPositions // we've built for this node after we've created them. In order to do that, we'll remember // the last RefPosition prior to those created for this node. - RefPositionIterator refPositionMark = refPositions.backPosition(); + RefPosition* refPositionMark = allRefPositionsTail; int oldDefListCount = defList.Count(); currBuildNode = tree; #endif // DEBUG @@ -1793,8 +1807,8 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc // First, we count them. unsigned minRegCount = 0; - RefPositionIterator iter = refPositionMark; - for (iter++; iter != refPositions.end(); iter++) + RefPosition* iter = refPositionMark; + for (iter = iter->nextAllRefPosition; iter != nullptr; iter = iter->nextAllRefPosition) { RefPosition* newRefPosition = &(*iter); if (newRefPosition->isIntervalRef()) @@ -1838,7 +1852,7 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc // add one less than the maximum number of registers args to 'minRegCount'. 
minRegCount += MAX_REG_ARG - 1; } - for (refPositionMark++; refPositionMark != refPositions.end(); refPositionMark++) + for (refPositionMark = refPositionMark->nextAllRefPosition; refPositionMark != nullptr; refPositionMark = refPositionMark->nextAllRefPosition) { RefPosition* newRefPosition = &(*refPositionMark); unsigned minRegCountForRef = minRegCount; diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 5a966679f6cf0..46ea43d3f3f40 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -901,8 +901,8 @@ int LinearScan::BuildSelect(GenTreeOp* select) GenTree* trueVal = select->gtOp1; GenTree* falseVal = select->gtOp2; - RefPositionIterator op1UsesPrev = refPositions.backPosition(); - assert(op1UsesPrev != refPositions.end()); + RefPosition* op1UsesPrev = getAllRefPositionsTail(); + assert(op1UsesPrev != nullptr); RefPosition* uncontainedTrueRP = nullptr; if (trueVal->isContained()) @@ -915,7 +915,7 @@ int LinearScan::BuildSelect(GenTreeOp* select) srcCount++; } - RefPositionIterator op2UsesPrev = refPositions.backPosition(); + RefPosition* op2UsesPrev = getAllRefPositionsTail(); RefPosition* uncontainedFalseRP = nullptr; if (falseVal->isContained()) @@ -959,19 +959,19 @@ int LinearScan::BuildSelect(GenTreeOp* select) // intervals for the ref positions we built above. It marks one of the uses // as delay freed when it finds interference (almost never). 
// - RefPositionIterator op1Use = op1UsesPrev; + RefPosition* op1Use = op1UsesPrev; while (op1Use != op2UsesPrev) { - ++op1Use; + op1Use = op1Use->nextAllRefPosition; if (op1Use->refType != RefTypeUse) { continue; } - RefPositionIterator op2Use = op2UsesPrev; - ++op2Use; - while (op2Use != refPositions.end()) + RefPosition* op2Use = op2UsesPrev; + op2Use = op2Use->nextAllRefPosition; + while (op2Use != nullptr) { if (op2Use->refType == RefTypeUse) { @@ -981,7 +981,7 @@ int LinearScan::BuildSelect(GenTreeOp* select) break; } - ++op2Use; + op2Use = op2Use->nextAllRefPosition; } } } From 032c76f6ced33132a5d6270eb98276932b3c5178 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 19 Jun 2024 18:34:55 +0200 Subject: [PATCH 2/2] JIT: Prezero memory for RefPosition and Interval Switch these to use intrusive linked lists and a prezeroed buffer. --- src/coreclr/jit/lsra.cpp | 54 +++++++++++++++-------------------- src/coreclr/jit/lsra.h | 51 ++++++++++++--------------------- src/coreclr/jit/lsrabuild.cpp | 28 ++++++++++++------ 3 files changed, 61 insertions(+), 72 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 8cb05159d27b1..711524f2c1bf6 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -799,13 +799,7 @@ LinearScanInterface* getLinearScanAllocator(Compiler* comp) // LinearScan::LinearScan(Compiler* theCompiler) : compiler(theCompiler) - , intervals(theCompiler->getAllocator(CMK_LSRA_Interval)) , allocationPassComplete(false) - , allRefPositionsHead(nullptr) - , allRefPositionsTail(nullptr) - , allRefPositionsTailSlot(&allRefPositionsHead) - , killHead(nullptr) - , killTail(&killHead) , listNodePool(theCompiler) { availableRegCount = ACTUAL_REG_COUNT; @@ -4998,11 +4992,10 @@ void LinearScan::allocateRegistersMinimal() DBEXEC(VERBOSE, lsraDumpIntervals("before allocateRegistersMinimal")); // at start, nothing is active except for register args - for (Interval& interval : intervals) + for (Interval* 
interval = intervalsHead; interval != nullptr; interval = interval->nextInterval) { - Interval* currentInterval = &interval; - currentInterval->recentRefPosition = nullptr; - assert(!currentInterval->isActive); + interval->recentRefPosition = nullptr; + assert(!interval->isActive); } resetRegState(); @@ -5619,12 +5612,12 @@ void LinearScan::allocateRegistersMinimal() // provide a Reset function (!) - we'll probably replace this so don't bother // adding it - for (Interval& interval : intervals) + for (Interval* interval = intervalsHead; interval != nullptr; interval = interval->nextInterval) { - if (interval.isActive) + if (interval->isActive) { printf("Active "); - interval.dump(this->compiler); + interval->dump(this->compiler); } } @@ -5646,17 +5639,16 @@ void LinearScan::allocateRegisters() DBEXEC(VERBOSE, lsraDumpIntervals("before allocateRegisters")); // at start, nothing is active except for register args - for (Interval& interval : intervals) + for (Interval* interval = intervalsHead; interval != nullptr; interval = interval->nextInterval) { - Interval* currentInterval = &interval; - currentInterval->recentRefPosition = nullptr; - currentInterval->isActive = false; - if (currentInterval->isLocalVar && !stressInitialParamReg()) + interval->recentRefPosition = nullptr; + interval->isActive = false; + if (interval->isLocalVar && !stressInitialParamReg()) { - LclVarDsc* varDsc = currentInterval->getLocalVar(compiler); - if (varDsc->lvIsRegArg && currentInterval->firstRefPosition != nullptr) + LclVarDsc* varDsc = interval->getLocalVar(compiler); + if (varDsc->lvIsRegArg && interval->firstRefPosition != nullptr) { - currentInterval->isActive = true; + interval->isActive = true; } } } @@ -6921,12 +6913,12 @@ void LinearScan::allocateRegisters() // provide a Reset function (!) 
- we'll probably replace this so don't bother // adding it - for (Interval& interval : intervals) + for (Interval* interval = intervalsHead; interval != nullptr; interval = interval->nextInterval) { - if (interval.isActive) + if (interval->isActive) { printf("Active "); - interval.dump(this->compiler); + interval->dump(this->compiler); } } @@ -10176,7 +10168,7 @@ void LinearScan::dumpLsraStats(FILE* file) fprintf(file, "Total Tracked Vars: %d\n", compiler->lvaTrackedCount); fprintf(file, "Total Reg Cand Vars: %d\n", regCandidateVarCount); fprintf(file, "Total number of Intervals: %d\n", - static_cast((intervals.size() == 0 ? 0 : (intervals.size() - 1)))); + static_cast((numIntervals == 0 ? 0 : (numIntervals - 1)))); fprintf(file, "Total number of RefPositions: %d\n", static_cast(numRefPositions)); // compute total number of spill temps created @@ -10682,11 +10674,11 @@ void LinearScan::dumpDefList() void LinearScan::lsraDumpIntervals(const char* msg) { printf("\nLinear scan intervals %s:\n", msg); - for (Interval& interval : intervals) + for (Interval* interval = intervalsHead; interval != nullptr; interval = interval->nextInterval) { // only dump something if it has references // if (interval->firstRefPosition) - interval.dump(this->compiler); + interval->dump(this->compiler); } printf("\n"); @@ -11424,7 +11416,7 @@ void LinearScan::dumpRegRecordHeader() // First, determine the width of each register column (which holds a reg name in the // header, and an interval name in each subsequent row). 
- int intervalNumberWidth = (int)log10((double)intervals.size()) + 1; + int intervalNumberWidth = (int)log10((double)numIntervals) + 1; // The regColumnWidth includes the identifying character (I or V) and an 'i', 'p' or 'a' (inactive, // partially-spilled or active) regColumnWidth = intervalNumberWidth + 2; @@ -11967,10 +11959,10 @@ void LinearScan::verifyFinalAllocation() physRegRecord->assignedInterval = nullptr; } - for (Interval& interval : intervals) + for (Interval* interval = intervalsHead; interval != nullptr; interval = interval->nextInterval) { - interval.assignedReg = nullptr; - interval.physReg = REG_NA; + interval->assignedReg = nullptr; + interval->physReg = REG_NA; } DBEXEC(VERBOSE, dumpRegRecordTitle()); diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index eca5c31265716..d5e050924b845 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1620,7 +1620,14 @@ class LinearScan : public LinearScanInterface RefPosition* activeRefPosition; #endif // DEBUG - IntervalList intervals; + char* currentIntervalsBuffer = nullptr; + char* currentIntervalsBufferEnd = nullptr; + + Interval* intervalsHead = nullptr; + Interval** intervalsTailSlot = &intervalsHead; +#ifdef DEBUG + unsigned numIntervals = 0; +#endif RegRecord physRegs[REG_COUNT]; @@ -1701,17 +1708,17 @@ class LinearScan : public LinearScanInterface return enregisterLocalVars; } - char* currentRefPositionsBuffer; - char* currentRefPositionsBufferEnd; + char* currentRefPositionsBuffer = nullptr; + char* currentRefPositionsBufferEnd = nullptr; - RefPosition* allRefPositionsHead; - RefPosition* allRefPositionsTail; - RefPosition** allRefPositionsTailSlot; + RefPosition* allRefPositionsHead = nullptr; + RefPosition* allRefPositionsTail = nullptr; + RefPosition** allRefPositionsTailSlot = &allRefPositionsHead; // Head of linked list of RefTypeKill ref positions - RefPosition* killHead; + RefPosition* killHead = nullptr; // Tail slot of linked list of RefTypeKill ref positions - 
RefPosition** killTail; + RefPosition** killTail = &killHead; #ifdef DEBUG unsigned numRefPositions = 0; #endif @@ -2181,32 +2188,7 @@ class Interval : public Referenceable Interval(RegisterType registerType, SingleTypeRegSet registerPreferences) : Referenceable(registerType) , registerPreferences(registerPreferences) - , registerAversion(RBM_NONE) - , relatedInterval(nullptr) - , assignedReg(nullptr) - , varNum(0) , physReg(REG_COUNT) - , isActive(false) - , isLocalVar(false) - , isSplit(false) - , isSpilled(false) - , isInternal(false) - , isStructField(false) - , isPromotedStruct(false) - , hasConflictingDefUse(false) - , hasInterferingUses(false) - , isSpecialPutArg(false) - , preferCalleeSave(false) - , isConstant(false) -#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - , isUpperVector(false) - , isPartiallySpilled(false) -#endif - , isWriteThru(false) - , isSingleDef(false) -#ifdef DEBUG - , intervalIndex(0) -#endif { } @@ -2221,6 +2203,9 @@ class Interval : public Referenceable void setLocalNumber(Compiler* compiler, unsigned lclNum, LinearScan* l); + // Next interval in linked list of all intervals + Interval* nextInterval; + // Fixed registers for which this Interval has a preference SingleTypeRegSet registerPreferences; diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index d4dcfc421eea0..07a8f281b7cb3 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -153,11 +153,23 @@ void RefInfoListNodePool::ReturnNode(RefInfoListNode* listNode) // Interval* LinearScan::newInterval(RegisterType theRegisterType) { - intervals.emplace_back(theRegisterType, allRegs(theRegisterType)); - Interval* newInt = &intervals.back(); + if (currentIntervalsBuffer == currentIntervalsBufferEnd) + { + currentIntervalsBuffer = new (compiler, CMK_LSRA_Interval) char[32 * sizeof(Interval)]; + currentIntervalsBufferEnd = currentIntervalsBuffer + 32 * sizeof(Interval); + memset(currentIntervalsBuffer, 0, 32 * sizeof(Interval)); + } + + 
assert(currentIntervalsBuffer + sizeof(Interval) <= currentIntervalsBufferEnd); + + Interval* newInt = new (currentIntervalsBuffer, jitstd::placement_t()) Interval(theRegisterType, allRegs(theRegisterType)); + currentIntervalsBuffer += sizeof(Interval); + + *intervalsTailSlot = newInt; + intervalsTailSlot = &newInt->nextInterval; #ifdef DEBUG - newInt->intervalIndex = static_cast(intervals.size() - 1); + newInt->intervalIndex = numIntervals++; #endif // DEBUG DBEXEC(VERBOSE, newInt->dump(this->compiler)); @@ -1457,16 +1469,16 @@ void LinearScan::makeUpperVectorInterval(unsigned varIndex) Interval* LinearScan::getUpperVectorInterval(unsigned varIndex) { // TODO-Throughput: Consider creating a map from varIndex to upperVector interval. - for (Interval& interval : intervals) + for (Interval* interval = intervalsHead; interval != nullptr; interval = interval->nextInterval) { - if (interval.isLocalVar) + if (interval->isLocalVar) { continue; } - noway_assert(interval.isUpperVector); - if (interval.relatedInterval->getVarIndex(compiler) == varIndex) + noway_assert(interval->isUpperVector); + if (interval->relatedInterval->getVarIndex(compiler) == varIndex) { - return &interval; + return interval; } } unreached();