Skip to content

Commit

Permalink
Prezero RefPosition memory
Browse files Browse the repository at this point in the history
  • Loading branch information
jakobbotsch committed Jun 19, 2024
1 parent a5c1c9f commit e4fe7a6
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 72 deletions.
54 changes: 31 additions & 23 deletions src/coreclr/jit/lsra.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -801,7 +801,9 @@ LinearScan::LinearScan(Compiler* theCompiler)
: compiler(theCompiler)
, intervals(theCompiler->getAllocator(CMK_LSRA_Interval))
, allocationPassComplete(false)
, refPositions(theCompiler->getAllocator(CMK_LSRA_RefPosition))
, allRefPositionsHead(nullptr)
, allRefPositionsTail(nullptr)
, allRefPositionsTailSlot(&allRefPositionsHead)
, killHead(nullptr)
, killTail(&killHead)
, listNodePool(theCompiler)
Expand All @@ -827,6 +829,8 @@ LinearScan::LinearScan(Compiler* theCompiler)

firstColdLoc = MaxLocation;

currentRefPositionsBuffer = currentRefPositionsBufferEnd = nullptr;

#ifdef DEBUG
maxNodeLocation = 0;
consecutiveRegistersLocation = 0;
Expand Down Expand Up @@ -2457,8 +2461,8 @@ void LinearScan::checkLastUses(BasicBlock* block)
VARSET_TP computedLive(VarSetOps::MakeCopy(compiler, block->bbLiveOut));

bool foundDiff = false;
RefPositionReverseIterator currentRefPosition = refPositions.rbegin();
for (; currentRefPosition->refType != RefTypeBB; currentRefPosition++)
RefPosition* currentRefPosition = getAllRefPositionsTail();
for (; currentRefPosition->refType != RefTypeBB; currentRefPosition = currentRefPosition->prevAllRefPosition)
{
// We should never see ParamDefs or ZeroInits within a basic block.
assert(currentRefPosition->refType != RefTypeParamDef && currentRefPosition->refType != RefTypeZeroInit);
Expand Down Expand Up @@ -2517,7 +2521,7 @@ void LinearScan::checkLastUses(BasicBlock* block)
}
}

assert(currentRefPosition != refPositions.rend());
assert(currentRefPosition != nullptr);
}

VARSET_TP liveInNotComputedLive(VarSetOps::Diff(compiler, block->bbLiveIn, computedLive));
Expand Down Expand Up @@ -4862,9 +4866,9 @@ void LinearScan::dumpRefPositions(const char* str)
printf("------------\n");
printf("REFPOSITIONS %s: \n", str);
printf("------------\n");
for (RefPosition& refPos : refPositions)
for (RefPosition* refPos = allRefPositionsHead; refPos != nullptr; refPos = refPos->nextAllRefPosition)
{
refPos.dump(this);
refPos->dump(this);
}
}
#endif // DEBUG
Expand Down Expand Up @@ -5049,8 +5053,10 @@ void LinearScan::allocateRegistersMinimal()

bool handledBlockEnd = false;

for (RefPosition& currentRefPosition : refPositions)
for (RefPosition* pCurrentRefPosition = allRefPositionsHead; pCurrentRefPosition != nullptr; pCurrentRefPosition = pCurrentRefPosition->nextAllRefPosition)
{
RefPosition& currentRefPosition = *pCurrentRefPosition;

// TODO: Can we combine this with the freeing of registers below? It might
// mess with the dump, since this was previously being done before the call below
// to dumpRegRecords.
Expand Down Expand Up @@ -5733,8 +5739,9 @@ void LinearScan::allocateRegisters()

bool handledBlockEnd = false;

for (RefPosition& currentRefPosition : refPositions)
for (RefPosition* pCurrentRefPosition = allRefPositionsHead; pCurrentRefPosition != nullptr; pCurrentRefPosition = pCurrentRefPosition->nextAllRefPosition)
{
RefPosition& currentRefPosition = *pCurrentRefPosition;
RefPosition* nextRefPosition = currentRefPosition.nextRefPosition;

// TODO: Can we combine this with the freeing of registers below? It might
Expand Down Expand Up @@ -8018,12 +8025,12 @@ void LinearScan::resolveRegisters()
}

// handle incoming arguments and special temps
RefPositionIterator currentRefPosition = refPositions.begin();
RefPosition* currentRefPosition = allRefPositionsHead;

if (localVarsEnregistered)
{
VarToRegMap entryVarToRegMap = inVarToRegMaps[compiler->fgFirstBB->bbNum];
for (; currentRefPosition != refPositions.end(); ++currentRefPosition)
for (; currentRefPosition != nullptr; currentRefPosition = currentRefPosition->nextAllRefPosition)
{
if (currentRefPosition->refType != RefTypeParamDef && currentRefPosition->refType != RefTypeZeroInit)
{
Expand All @@ -8050,7 +8057,7 @@ void LinearScan::resolveRegisters()
}
else
{
assert(currentRefPosition == refPositions.end() ||
assert(currentRefPosition == nullptr ||
(currentRefPosition->refType != RefTypeParamDef && currentRefPosition->refType != RefTypeZeroInit));
}

Expand All @@ -8071,7 +8078,7 @@ void LinearScan::resolveRegisters()
}

// Handle the DummyDefs, updating the incoming var location.
for (; currentRefPosition != refPositions.end(); ++currentRefPosition)
for (; currentRefPosition != nullptr; currentRefPosition = currentRefPosition->nextAllRefPosition)
{
if (currentRefPosition->refType != RefTypeDummyDef)
{
Expand All @@ -8097,12 +8104,12 @@ void LinearScan::resolveRegisters()
}

// The next RefPosition should be for the block. Move past it.
assert(currentRefPosition != refPositions.end());
assert(currentRefPosition != nullptr);
assert(currentRefPosition->refType == RefTypeBB);
++currentRefPosition;
currentRefPosition = currentRefPosition->nextAllRefPosition;

// Handle the RefPositions for the block
for (; currentRefPosition != refPositions.end(); ++currentRefPosition)
for (; currentRefPosition != nullptr; currentRefPosition = currentRefPosition->nextAllRefPosition)
{
if (currentRefPosition->refType == RefTypeBB || currentRefPosition->refType == RefTypeDummyDef)
{
Expand Down Expand Up @@ -10170,7 +10177,7 @@ void LinearScan::dumpLsraStats(FILE* file)
fprintf(file, "Total Reg Cand Vars: %d\n", regCandidateVarCount);
fprintf(file, "Total number of Intervals: %d\n",
static_cast<unsigned>((intervals.size() == 0 ? 0 : (intervals.size() - 1))));
fprintf(file, "Total number of RefPositions: %d\n", static_cast<unsigned>(refPositions.size() - 1));
fprintf(file, "Total number of RefPositions: %d\n", static_cast<unsigned>(numRefPositions));

// compute total number of spill temps created
unsigned numSpillTemps = 0;
Expand Down Expand Up @@ -10877,7 +10884,7 @@ void LinearScan::TupleStyleDump(LsraTupleDumpMode mode)
// currentRefPosition is not used for LSRA_DUMP_PRE
// We keep separate iterators for defs, so that we can print them
// on the lhs of the dump
RefPositionIterator currentRefPosition = refPositions.begin();
RefPosition* currentRefPosition = allRefPositionsHead;

switch (mode)
{
Expand All @@ -10898,7 +10905,7 @@ void LinearScan::TupleStyleDump(LsraTupleDumpMode mode)
if (mode != LSRA_DUMP_PRE)
{
printf("Incoming Parameters: ");
for (; currentRefPosition != refPositions.end(); ++currentRefPosition)
for (; currentRefPosition != nullptr; currentRefPosition = currentRefPosition->nextAllRefPosition)
{
if (currentRefPosition->refType == RefTypeBB)
{
Expand Down Expand Up @@ -10944,7 +10951,7 @@ void LinearScan::TupleStyleDump(LsraTupleDumpMode mode)
{
bool printedBlockHeader = false;
// We should find the boundary RefPositions in the order of exposed uses, dummy defs, and the blocks
for (; currentRefPosition != refPositions.end(); ++currentRefPosition)
for (; currentRefPosition != nullptr; currentRefPosition = currentRefPosition->nextAllRefPosition)
{
Interval* interval = nullptr;
if (currentRefPosition->isIntervalRef())
Expand Down Expand Up @@ -11036,7 +11043,7 @@ void LinearScan::TupleStyleDump(LsraTupleDumpMode mode)
// and combining the fixed regs with their associated def or use
bool killPrinted = false;
RefPosition* lastFixedRegRefPos = nullptr;
for (; currentRefPosition != refPositions.end(); ++currentRefPosition)
for (; currentRefPosition != nullptr; currentRefPosition = currentRefPosition->nextAllRefPosition)
{
if (!(currentRefPosition->nodeLocation == tree->gtSeqNum ||
currentRefPosition->nodeLocation == tree->gtSeqNum + 1))
Expand Down Expand Up @@ -11443,10 +11450,10 @@ void LinearScan::dumpRegRecordHeader()
maxNodeLocation = (maxNodeLocation == 0) ? 1 : maxNodeLocation; // corner case of a method with an infinite loop
// without any GenTree nodes
assert(maxNodeLocation >= 1);
assert(refPositions.size() >= 1);
assert(numRefPositions >= 1);
int treeIdWidth = 9; /* '[XXXXX] '*/
int nodeLocationWidth = (int)log10((double)maxNodeLocation) + 1;
int refPositionWidth = (int)log10((double)refPositions.size()) + 1;
int refPositionWidth = (int)log10((double)numRefPositions) + 1;
int refTypeInfoWidth = 4 /*TYPE*/ + 2 /* last-use and delayed */ + 1 /* space */;
int locationAndRPNumWidth = nodeLocationWidth + 2 /* .# */ + refPositionWidth + 1 /* space */;
int shortRefPositionDumpWidth = locationAndRPNumWidth + regColumnWidth + 1 /* space */ + refTypeInfoWidth;
Expand Down Expand Up @@ -11971,8 +11978,9 @@ void LinearScan::verifyFinalAllocation()
BasicBlock* currentBlock = nullptr;
GenTree* firstBlockEndResolutionNode = nullptr;
LsraLocation currentLocation = MinLocation;
for (RefPosition& currentRefPosition : refPositions)
for (RefPosition* pCurrentRefPosition = allRefPositionsHead; pCurrentRefPosition != nullptr; pCurrentRefPosition = pCurrentRefPosition->nextAllRefPosition)
{
RefPosition& currentRefPosition = *pCurrentRefPosition;
Interval* interval = nullptr;
RegRecord* regRecord = nullptr;
regNumber regNum = REG_NA;
Expand Down
47 changes: 19 additions & 28 deletions src/coreclr/jit/lsra.h
Original file line number Diff line number Diff line change
Expand Up @@ -444,9 +444,6 @@ inline bool RefTypeIsDef(RefType refType)
typedef regNumberSmall* VarToRegMap;

typedef jitstd::list<Interval> IntervalList;
typedef jitstd::list<RefPosition> RefPositionList;
typedef jitstd::list<RefPosition>::iterator RefPositionIterator;
typedef jitstd::list<RefPosition>::reverse_iterator RefPositionReverseIterator;

class Referenceable
{
Expand Down Expand Up @@ -1704,13 +1701,20 @@ class LinearScan : public LinearScanInterface
return enregisterLocalVars;
}

// Ordered list of RefPositions
RefPositionList refPositions;
char* currentRefPositionsBuffer;
char* currentRefPositionsBufferEnd;

RefPosition* allRefPositionsHead;
RefPosition* allRefPositionsTail;
RefPosition** allRefPositionsTailSlot;

// Head of linked list of RefTypeKill ref positions
RefPosition* killHead;
// Tail slot of linked list of RefTypeKill ref positions
RefPosition** killTail;
#ifdef DEBUG
unsigned numRefPositions = 0;
#endif

// Per-block variable location mappings: an array indexed by block number that yields a
// pointer to an array of regNumber, one per variable.
Expand Down Expand Up @@ -1909,6 +1913,11 @@ class LinearScan : public LinearScanInterface
regsBusyUntilKill = RBM_NONE;
}

// Returns the current tail of the linked list of all RefPositions, i.e. the
// value of allRefPositionsTail. The constructor initializes that field to
// nullptr, and newRefPositionRaw updates it to each newly created RefPosition,
// so the result is the most recently created RefPosition, or nullptr if none
// has been created yet.
RefPosition* getAllRefPositionsTail()
{
return allRefPositionsTail;
}

bool conflictingFixedRegReference(regNumber regNum, RefPosition* refPosition);

// This method should not be used and is here to retain old behavior.
Expand Down Expand Up @@ -2463,11 +2472,14 @@ class RefPosition

Referenceable* referent;

// nextRefPosition is the next in code order.
// nextRefPosition is the next RP in code order associated with the referent.
// Note that in either case there is no need for these to be doubly linked, as they
// are only traversed in the forward direction, and are not moved.
RefPosition* nextRefPosition;

RefPosition* nextAllRefPosition;
RefPosition* prevAllRefPosition;

// The remaining fields are common to both options
union
{
Expand Down Expand Up @@ -2595,33 +2607,12 @@ class RefPosition
LsraLocation nodeLocation,
GenTree* treeNode,
RefType refType DEBUG_ARG(GenTree* buildNode))
: referent(nullptr)
, nextRefPosition(nullptr)
, treeNode(treeNode)
, registerAssignment(RBM_NONE)
: treeNode(treeNode)
, bbNum(bbNum)
, nodeLocation(nodeLocation)
, refType(refType)
, multiRegIdx(0)
#ifdef TARGET_ARM64
, needsConsecutive(false)
, regCount(0)
#endif
, lastUse(false)
, reload(false)
, spillAfter(false)
, singleDefSpill(false)
, writeThru(false)
, copyReg(false)
, moveReg(false)
, isPhysRegRef(false)
, isFixedRegRef(false)
, isLocalDefUse(false)
, delayRegFree(false)
, outOfOrder(false)
#ifdef DEBUG
, minRegCandidateCount(1)
, rpNum(0)
, buildNode(buildNode)
#endif
{
Expand Down
10 changes: 5 additions & 5 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2074,12 +2074,12 @@ int LinearScan::BuildConsecutiveRegistersForUse(GenTree* treeNode, GenTree* rmwN
for (GenTreeFieldList::Use& use : treeNode->AsFieldList()->Uses())
{
RefPosition* restoreRefPos = nullptr;
RefPositionIterator prevRefPos = refPositions.backPosition();
RefPosition* prevRefPos = allRefPositionsTail;

currRefPos = BuildUse(use.GetNode(), RBM_NONE, 0);

// Check if restore RefPositions were created
RefPositionIterator tailRefPos = refPositions.backPosition();
RefPosition* tailRefPos = allRefPositionsTail;
assert(tailRefPos == currRefPos);
prevRefPos++;
if (prevRefPos != tailRefPos)
Expand Down Expand Up @@ -2161,15 +2161,15 @@ int LinearScan::BuildConsecutiveRegistersForUse(GenTree* treeNode, GenTree* rmwN
}
else
{
RefPositionIterator refPositionMark = refPositions.backPosition();
RefPosition* refPositionMark = allRefPositionsTail;
int refPositionsAdded = BuildOperandUses(treeNode);

if (rmwNode != nullptr)
{
// Check all the newly created RefPositions for delay free
RefPositionIterator iter = refPositionMark;
RefPosition* iter = refPositionMark;

for (iter++; iter != refPositions.end(); iter++)
for (iter = iter->nextAllRefPosition; iter != nullptr; iter = iter->nextAllRefPosition)
{
RefPosition* refPositionAdded = &(*iter);

Expand Down
28 changes: 21 additions & 7 deletions src/coreclr/jit/lsrabuild.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,14 +178,28 @@ Interval* LinearScan::newInterval(RegisterType theRegisterType)
//
RefPosition* LinearScan::newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType)
{
refPositions.emplace_back(curBBNum, nodeLocation, treeNode, refType DEBUG_ARG(currBuildNode));
RefPosition* newRP = &refPositions.back();
if (currentRefPositionsBuffer == currentRefPositionsBufferEnd)
{
currentRefPositionsBuffer = new (compiler, CMK_LSRA_RefPosition) char[64 * sizeof(RefPosition)];
currentRefPositionsBufferEnd = currentRefPositionsBuffer + 64 * sizeof(RefPosition);
memset(currentRefPositionsBuffer, 0, 64 * sizeof(RefPosition));
}

assert(currentRefPositionsBuffer + sizeof(RefPosition) <= currentRefPositionsBufferEnd);

RefPosition* newRP = new (currentRefPositionsBuffer, jitstd::placement_t()) RefPosition(curBBNum, nodeLocation, treeNode, refType DEBUG_ARG(currBuildNode));
currentRefPositionsBuffer += sizeof(RefPosition);

newRP->prevAllRefPosition = allRefPositionsTail;
*allRefPositionsTailSlot = allRefPositionsTail = newRP;
allRefPositionsTailSlot = &newRP->nextAllRefPosition;

#ifdef DEBUG
// Reset currBuildNode so we do not set it for subsequent refpositions belonging
// to the same treeNode and hence, avoid printing it for every refposition inside
// the allocation table.
currBuildNode = nullptr;
newRP->rpNum = static_cast<unsigned>(refPositions.size() - 1);
newRP->rpNum = numRefPositions++;
if (!enregisterLocalVars)
{
assert(!((refType == RefTypeParamDef) || (refType == RefTypeZeroInit) || (refType == RefTypeDummyDef) ||
Expand Down Expand Up @@ -1768,7 +1782,7 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc
// If we are constraining the registers for allocation, we will modify all the RefPositions
// we've built for this node after we've created them. In order to do that, we'll remember
// the last RefPosition prior to those created for this node.
RefPositionIterator refPositionMark = refPositions.backPosition();
RefPosition* refPositionMark = allRefPositionsTail;
int oldDefListCount = defList.Count();
currBuildNode = tree;
#endif // DEBUG
Expand All @@ -1793,8 +1807,8 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc
// First, we count them.
unsigned minRegCount = 0;

RefPositionIterator iter = refPositionMark;
for (iter++; iter != refPositions.end(); iter++)
RefPosition* iter = refPositionMark;
for (iter = iter->nextAllRefPosition; iter != nullptr; iter = iter->nextAllRefPosition)
{
RefPosition* newRefPosition = &(*iter);
if (newRefPosition->isIntervalRef())
Expand Down Expand Up @@ -1838,7 +1852,7 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc
// add one less than the maximum number of registers args to 'minRegCount'.
minRegCount += MAX_REG_ARG - 1;
}
for (refPositionMark++; refPositionMark != refPositions.end(); refPositionMark++)
for (refPositionMark = refPositionMark->nextAllRefPosition; refPositionMark != nullptr; refPositionMark = refPositionMark->nextAllRefPosition)
{
RefPosition* newRefPosition = &(*refPositionMark);
unsigned minRegCountForRef = minRegCount;
Expand Down
Loading

0 comments on commit e4fe7a6

Please sign in to comment.