Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JIT: Visit blocks in RPO during LSRA #107927

Merged
merged 8 commits into from
Sep 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
310 changes: 38 additions & 272 deletions src/coreclr/jit/lsra.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -913,11 +913,10 @@ LinearScan::LinearScan(Compiler* theCompiler)
// Note that we don't initialize the bbVisitedSet until we do the first traversal
// This is so that any blocks that are added during the first traversal
// are accounted for (and we don't have BasicBlockEpoch issues).
blockSequencingDone = false;
blockSequence = nullptr;
blockSequenceWorkList = nullptr;
curBBSeqNum = 0;
bbSeqCount = 0;
blockSequencingDone = false;
blockSequence = nullptr;
curBBSeqNum = 0;
bbSeqCount = 0;

// Information about each block, including predecessor blocks used for variable locations at block entry.
blockInfo = nullptr;
Expand All @@ -926,39 +925,6 @@ LinearScan::LinearScan(Compiler* theCompiler)
tgtPrefUse = nullptr;
}

//------------------------------------------------------------------------
// getNextCandidateFromWorkList: Get the next candidate for block sequencing
//
// Arguments:
//    None.
//
// Return Value:
//    The next unvisited block from the front of the work list, or nullptr
//    if every remaining entry has already been visited.
//
// Notes:
//    Entries are consumed (unlinked) from the front of the list as they are
//    examined, whether or not they are returned. The list is expected to be
//    maintained in priority order by addToBlockSequenceWorkList(), so the
//    first unvisited entry is the correct candidate. A block may appear in
//    the list and already be visited if it was subsequently encountered as
//    both a flow and layout successor of the most recently selected block.
//
BasicBlock* LinearScan::getNextCandidateFromWorkList()
{
    BasicBlockList* node = blockSequenceWorkList;
    while (node != nullptr)
    {
        // Record the successor before unlinking; removal invalidates our position.
        BasicBlockList* successor = node->next;
        BasicBlock*     candidate = node->block;
        removeFromBlockSequenceWorkList(node, nullptr);

        if (!isBlockVisited(candidate))
        {
            return candidate;
        }

        node = successor;
    }

    // The work list held no unvisited blocks.
    return nullptr;
}

//------------------------------------------------------------------------
// setBlockSequence: Determine the block order for register allocation.
//
Expand All @@ -969,8 +935,7 @@ BasicBlock* LinearScan::getNextCandidateFromWorkList()
// None
//
// Notes:
// On return, the blockSequence array contains the blocks, in the order in which they
// will be allocated.
// On return, the blockSequence array contains the blocks in reverse post-order.
// This method clears the bbVisitedSet on LinearScan, and when it returns the set
// contains all the bbNums for the block.
//
Expand All @@ -986,19 +951,20 @@ void LinearScan::setBlockSequence()
// Initialize the "visited" blocks set.
bbVisitedSet = BlockSetOps::MakeEmpty(compiler);

BlockSet readySet(BlockSetOps::MakeEmpty(compiler));
BlockSet predSet(BlockSetOps::MakeEmpty(compiler));

assert(blockSequence == nullptr && bbSeqCount == 0);
blockSequence = new (compiler, CMK_LSRA) BasicBlock*[compiler->fgBBcount];
bbNumMaxBeforeResolution = compiler->fgBBNumMax;
blockInfo = new (compiler, CMK_LSRA) LsraBlockInfo[bbNumMaxBeforeResolution + 1];
assert((blockSequence == nullptr) && (bbSeqCount == 0));
FlowGraphDfsTree* const dfsTree = compiler->fgComputeDfs</* useProfile */ true>();
blockSequence = dfsTree->GetPostOrder();
bbNumMaxBeforeResolution = compiler->fgBBNumMax;
blockInfo = new (compiler, CMK_LSRA) LsraBlockInfo[bbNumMaxBeforeResolution + 1];

assert(blockSequenceWorkList == nullptr);
// Flip the DFS traversal to get the reverse post-order traversal
// (this is the order in which blocks will be allocated)
for (unsigned left = 0, right = dfsTree->GetPostOrderCount() - 1; left < right; left++, right--)
{
std::swap(blockSequence[left], blockSequence[right]);
}

verifiedAllBBs = false;
hasCriticalEdges = false;
BasicBlock* nextBlock;
// We use a bbNum of 0 for entry RefPositions.
// The other information in blockInfo[0] will never be used.
blockInfo[0].weight = BB_UNITY_WEIGHT;
Expand All @@ -1009,14 +975,9 @@ void LinearScan::setBlockSequence()
}
#endif // TRACK_LSRA_STATS

JITDUMP("Start LSRA Block Sequence: \n");
for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = nextBlock)
{
auto visitBlock = [this](BasicBlock* block) {
JITDUMP("Current block: " FMT_BB "\n", block->bbNum);
blockSequence[bbSeqCount] = block;
markBlockVisited(block);
bbSeqCount++;
nextBlock = nullptr;

// Initialize the blockInfo.
// predBBNum will be set later.
Expand Down Expand Up @@ -1087,73 +1048,41 @@ void LinearScan::setBlockSequence()
assert(!"Switch with single successor");
}

for (unsigned succIndex = 0; succIndex < numSuccs; succIndex++)
for (BasicBlock* const succ : block->Succs(compiler))
{
BasicBlock* succ = block->GetSucc(succIndex, compiler);
if (checkForCriticalOutEdge && succ->GetUniquePred(compiler) == nullptr)
if (checkForCriticalOutEdge && (succ->GetUniquePred(compiler) == nullptr))
{
blockInfo[block->bbNum].hasCriticalOutEdge = true;
hasCriticalEdges = true;
// We can stop checking now.
checkForCriticalOutEdge = false;
}

if (isTraversalLayoutOrder() || isBlockVisited(succ))
{
continue;
}

// We've now seen a predecessor, so add it to the work list and the "readySet".
// It will be inserted in the worklist according to the specified traversal order
// (i.e. pred-first or random, since layout order is handled above).
if (!BlockSetOps::IsMember(compiler, readySet, succ->bbNum))
{
JITDUMP("\tSucc block: " FMT_BB, succ->bbNum);
addToBlockSequenceWorkList(readySet, succ, predSet);
BlockSetOps::AddElemD(compiler, readySet, succ->bbNum);
break;
}
}
};

// For layout order, simply use bbNext
if (isTraversalLayoutOrder())
{
nextBlock = block->Next();
continue;
}
JITDUMP("Start LSRA Block Sequence: \n");
for (unsigned i = 0; i < dfsTree->GetPostOrderCount(); i++)
{
visitBlock(blockSequence[i]);
}

while (nextBlock == nullptr)
// If the DFS didn't visit some blocks, add them to the end of blockSequence
if (dfsTree->GetPostOrderCount() < compiler->fgBBcount)
{
// Unvisited blocks are more likely to be at the back of the list, so iterate backwards
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What are the examples where there will be left over unvisited blocks?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any blocks that don't have FlowEdges into them won't be visited by the RPO traversal. For every example I looked at, this happens when we have throw blocks without any explicit predecessors. For example:

BBnum BBid ref try hnd preds           weight   [IL range]   [jump]                            [EH region]        [flags]
---------------------------------------------------------------------------------------------------------------------------------------------------------------------
BB01 [0000]  1                             1    [000..00D)-> BB06(0.5),BB02(0.5)     ( cond )                     i LIR hascall idxlen
BB02 [0008]  1       BB01                  0.25 [00D..???)-> BB07(0.01),BB03(0.99)   ( cond )                     LIR internal idxlen
BB03 [0010]  2       BB02,BB05             3.96 [00D..01C)-> BB05(0.5),BB04(0.5)     ( cond )                     i LIR loophead idxlen bwd
BB04 [0006]  1       BB03                  0.99 [???..???)-> BB05(1)                 (always)                     i LIR idxlen
BB05 [0004]  2       BB03,BB04             3.96 [01C..026)-> BB03(0.5),BB06(0.5)     ( cond )                     i LIR hascall idxlen bwd
BB06 [0019]  3       BB01,BB05,BB09        1    [026..031)                           (return)                     i LIR gcsafe newobj
BB07 [0011]  2       BB02,BB09             0.04 [00D..01C)-> BB09(0.5),BB08(0.5)     ( cond )                     i LIR loophead idxlen bwd
BB08 [0013]  1       BB07                  0.01 [???..???)-> BB09(1)                 (always)                     i LIR idxlen
BB09 [0014]  2       BB07,BB08             0.04 [01C..026)-> BB07(0.5),BB06(0.5)     ( cond )                     i LIR hascall idxlen bwd
BB10 [0020]  0                             0    [???..???)                           (throw )                     i LIR rare keep internal
---------------------------------------------------------------------------------------------------------------------------------------------------------------------

BB10 would've been removed in previous phases if it weren't for the BBF_DONT_REMOVE flag set on it. StackLevelSetter, which runs right before LSRA, has some logic for removing throw blocks that are no longer needed, even if they're flagged with BBF_DONT_REMOVE. So if we still see these blocks by the time we get to LSRA, I guess they're needed?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Edges to the throw helpers are added during codegen. So they will appear unreached but are needed.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since such blocks will be cold (I assume), I am wondering if we should add logic to LSRA to quickly assign registers in such blocks, similar to the way we do for MinOpts, to save some TP time. I am not sure whether this decision would add resolution to the hot blocks, or whether there is a way to add the resolution in the cold blocks instead.

unsigned i = dfsTree->GetPostOrderCount();
for (BasicBlock* block = compiler->fgLastBB; i < compiler->fgBBcount; block = block->Prev())
{
nextBlock = getNextCandidateFromWorkList();

// TODO-Throughput: We would like to bypass this traversal if we know we've handled all
// the blocks - but fgBBcount does not appear to be updated when blocks are removed.
if (nextBlock == nullptr /* && bbSeqCount != compiler->fgBBcount*/ && !verifiedAllBBs)
{
// If we don't encounter all blocks by traversing the regular successor links, do a full
// traversal of all the blocks, and add them in layout order.
// This may include:
// - internal-only blocks which may not be in the flow graph
// - blocks that have become unreachable due to optimizations, but that are strongly
// connected (these are not removed)
// - EH blocks

for (BasicBlock* const seqBlock : compiler->Blocks())
{
if (!isBlockVisited(seqBlock))
{
JITDUMP("\tUnvisited block: " FMT_BB, seqBlock->bbNum);
addToBlockSequenceWorkList(readySet, seqBlock, predSet);
BlockSetOps::AddElemD(compiler, readySet, seqBlock->bbNum);
}
}
verifiedAllBBs = true;
}
else
assert(block != nullptr);
if (!isBlockVisited(block))
{
break;
visitBlock(block);
blockSequence[i++] = block;
}
}
}

bbSeqCount = compiler->fgBBcount;
blockSequencingDone = true;

#ifdef DEBUG
Expand Down Expand Up @@ -1199,169 +1128,6 @@ void LinearScan::setBlockSequence()
#endif
}

//------------------------------------------------------------------------
// compareBlocksForSequencing: Compare two basic blocks for sequencing order.
//
// Arguments:
//    block1          - the first block for comparison
//    block2          - the second block for comparison
//    useBlockWeights - whether to use block weights for comparison
//
// Return Value:
//    -1 if block1 is preferred.
//     0 if the blocks are equivalent.
//     1 if block2 is preferred.
//
// Notes:
//    When weights are requested, the heavier block wins; equal weights
//    (or weightless comparison) fall back to preferring the lower bbNum.
//    See addToBlockSequenceWorkList.
//
int LinearScan::compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block2, bool useBlockWeights)
{
    if (useBlockWeights)
    {
        const weight_t weight1 = block1->getBBWeight(compiler);
        const weight_t weight2 = block2->getBBWeight(compiler);

        if (weight1 != weight2)
        {
            // The heavier block is preferred.
            return (weight1 > weight2) ? -1 : 1;
        }
    }

    // If weights are the same (or not used), prefer the LOWER bbNum.
    if (block1->bbNum == block2->bbNum)
    {
        return 0;
    }

    return (block1->bbNum < block2->bbNum) ? -1 : 1;
}

//------------------------------------------------------------------------
// addToBlockSequenceWorkList: Add a BasicBlock to the work list for sequencing.
//
// Arguments:
//    sequencedBlockSet - the set of blocks that are already sequenced
//    block - the new block to be added
//    predSet - the buffer to save predecessors set. A block set allocated by the caller used here as a
//    temporary block set for constructing a predecessor set. Allocated by the caller to avoid reallocating a new block
//    set with every call to this function
//
// Return Value:
//    None.
//
// Notes:
//    The first block in the list will be the next one to be sequenced, as soon
//    as we encounter a block whose successors have all been sequenced, in pred-first
//    order, or the very next block if we are traversing in random order (once implemented).
//    This method uses a comparison method to determine the order in which to place
//    the blocks in the list.  This method queries whether all predecessors of the
//    block are sequenced at the time it is added to the list and if so uses block weights
//    for inserting the block.  A block is never inserted ahead of its predecessors.
//    A block at the time of insertion may not have all its predecessors sequenced, in
//    which case it will be sequenced based on its block number. Once a block is inserted,
//    its priority\order will not be changed later once its remaining predecessors are
//    sequenced.  This would mean that work list may not be sorted entirely based on
//    block weights alone.
//
//    Note also that, when random traversal order is implemented, this method
//    should insert the blocks into the list in random order, so that we can always
//    simply select the first block in the list.
//
void LinearScan::addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block, BlockSet& predSet)
{
    // The block that is being added is not already sequenced
    assert(!BlockSetOps::IsMember(compiler, sequencedBlockSet, block->bbNum));

    // Get predSet of block. predSet is caller-allocated scratch space; clear any stale contents first.
    BlockSetOps::ClearD(compiler, predSet);
    for (BasicBlock* const predBlock : block->PredBlocks())
    {
        BlockSetOps::AddElemD(compiler, predSet, predBlock->bbNum);
    }

    // If either a rarely run block or all its preds are already sequenced, use block's weight to sequence
    bool useBlockWeight = block->isRunRarely() || BlockSetOps::IsSubset(compiler, sequencedBlockSet, predSet);
    JITDUMP(", Criteria: %s", useBlockWeight ? "weight" : "bbNum");

    // Walk the (priority-ordered) work list to find the insertion point.
    // seqResult follows compareBlocksForSequencing's convention: negative means the
    // existing list entry is preferred (keep scanning), positive means the new block
    // is preferred (insert before the current entry).
    BasicBlockList* prevNode = nullptr;
    BasicBlockList* nextNode = blockSequenceWorkList;
    while (nextNode != nullptr)
    {
        int seqResult;

        if (nextNode->block->isRunRarely())
        {
            // If the block that is yet to be sequenced is a rarely run block, always use block weights for sequencing
            seqResult = compareBlocksForSequencing(nextNode->block, block, true);
        }
        else if (BlockSetOps::IsMember(compiler, predSet, nextNode->block->bbNum))
        {
            // always prefer unsequenced pred blocks
            seqResult = -1;
        }
        else
        {
            seqResult = compareBlocksForSequencing(nextNode->block, block, useBlockWeight);
        }

        if (seqResult > 0)
        {
            break;
        }

        prevNode = nextNode;
        nextNode = nextNode->next;
    }

    // Splice the new node in between prevNode and nextNode (or at the head if prevNode is null).
    BasicBlockList* newListNode = new (compiler, CMK_LSRA) BasicBlockList(block, nextNode);
    if (prevNode == nullptr)
    {
        blockSequenceWorkList = newListNode;
    }
    else
    {
        prevNode->next = newListNode;
    }

#ifdef DEBUG
    // Dump the resulting work list so the chosen insertion point is visible in JIT dumps.
    nextNode = blockSequenceWorkList;
    JITDUMP(", Worklist: [");
    while (nextNode != nullptr)
    {
        JITDUMP(FMT_BB " ", nextNode->block->bbNum);
        nextNode = nextNode->next;
    }
    JITDUMP("]\n");
#endif
}

//------------------------------------------------------------------------
// removeFromBlockSequenceWorkList: Unlink a node from the sequencing work list.
//
// Arguments:
//    listNode - the node to unlink
//    prevNode - the node immediately preceding listNode, or nullptr if
//               listNode is the list head
//
void LinearScan::removeFromBlockSequenceWorkList(BasicBlockList* listNode, BasicBlockList* prevNode)
{
    const bool isListHead = (listNode == blockSequenceWorkList);
    if (isListHead)
    {
        // Head removal: advance the list head past the node.
        assert(prevNode == nullptr);
        blockSequenceWorkList = listNode->next;
    }
    else
    {
        // Interior removal: the caller must supply the true predecessor.
        assert(prevNode != nullptr && prevNode->next == listNode);
        prevNode->next = listNode->next;
    }
    // TODO-Cleanup: consider merging Compiler::BlockListNode and BasicBlockList
    // compiler->FreeBlockListNode(listNode);
}

// Initialize the block order for allocation (called each time a new traversal begins).
BasicBlock* LinearScan::startBlockSequence()
{
Expand Down
Loading
Loading