From 35fb3f31e555ac3777926eab871e1a6868ffd08f Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 16 Sep 2024 16:05:12 -0400 Subject: [PATCH 1/7] Visit blocks in RPO during LSRA --- src/coreclr/jit/lsra.cpp | 74 ++++------------------------------- src/coreclr/jit/lsrabuild.cpp | 2 +- 2 files changed, 8 insertions(+), 68 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index dd766c51e3da7..cf4c0a90a34fc 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -986,19 +986,15 @@ void LinearScan::setBlockSequence() // Initialize the "visited" blocks set. bbVisitedSet = BlockSetOps::MakeEmpty(compiler); - BlockSet readySet(BlockSetOps::MakeEmpty(compiler)); - BlockSet predSet(BlockSetOps::MakeEmpty(compiler)); - assert(blockSequence == nullptr && bbSeqCount == 0); - blockSequence = new (compiler, CMK_LSRA) BasicBlock*[compiler->fgBBcount]; + FlowGraphDfsTree* const dfsTree = compiler->fgComputeDfs(); + blockSequence = new (compiler, CMK_LSRA) BasicBlock*[dfsTree->GetPostOrderCount()]; bbNumMaxBeforeResolution = compiler->fgBBNumMax; blockInfo = new (compiler, CMK_LSRA) LsraBlockInfo[bbNumMaxBeforeResolution + 1]; assert(blockSequenceWorkList == nullptr); - verifiedAllBBs = false; hasCriticalEdges = false; - BasicBlock* nextBlock; // We use a bbNum of 0 for entry RefPositions. // The other information in blockInfo[0] will never be used. blockInfo[0].weight = BB_UNITY_WEIGHT; @@ -1010,13 +1006,13 @@ void LinearScan::setBlockSequence() #endif // TRACK_LSRA_STATS JITDUMP("Start LSRA Block Sequence: \n"); - for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = nextBlock) + for (unsigned i = dfsTree->GetPostOrderCount(); i != 0; i--) { + BasicBlock* const block = dfsTree->GetPostOrder(i - 1); JITDUMP("Current block: " FMT_BB "\n", block->bbNum); blockSequence[bbSeqCount] = block; markBlockVisited(block); bbSeqCount++; - nextBlock = nullptr; // Initialize the blockInfo. 
// predBBNum will be set later. @@ -1087,69 +1083,13 @@ void LinearScan::setBlockSequence() assert(!"Switch with single successor"); } - for (unsigned succIndex = 0; succIndex < numSuccs; succIndex++) + for (BasicBlock* const succ : block->Succs(compiler)) { - BasicBlock* succ = block->GetSucc(succIndex, compiler); - if (checkForCriticalOutEdge && succ->GetUniquePred(compiler) == nullptr) + if (checkForCriticalOutEdge && (succ->GetUniquePred(compiler) == nullptr)) { blockInfo[block->bbNum].hasCriticalOutEdge = true; hasCriticalEdges = true; // We can stop checking now. - checkForCriticalOutEdge = false; - } - - if (isTraversalLayoutOrder() || isBlockVisited(succ)) - { - continue; - } - - // We've now seen a predecessor, so add it to the work list and the "readySet". - // It will be inserted in the worklist according to the specified traversal order - // (i.e. pred-first or random, since layout order is handled above). - if (!BlockSetOps::IsMember(compiler, readySet, succ->bbNum)) - { - JITDUMP("\tSucc block: " FMT_BB, succ->bbNum); - addToBlockSequenceWorkList(readySet, succ, predSet); - BlockSetOps::AddElemD(compiler, readySet, succ->bbNum); - } - } - - // For layout order, simply use bbNext - if (isTraversalLayoutOrder()) - { - nextBlock = block->Next(); - continue; - } - - while (nextBlock == nullptr) - { - nextBlock = getNextCandidateFromWorkList(); - - // TODO-Throughput: We would like to bypass this traversal if we know we've handled all - // the blocks - but fgBBcount does not appear to be updated when blocks are removed. - if (nextBlock == nullptr /* && bbSeqCount != compiler->fgBBcount*/ && !verifiedAllBBs) - { - // If we don't encounter all blocks by traversing the regular successor links, do a full - // traversal of all the blocks, and add them in layout order. 
- // This may include: - // - internal-only blocks which may not be in the flow graph - // - blocks that have become unreachable due to optimizations, but that are strongly - // connected (these are not removed) - // - EH blocks - - for (BasicBlock* const seqBlock : compiler->Blocks()) - { - if (!isBlockVisited(seqBlock)) - { - JITDUMP("\tUnvisited block: " FMT_BB, seqBlock->bbNum); - addToBlockSequenceWorkList(readySet, seqBlock, predSet); - BlockSetOps::AddElemD(compiler, readySet, seqBlock->bbNum); - } - } - verifiedAllBBs = true; - } - else - { break; } } @@ -1160,7 +1100,7 @@ void LinearScan::setBlockSequence() // Make sure that we've visited all the blocks. for (BasicBlock* const block : compiler->Blocks()) { - assert(isBlockVisited(block)); + assert(isBlockVisited(block) || (block->bbPreds == nullptr)); } JITDUMP("Final LSRA Block Sequence:\n"); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 3c390ee213941..3303cbe46c115 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2853,7 +2853,7 @@ void LinearScan::buildIntervals() // Make sure we don't have any blocks that were not visited for (BasicBlock* const block : compiler->Blocks()) { - assert(isBlockVisited(block)); + assert(isBlockVisited(block) || (block->bbPreds == nullptr)); } if (VERBOSE) From c86c069c9d75e3174ed27b1c955387811f0c92ec Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 16 Sep 2024 16:19:25 -0400 Subject: [PATCH 2/7] Dead code removal --- src/coreclr/jit/lsra.cpp | 199 --------------------------------------- src/coreclr/jit/lsra.h | 8 -- 2 files changed, 207 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index cf4c0a90a34fc..cc9ac931af601 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -915,7 +915,6 @@ LinearScan::LinearScan(Compiler* theCompiler) // are accounted for (and we don't have BasicBlockEpoch issues). 
blockSequencingDone = false; blockSequence = nullptr; - blockSequenceWorkList = nullptr; curBBSeqNum = 0; bbSeqCount = 0; @@ -926,39 +925,6 @@ LinearScan::LinearScan(Compiler* theCompiler) tgtPrefUse = nullptr; } -//------------------------------------------------------------------------ -// getNextCandidateFromWorkList: Get the next candidate for block sequencing -// -// Arguments: -// None. -// -// Return Value: -// The next block to be placed in the sequence. -// -// Notes: -// This method currently always returns the next block in the list, and relies on having -// blocks added to the list only when they are "ready", and on the -// addToBlockSequenceWorkList() method to insert them in the proper order. -// However, a block may be in the list and already selected, if it was subsequently -// encountered as both a flow and layout successor of the most recently selected -// block. -// -BasicBlock* LinearScan::getNextCandidateFromWorkList() -{ - BasicBlockList* nextWorkList = nullptr; - for (BasicBlockList* workList = blockSequenceWorkList; workList != nullptr; workList = nextWorkList) - { - nextWorkList = workList->next; - BasicBlock* candBlock = workList->block; - removeFromBlockSequenceWorkList(workList, nullptr); - if (!isBlockVisited(candBlock)) - { - return candBlock; - } - } - return nullptr; -} - //------------------------------------------------------------------------ // setBlockSequence: Determine the block order for register allocation. // @@ -992,8 +958,6 @@ void LinearScan::setBlockSequence() bbNumMaxBeforeResolution = compiler->fgBBNumMax; blockInfo = new (compiler, CMK_LSRA) LsraBlockInfo[bbNumMaxBeforeResolution + 1]; - assert(blockSequenceWorkList == nullptr); - hasCriticalEdges = false; // We use a bbNum of 0 for entry RefPositions. // The other information in blockInfo[0] will never be used. 
@@ -1139,169 +1103,6 @@ void LinearScan::setBlockSequence() #endif } -//------------------------------------------------------------------------ -// compareBlocksForSequencing: Compare two basic blocks for sequencing order. -// -// Arguments: -// block1 - the first block for comparison -// block2 - the second block for comparison -// useBlockWeights - whether to use block weights for comparison -// -// Return Value: -// -1 if block1 is preferred. -// 0 if the blocks are equivalent. -// 1 if block2 is preferred. -// -// Notes: -// See addToBlockSequenceWorkList. -// -int LinearScan::compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block2, bool useBlockWeights) -{ - if (useBlockWeights) - { - weight_t weight1 = block1->getBBWeight(compiler); - weight_t weight2 = block2->getBBWeight(compiler); - - if (weight1 > weight2) - { - return -1; - } - else if (weight1 < weight2) - { - return 1; - } - } - - // If weights are the same prefer LOWER bbnum - if (block1->bbNum < block2->bbNum) - { - return -1; - } - else if (block1->bbNum == block2->bbNum) - { - return 0; - } - else - { - return 1; - } -} - -//------------------------------------------------------------------------ -// addToBlockSequenceWorkList: Add a BasicBlock to the work list for sequencing. -// -// Arguments: -// sequencedBlockSet - the set of blocks that are already sequenced -// block - the new block to be added -// predSet - the buffer to save predecessors set. A block set allocated by the caller used here as a -// temporary block set for constructing a predecessor set. Allocated by the caller to avoid reallocating a new block -// set with every call to this function -// -// Return Value: -// None. -// -// Notes: -// The first block in the list will be the next one to be sequenced, as soon -// as we encounter a block whose successors have all been sequenced, in pred-first -// order, or the very next block if we are traversing in random order (once implemented). 
-// This method uses a comparison method to determine the order in which to place -// the blocks in the list. This method queries whether all predecessors of the -// block are sequenced at the time it is added to the list and if so uses block weights -// for inserting the block. A block is never inserted ahead of its predecessors. -// A block at the time of insertion may not have all its predecessors sequenced, in -// which case it will be sequenced based on its block number. Once a block is inserted, -// its priority\order will not be changed later once its remaining predecessors are -// sequenced. This would mean that work list may not be sorted entirely based on -// block weights alone. -// -// Note also that, when random traversal order is implemented, this method -// should insert the blocks into the list in random order, so that we can always -// simply select the first block in the list. -// -void LinearScan::addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block, BlockSet& predSet) -{ - // The block that is being added is not already sequenced - assert(!BlockSetOps::IsMember(compiler, sequencedBlockSet, block->bbNum)); - - // Get predSet of block - BlockSetOps::ClearD(compiler, predSet); - for (BasicBlock* const predBlock : block->PredBlocks()) - { - BlockSetOps::AddElemD(compiler, predSet, predBlock->bbNum); - } - - // If either a rarely run block or all its preds are already sequenced, use block's weight to sequence - bool useBlockWeight = block->isRunRarely() || BlockSetOps::IsSubset(compiler, sequencedBlockSet, predSet); - JITDUMP(", Criteria: %s", useBlockWeight ? 
"weight" : "bbNum"); - - BasicBlockList* prevNode = nullptr; - BasicBlockList* nextNode = blockSequenceWorkList; - while (nextNode != nullptr) - { - int seqResult; - - if (nextNode->block->isRunRarely()) - { - // If the block that is yet to be sequenced is a rarely run block, always use block weights for sequencing - seqResult = compareBlocksForSequencing(nextNode->block, block, true); - } - else if (BlockSetOps::IsMember(compiler, predSet, nextNode->block->bbNum)) - { - // always prefer unsequenced pred blocks - seqResult = -1; - } - else - { - seqResult = compareBlocksForSequencing(nextNode->block, block, useBlockWeight); - } - - if (seqResult > 0) - { - break; - } - - prevNode = nextNode; - nextNode = nextNode->next; - } - - BasicBlockList* newListNode = new (compiler, CMK_LSRA) BasicBlockList(block, nextNode); - if (prevNode == nullptr) - { - blockSequenceWorkList = newListNode; - } - else - { - prevNode->next = newListNode; - } - -#ifdef DEBUG - nextNode = blockSequenceWorkList; - JITDUMP(", Worklist: ["); - while (nextNode != nullptr) - { - JITDUMP(FMT_BB " ", nextNode->block->bbNum); - nextNode = nextNode->next; - } - JITDUMP("]\n"); -#endif -} - -void LinearScan::removeFromBlockSequenceWorkList(BasicBlockList* listNode, BasicBlockList* prevNode) -{ - if (listNode == blockSequenceWorkList) - { - assert(prevNode == nullptr); - blockSequenceWorkList = listNode->next; - } - else - { - assert(prevNode != nullptr && prevNode->next == listNode); - prevNode->next = listNode->next; - } - // TODO-Cleanup: consider merging Compiler::BlockListNode and BasicBlockList - // compiler->FreeBlockListNode(listNode); -} - // Initialize the block order for allocation (called each time a new traversal begins). 
BasicBlock* LinearScan::startBlockSequence() { diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 6ae08910afc10..e1cc3868e3b7a 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1659,20 +1659,12 @@ class LinearScan : public LinearScanInterface // The order in which the blocks will be allocated. // This is any array of BasicBlock*, in the order in which they should be traversed. BasicBlock** blockSequence; - // The verifiedAllBBs flag indicates whether we have verified that all BBs have been - // included in the blockSeuqence above, during setBlockSequence(). - bool verifiedAllBBs; void setBlockSequence(); - int compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block2, bool useBlockWeights); - BasicBlockList* blockSequenceWorkList; bool blockSequencingDone; #ifdef DEBUG // LSRA must not change number of blocks and blockEpoch that it initializes at start. unsigned blockEpoch; #endif // DEBUG - void addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block, BlockSet& predSet); - void removeFromBlockSequenceWorkList(BasicBlockList* listNode, BasicBlockList* prevNode); - BasicBlock* getNextCandidateFromWorkList(); // Indicates whether the allocation pass has been completed. 
bool allocationPassComplete; From 4a9e0ffdc09231c0f6ce90e3ca6bac734280038e Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Tue, 17 Sep 2024 12:52:27 -0400 Subject: [PATCH 3/7] Add unvisited blocks manually --- src/coreclr/jit/lsra.cpp | 77 +++++++++++++++++++++++++---------- src/coreclr/jit/lsra.h | 16 ++++---- src/coreclr/jit/lsraarm64.cpp | 2 +- src/coreclr/jit/lsrabuild.cpp | 4 +- src/coreclr/jit/lsraxarch.cpp | 4 +- 5 files changed, 68 insertions(+), 35 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index cc9ac931af601..5734ae270da91 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -913,10 +913,10 @@ LinearScan::LinearScan(Compiler* theCompiler) // Note that we don't initialize the bbVisitedSet until we do the first traversal // This is so that any blocks that are added during the first traversal // are accounted for (and we don't have BasicBlockEpoch issues). - blockSequencingDone = false; - blockSequence = nullptr; - curBBSeqNum = 0; - bbSeqCount = 0; + blockSequencingDone = false; + dfsTraversal = nullptr; + curBBSeqNum = 0; + bbSeqCount = 0; // Information about each block, including predecessor blocks used for variable locations at block entry. blockInfo = nullptr; @@ -935,7 +935,7 @@ LinearScan::LinearScan(Compiler* theCompiler) // None // // Notes: -// On return, the blockSequence array contains the blocks, in the order in which they +// On return, the dfsTraversal array contains the blocks, in reverse order in which they // will be allocated. // This method clears the bbVisitedSet on LinearScan, and when it returns the set // contains all the bbNums for the block. @@ -952,13 +952,13 @@ void LinearScan::setBlockSequence() // Initialize the "visited" blocks set. 
bbVisitedSet = BlockSetOps::MakeEmpty(compiler); - assert(blockSequence == nullptr && bbSeqCount == 0); + assert((dfsTraversal == nullptr) && (bbSeqCount == 0)); FlowGraphDfsTree* const dfsTree = compiler->fgComputeDfs(); - blockSequence = new (compiler, CMK_LSRA) BasicBlock*[dfsTree->GetPostOrderCount()]; - bbNumMaxBeforeResolution = compiler->fgBBNumMax; - blockInfo = new (compiler, CMK_LSRA) LsraBlockInfo[bbNumMaxBeforeResolution + 1]; + dfsTraversal = dfsTree->GetPostOrder(); + bbNumMaxBeforeResolution = compiler->fgBBNumMax; + blockInfo = new (compiler, CMK_LSRA) LsraBlockInfo[bbNumMaxBeforeResolution + 1]; + hasCriticalEdges = false; - hasCriticalEdges = false; // We use a bbNum of 0 for entry RefPositions. // The other information in blockInfo[0] will never be used. blockInfo[0].weight = BB_UNITY_WEIGHT; @@ -969,14 +969,9 @@ void LinearScan::setBlockSequence() } #endif // TRACK_LSRA_STATS - JITDUMP("Start LSRA Block Sequence: \n"); - for (unsigned i = dfsTree->GetPostOrderCount(); i != 0; i--) - { - BasicBlock* const block = dfsTree->GetPostOrder(i - 1); + auto visitBlock = [=](BasicBlock* block) { JITDUMP("Current block: " FMT_BB "\n", block->bbNum); - blockSequence[bbSeqCount] = block; markBlockVisited(block); - bbSeqCount++; // Initialize the blockInfo. // predBBNum will be set later. 
@@ -1057,14 +1052,48 @@ void LinearScan::setBlockSequence() break; } } + }; + + JITDUMP("Start LSRA Block Sequence: \n"); + for (unsigned i = dfsTree->GetPostOrderCount(); i != 0; i--) + { + visitBlock(dfsTraversal[i - 1]); + } + + // If the DFS didn't visit any blocks, add them to the beginning of dfsTraversal so we visit them last in RPO + assert(compiler->fgBBcount >= dfsTree->GetPostOrderCount()); + const unsigned numUnvisitedBlocks = compiler->fgBBcount - dfsTree->GetPostOrderCount(); + + if (numUnvisitedBlocks != 0) + { + // Make space at the beginning of dfsTraversal + // (we allocate an array of size compiler->fgBBcount for the DFS, so this is safe) + memcpy(dfsTraversal + numUnvisitedBlocks, dfsTraversal, (sizeof(BasicBlock*) * dfsTree->GetPostOrderCount())); + + // Unvisited throw blocks are more likely to be at the back of the list, so iterate backwards + unsigned i = 0; + for (BasicBlock* block = compiler->fgLastBB; i < numUnvisitedBlocks; block = block->Prev()) + { + assert(block != nullptr); + if (!isBlockVisited(block)) + { + assert(!dfsTree->Contains(block)); + visitBlock(block); + dfsTraversal[i++] = block; + } + } + + assert(i == numUnvisitedBlocks); } + + bbSeqCount = compiler->fgBBcount; blockSequencingDone = true; #ifdef DEBUG // Make sure that we've visited all the blocks. 
for (BasicBlock* const block : compiler->Blocks()) { - assert(isBlockVisited(block) || (block->bbPreds == nullptr)); + assert(isBlockVisited(block)); } JITDUMP("Final LSRA Block Sequence:\n"); @@ -1115,10 +1144,10 @@ BasicBlock* LinearScan::startBlockSequence() clearVisitedBlocks(); } - BasicBlock* curBB = compiler->fgFirstBB; curBBSeqNum = 0; + BasicBlock* curBB = getBlockFromSequence(curBBSeqNum); curBBNum = curBB->bbNum; - assert(blockSequence[0] == compiler->fgFirstBB); + assert(curBB == compiler->fgFirstBB); markBlockVisited(curBB); return curBB; } @@ -1162,15 +1191,21 @@ BasicBlock* LinearScan::moveToNextBlock() // BasicBlock* LinearScan::getNextBlock() { - assert(blockSequencingDone); unsigned int nextBBSeqNum = curBBSeqNum + 1; if (nextBBSeqNum < bbSeqCount) { - return blockSequence[nextBBSeqNum]; + return getBlockFromSequence(nextBBSeqNum); } return nullptr; } +BasicBlock* LinearScan::getBlockFromSequence(unsigned index) +{ + assert(blockSequencingDone); + assert(index < bbSeqCount); + return dfsTraversal[bbSeqCount - index - 1]; +} + //------------------------------------------------------------------------ // doLinearScan: The main method for register allocation. // diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index e1cc3868e3b7a..14d7297178504 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -635,13 +635,11 @@ class LinearScan : public LinearScanInterface } // Initialize the block traversal for LSRA. - // This resets the bbVisitedSet, and on the first invocation sets the blockSequence array, - // which determines the order in which blocks will be allocated (currently called during Lowering). + // This resets the bbVisitedSet, and on the first invocation sets the dfsTraversal array. BasicBlock* startBlockSequence(); // Move to the next block in sequence, updating the current block information. 
BasicBlock* moveToNextBlock(); - // Get the next block to be scheduled without changing the current block, - // but updating the blockSequence during the first iteration if it is not fully computed. + // Get the next block to be scheduled without changing the current block. BasicBlock* getNextBlock(); // This is called during code generation to update the location of variables @@ -1656,11 +1654,11 @@ class LinearScan : public LinearScanInterface BasicBlock* findPredBlockForLiveIn(BasicBlock* block, BasicBlock* prevBlock DEBUGARG(bool* pPredBlockIsAllocated)); - // The order in which the blocks will be allocated. - // This is any array of BasicBlock*, in the order in which they should be traversed. - BasicBlock** blockSequence; - void setBlockSequence(); - bool blockSequencingDone; + // We will allocate blocks in reverse post-order by iterating dfsTraversal backwards + BasicBlock** dfsTraversal; + BasicBlock* getBlockFromSequence(unsigned index); + void setBlockSequence(); + bool blockSequencingDone; #ifdef DEBUG // LSRA must not change number of blocks and blockEpoch that it initializes at start. unsigned blockEpoch; diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 79bf3c16778df..e11ed865e4a02 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1955,7 +1955,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou LIR::Use use; GenTree* user = nullptr; - if (LIR::AsRange(blockSequence[curBBSeqNum]).TryGetUse(embOp2Node, &use)) + if (LIR::AsRange(getBlockFromSequence(curBBSeqNum)).TryGetUse(embOp2Node, &use)) { user = use.User(); } diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 3303cbe46c115..e9ea40291835d 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1128,7 +1128,7 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo // to be done before making this change. 
// Also note that we avoid setting callee-save preferences for floating point. This may need // revisiting, and note that it doesn't currently apply to SIMD types, only float or double. - // if (!blockSequence[curBBSeqNum]->isRunRarely()) + // if (!getBlockFromSequence(curBBSeqNum)->isRunRarely()) if (enregisterLocalVars) { VarSetOps::Iter iter(compiler, currentLiveVars); @@ -2853,7 +2853,7 @@ void LinearScan::buildIntervals() // Make sure we don't have any blocks that were not visited for (BasicBlock* const block : compiler->Blocks()) { - assert(isBlockVisited(block) || (block->bbPreds == nullptr)); + assert(isBlockVisited(block)); } if (VERBOSE) diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 7f50c8be9b507..fed0274b1e1de 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2392,7 +2392,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou LIR::Use use; GenTree* user = nullptr; - if (LIR::AsRange(blockSequence[curBBSeqNum]).TryGetUse(intrinsicTree, &use)) + if (LIR::AsRange(getBlockFromSequence(curBBSeqNum)).TryGetUse(intrinsicTree, &use)) { user = use.User(); } @@ -2578,7 +2578,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou LIR::Use use; GenTree* user = nullptr; - if (LIR::AsRange(blockSequence[curBBSeqNum]).TryGetUse(intrinsicTree, &use)) + if (LIR::AsRange(getBlockFromSequence(curBBSeqNum)).TryGetUse(intrinsicTree, &use)) { user = use.User(); } From c1c7b0b01321a4ba6f0ee515ef90da7f416a9338 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Tue, 17 Sep 2024 13:00:59 -0400 Subject: [PATCH 4/7] Remove LSRA traversal order config --- src/coreclr/jit/lsra.h | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 14d7297178504..ec9aa52479fb4 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -814,32 +814,6 @@ class LinearScan : 
public LinearScanInterface return ((lsraStressMask & LSRA_SELECT_NEAREST) != 0); } - // This controls the order in which basic blocks are visited during allocation - enum LsraTraversalOrder - { - LSRA_TRAVERSE_LAYOUT = 0x20, - LSRA_TRAVERSE_PRED_FIRST = 0x40, - LSRA_TRAVERSE_RANDOM = 0x60, // NYI - LSRA_TRAVERSE_DEFAULT = LSRA_TRAVERSE_PRED_FIRST, - LSRA_TRAVERSE_MASK = 0x60 - }; - LsraTraversalOrder getLsraTraversalOrder() - { - if ((lsraStressMask & LSRA_TRAVERSE_MASK) == 0) - { - return LSRA_TRAVERSE_DEFAULT; - } - return (LsraTraversalOrder)(lsraStressMask & LSRA_TRAVERSE_MASK); - } - bool isTraversalLayoutOrder() - { - return getLsraTraversalOrder() == LSRA_TRAVERSE_LAYOUT; - } - bool isTraversalPredFirstOrder() - { - return getLsraTraversalOrder() == LSRA_TRAVERSE_PRED_FIRST; - } - // This controls whether lifetimes should be extended to the entire method. // Note that this has no effect under MinOpts enum LsraExtendLifetimes From 831ee14f06824d48134ec9cf9b3f0155055fb676 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Tue, 17 Sep 2024 15:53:01 -0400 Subject: [PATCH 5/7] Revert "Add unvisited blocks manually" This reverts commit 4a9e0ffdc09231c0f6ce90e3ca6bac734280038e. --- src/coreclr/jit/lsra.cpp | 77 ++++++++++------------------------- src/coreclr/jit/lsra.h | 16 ++++---- src/coreclr/jit/lsraarm64.cpp | 2 +- src/coreclr/jit/lsrabuild.cpp | 4 +- src/coreclr/jit/lsraxarch.cpp | 4 +- 5 files changed, 35 insertions(+), 68 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 5734ae270da91..cc9ac931af601 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -913,10 +913,10 @@ LinearScan::LinearScan(Compiler* theCompiler) // Note that we don't initialize the bbVisitedSet until we do the first traversal // This is so that any blocks that are added during the first traversal // are accounted for (and we don't have BasicBlockEpoch issues). 
- blockSequencingDone = false; - dfsTraversal = nullptr; - curBBSeqNum = 0; - bbSeqCount = 0; + blockSequencingDone = false; + blockSequence = nullptr; + curBBSeqNum = 0; + bbSeqCount = 0; // Information about each block, including predecessor blocks used for variable locations at block entry. blockInfo = nullptr; @@ -935,7 +935,7 @@ LinearScan::LinearScan(Compiler* theCompiler) // None // // Notes: -// On return, the dfsTraversal array contains the blocks, in reverse order in which they +// On return, the blockSequence array contains the blocks, in the order in which they // will be allocated. // This method clears the bbVisitedSet on LinearScan, and when it returns the set // contains all the bbNums for the block. @@ -952,13 +952,13 @@ void LinearScan::setBlockSequence() // Initialize the "visited" blocks set. bbVisitedSet = BlockSetOps::MakeEmpty(compiler); - assert((dfsTraversal == nullptr) && (bbSeqCount == 0)); + assert(blockSequence == nullptr && bbSeqCount == 0); FlowGraphDfsTree* const dfsTree = compiler->fgComputeDfs(); - dfsTraversal = dfsTree->GetPostOrder(); - bbNumMaxBeforeResolution = compiler->fgBBNumMax; - blockInfo = new (compiler, CMK_LSRA) LsraBlockInfo[bbNumMaxBeforeResolution + 1]; - hasCriticalEdges = false; + blockSequence = new (compiler, CMK_LSRA) BasicBlock*[dfsTree->GetPostOrderCount()]; + bbNumMaxBeforeResolution = compiler->fgBBNumMax; + blockInfo = new (compiler, CMK_LSRA) LsraBlockInfo[bbNumMaxBeforeResolution + 1]; + hasCriticalEdges = false; // We use a bbNum of 0 for entry RefPositions. // The other information in blockInfo[0] will never be used. 
blockInfo[0].weight = BB_UNITY_WEIGHT; @@ -969,9 +969,14 @@ void LinearScan::setBlockSequence() } #endif // TRACK_LSRA_STATS - auto visitBlock = [=](BasicBlock* block) { + JITDUMP("Start LSRA Block Sequence: \n"); + for (unsigned i = dfsTree->GetPostOrderCount(); i != 0; i--) + { + BasicBlock* const block = dfsTree->GetPostOrder(i - 1); JITDUMP("Current block: " FMT_BB "\n", block->bbNum); + blockSequence[bbSeqCount] = block; markBlockVisited(block); + bbSeqCount++; // Initialize the blockInfo. // predBBNum will be set later. @@ -1052,48 +1057,14 @@ void LinearScan::setBlockSequence() break; } } - }; - - JITDUMP("Start LSRA Block Sequence: \n"); - for (unsigned i = dfsTree->GetPostOrderCount(); i != 0; i--) - { - visitBlock(dfsTraversal[i - 1]); - } - - // If the DFS didn't visit any blocks, add them to the beginning of dfsTraversal so we visit them last in RPO - assert(compiler->fgBBcount >= dfsTree->GetPostOrderCount()); - const unsigned numUnvisitedBlocks = compiler->fgBBcount - dfsTree->GetPostOrderCount(); - - if (numUnvisitedBlocks != 0) - { - // Make space at the beginning of dfsTraversal - // (we allocate an array of size compiler->fgBBcount for the DFS, so this is safe) - memcpy(dfsTraversal + numUnvisitedBlocks, dfsTraversal, (sizeof(BasicBlock*) * dfsTree->GetPostOrderCount())); - - // Unvisited throw blocks are more likely to be at the back of the list, so iterate backwards - unsigned i = 0; - for (BasicBlock* block = compiler->fgLastBB; i < numUnvisitedBlocks; block = block->Prev()) - { - assert(block != nullptr); - if (!isBlockVisited(block)) - { - assert(!dfsTree->Contains(block)); - visitBlock(block); - dfsTraversal[i++] = block; - } - } - - assert(i == numUnvisitedBlocks); } - - bbSeqCount = compiler->fgBBcount; blockSequencingDone = true; #ifdef DEBUG // Make sure that we've visited all the blocks. 
for (BasicBlock* const block : compiler->Blocks()) { - assert(isBlockVisited(block)); + assert(isBlockVisited(block) || (block->bbPreds == nullptr)); } JITDUMP("Final LSRA Block Sequence:\n"); @@ -1144,10 +1115,10 @@ BasicBlock* LinearScan::startBlockSequence() clearVisitedBlocks(); } + BasicBlock* curBB = compiler->fgFirstBB; curBBSeqNum = 0; - BasicBlock* curBB = getBlockFromSequence(curBBSeqNum); curBBNum = curBB->bbNum; - assert(curBB == compiler->fgFirstBB); + assert(blockSequence[0] == compiler->fgFirstBB); markBlockVisited(curBB); return curBB; } @@ -1191,21 +1162,15 @@ BasicBlock* LinearScan::moveToNextBlock() // BasicBlock* LinearScan::getNextBlock() { + assert(blockSequencingDone); unsigned int nextBBSeqNum = curBBSeqNum + 1; if (nextBBSeqNum < bbSeqCount) { - return getBlockFromSequence(nextBBSeqNum); + return blockSequence[nextBBSeqNum]; } return nullptr; } -BasicBlock* LinearScan::getBlockFromSequence(unsigned index) -{ - assert(blockSequencingDone); - assert(index < bbSeqCount); - return dfsTraversal[bbSeqCount - index - 1]; -} - //------------------------------------------------------------------------ // doLinearScan: The main method for register allocation. // diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index ec9aa52479fb4..2cfdc3328f601 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -635,11 +635,13 @@ class LinearScan : public LinearScanInterface } // Initialize the block traversal for LSRA. - // This resets the bbVisitedSet, and on the first invocation sets the dfsTraversal array. + // This resets the bbVisitedSet, and on the first invocation sets the blockSequence array, + // which determines the order in which blocks will be allocated (currently called during Lowering). BasicBlock* startBlockSequence(); // Move to the next block in sequence, updating the current block information. BasicBlock* moveToNextBlock(); - // Get the next block to be scheduled without changing the current block. 
+ // Get the next block to be scheduled without changing the current block, + // but updating the blockSequence during the first iteration if it is not fully computed. BasicBlock* getNextBlock(); // This is called during code generation to update the location of variables @@ -1628,11 +1630,11 @@ class LinearScan : public LinearScanInterface BasicBlock* findPredBlockForLiveIn(BasicBlock* block, BasicBlock* prevBlock DEBUGARG(bool* pPredBlockIsAllocated)); - // We will allocate blocks in reverse post-order by iterating dfsTraversal backwards - BasicBlock** dfsTraversal; - BasicBlock* getBlockFromSequence(unsigned index); - void setBlockSequence(); - bool blockSequencingDone; + // The order in which the blocks will be allocated. + // This is any array of BasicBlock*, in the order in which they should be traversed. + BasicBlock** blockSequence; + void setBlockSequence(); + bool blockSequencingDone; #ifdef DEBUG // LSRA must not change number of blocks and blockEpoch that it initializes at start. unsigned blockEpoch; diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index e11ed865e4a02..79bf3c16778df 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1955,7 +1955,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou LIR::Use use; GenTree* user = nullptr; - if (LIR::AsRange(getBlockFromSequence(curBBSeqNum)).TryGetUse(embOp2Node, &use)) + if (LIR::AsRange(blockSequence[curBBSeqNum]).TryGetUse(embOp2Node, &use)) { user = use.User(); } diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index e9ea40291835d..3303cbe46c115 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1128,7 +1128,7 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo // to be done before making this change. // Also note that we avoid setting callee-save preferences for floating point. 
This may need // revisiting, and note that it doesn't currently apply to SIMD types, only float or double. - // if (!getBlockFromSequence(curBBSeqNum)->isRunRarely()) + // if (!blockSequence[curBBSeqNum]->isRunRarely()) if (enregisterLocalVars) { VarSetOps::Iter iter(compiler, currentLiveVars); @@ -2853,7 +2853,7 @@ void LinearScan::buildIntervals() // Make sure we don't have any blocks that were not visited for (BasicBlock* const block : compiler->Blocks()) { - assert(isBlockVisited(block)); + assert(isBlockVisited(block) || (block->bbPreds == nullptr)); } if (VERBOSE) diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index fed0274b1e1de..7f50c8be9b507 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2392,7 +2392,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou LIR::Use use; GenTree* user = nullptr; - if (LIR::AsRange(getBlockFromSequence(curBBSeqNum)).TryGetUse(intrinsicTree, &use)) + if (LIR::AsRange(blockSequence[curBBSeqNum]).TryGetUse(intrinsicTree, &use)) { user = use.User(); } @@ -2578,7 +2578,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou LIR::Use use; GenTree* user = nullptr; - if (LIR::AsRange(getBlockFromSequence(curBBSeqNum)).TryGetUse(intrinsicTree, &use)) + if (LIR::AsRange(blockSequence[curBBSeqNum]).TryGetUse(intrinsicTree, &use)) { user = use.User(); } From 288bc37ca268c850b11b109de1f1bbb71f5ff7ee Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Tue, 17 Sep 2024 16:31:19 -0400 Subject: [PATCH 6/7] Simplify blockSequence building --- src/coreclr/jit/lsra.cpp | 57 +++++++++++++++++++++++++++++----------- src/coreclr/jit/lsra.h | 4 +-- 2 files changed, 43 insertions(+), 18 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index cc9ac931af601..2462b667fca77 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -913,10 +913,10 @@ 
LinearScan::LinearScan(Compiler* theCompiler) // Note that we don't initialize the bbVisitedSet until we do the first traversal // This is so that any blocks that are added during the first traversal // are accounted for (and we don't have BasicBlockEpoch issues). - blockSequencingDone = false; - blockSequence = nullptr; - curBBSeqNum = 0; - bbSeqCount = 0; + blockSequencingDone = false; + blockSequence = nullptr; + curBBSeqNum = 0; + bbSeqCount = 0; // Information about each block, including predecessor blocks used for variable locations at block entry. blockInfo = nullptr; @@ -935,8 +935,7 @@ LinearScan::LinearScan(Compiler* theCompiler) // None // // Notes: -// On return, the blockSequence array contains the blocks, in the order in which they -// will be allocated. +// On return, the blockSequence array contains the blocks in reverse post-order. // This method clears the bbVisitedSet on LinearScan, and when it returns the set // contains all the bbNums for the block. // @@ -952,11 +951,18 @@ void LinearScan::setBlockSequence() // Initialize the "visited" blocks set. 
bbVisitedSet = BlockSetOps::MakeEmpty(compiler); - assert(blockSequence == nullptr && bbSeqCount == 0); + assert((blockSequence == nullptr) && (bbSeqCount == 0)); FlowGraphDfsTree* const dfsTree = compiler->fgComputeDfs(); - blockSequence = new (compiler, CMK_LSRA) BasicBlock*[dfsTree->GetPostOrderCount()]; - bbNumMaxBeforeResolution = compiler->fgBBNumMax; - blockInfo = new (compiler, CMK_LSRA) LsraBlockInfo[bbNumMaxBeforeResolution + 1]; + blockSequence = dfsTree->GetPostOrder(); + bbNumMaxBeforeResolution = compiler->fgBBNumMax; + blockInfo = new (compiler, CMK_LSRA) LsraBlockInfo[bbNumMaxBeforeResolution + 1]; + + // Flip the DFS traversal to get the reverse post-order traversal + // (this is the order in which blocks will be allocated) + for (unsigned left = 0, right = dfsTree->GetPostOrderCount() - 1; left < right; left++, right--) + { + std::swap(blockSequence[left], blockSequence[right]); + } hasCriticalEdges = false; // We use a bbNum of 0 for entry RefPositions. @@ -969,14 +975,9 @@ void LinearScan::setBlockSequence() } #endif // TRACK_LSRA_STATS - JITDUMP("Start LSRA Block Sequence: \n"); - for (unsigned i = dfsTree->GetPostOrderCount(); i != 0; i--) - { - BasicBlock* const block = dfsTree->GetPostOrder(i - 1); + auto visitBlock = [this](BasicBlock* block) { JITDUMP("Current block: " FMT_BB "\n", block->bbNum); - blockSequence[bbSeqCount] = block; markBlockVisited(block); - bbSeqCount++; // Initialize the blockInfo. // predBBNum will be set later. 
@@ -1057,7 +1058,31 @@ void LinearScan::setBlockSequence() break; } } + }; + + JITDUMP("Start LSRA Block Sequence: \n"); + for (unsigned i = 0; i < dfsTree->GetPostOrderCount(); i++) + { + visitBlock(blockSequence[i]); } + + // If the DFS didn't visit all blocks, add the unvisited ones to the end of blockSequence + if (dfsTree->GetPostOrderCount() < compiler->fgBBcount) + { + // Unvisited blocks are more likely to be at the back of the list, so iterate backwards + unsigned i = dfsTree->GetPostOrderCount(); + for (BasicBlock* block = compiler->fgLastBB; i < compiler->fgBBcount; block = block->Prev()) + { + assert(block != nullptr); + if (!isBlockVisited(block)) + { + visitBlock(block); + blockSequence[i++] = block; + } + } + } + + bbSeqCount = compiler->fgBBcount; blockSequencingDone = true; #ifdef DEBUG diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 2cfdc3328f601..2dc111b2065ad 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1633,8 +1633,8 @@ class LinearScan : public LinearScanInterface // The order in which the blocks will be allocated. // This is any array of BasicBlock*, in the order in which they should be traversed. BasicBlock** blockSequence; - void setBlockSequence(); - bool blockSequencingDone; + void setBlockSequence(); + bool blockSequencingDone; #ifdef DEBUG // LSRA must not change number of blocks and blockEpoch that it initializes at start.
unsigned blockEpoch; From e18374d7fe29d972237614f6ebb73bc7ce8cfbf6 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Tue, 17 Sep 2024 16:35:05 -0400 Subject: [PATCH 7/7] Add back assert --- src/coreclr/jit/lsra.cpp | 2 +- src/coreclr/jit/lsrabuild.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 2462b667fca77..c57ae36038e8d 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -1089,7 +1089,7 @@ void LinearScan::setBlockSequence() // Make sure that we've visited all the blocks. for (BasicBlock* const block : compiler->Blocks()) { - assert(isBlockVisited(block) || (block->bbPreds == nullptr)); + assert(isBlockVisited(block)); } JITDUMP("Final LSRA Block Sequence:\n"); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 3303cbe46c115..3c390ee213941 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2853,7 +2853,7 @@ void LinearScan::buildIntervals() // Make sure we don't have any blocks that were not visited for (BasicBlock* const block : compiler->Blocks()) { - assert(isBlockVisited(block) || (block->bbPreds == nullptr)); + assert(isBlockVisited(block)); } if (VERBOSE)