diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index 00dfb10b8072b..5702221c02294 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -4796,9 +4796,9 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
     //
     DoPhase(this, PHASE_INVERT_LOOPS, &Compiler::optInvertLoops);
 
-    // Optimize block order
+    // Run some flow graph optimizations (but don't reorder)
     //
-    DoPhase(this, PHASE_OPTIMIZE_LAYOUT, &Compiler::optOptimizeLayout);
+    DoPhase(this, PHASE_OPTIMIZE_FLOW, &Compiler::optOptimizeFlow);
 
     // Compute reachability sets and dominators.
     //
@@ -4988,6 +4988,13 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
     // Insert GC Polls
     DoPhase(this, PHASE_INSERT_GC_POLLS, &Compiler::fgInsertGCPolls);
 
+    // Optimize block order
+    //
+    if (opts.OptimizationEnabled())
+    {
+        DoPhase(this, PHASE_OPTIMIZE_LAYOUT, &Compiler::optOptimizeLayout);
+    }
+
     // Determine start of cold region if we are hot/cold splitting
     //
     DoPhase(this, PHASE_DETERMINE_FIRST_COLD_BLOCK, &Compiler::fgDetermineFirstColdBlock);
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 99b5906a58c72..34bb62a0daafe 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -5223,7 +5223,7 @@ class Compiler
     void fgComputeCalledCount(weight_t returnWeight);
     void fgComputeEdgeWeights();
 
-    bool fgReorderBlocks();
+    bool fgReorderBlocks(bool useProfile);
 
    PhaseStatus fgDetermineFirstColdBlock();
 
@@ -5998,6 +5998,7 @@ class Compiler
 
 public:
     PhaseStatus optInvertLoops();    // Invert loops so they're entered at top and tested at bottom.
+    PhaseStatus optOptimizeFlow();   // Simplify flow graph and do tail duplication
     PhaseStatus optOptimizeLayout(); // Optimize the BasicBlock layout of the method
     PhaseStatus optSetBlockWeights();
     PhaseStatus optFindLoopsPhase(); // Finds loops and records them in the loop table
diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h
index 3b1907e86fde0..cb0d284ff56ad 100644
--- a/src/coreclr/jit/compphases.h
+++ b/src/coreclr/jit/compphases.h
@@ -55,6 +55,7 @@ CompPhaseNameMacro(PHASE_CREATE_FUNCLETS, "Create EH funclets",
 #endif // FEATURE_EH_FUNCLETS
 CompPhaseNameMacro(PHASE_MERGE_THROWS, "Merge throw blocks", "MRGTHROW", false, -1, false)
 CompPhaseNameMacro(PHASE_INVERT_LOOPS, "Invert loops", "LOOP-INV", false, -1, false)
+CompPhaseNameMacro(PHASE_OPTIMIZE_FLOW, "Optimize control flow", "OPT-FLOW", false, -1, false)
 CompPhaseNameMacro(PHASE_OPTIMIZE_LAYOUT, "Optimize layout", "LAYOUT", false, -1, false)
 CompPhaseNameMacro(PHASE_COMPUTE_REACHABILITY, "Compute blocks reachability", "BL_REACH", false, -1, false)
 CompPhaseNameMacro(PHASE_SET_BLOCK_WEIGHTS, "Set block weights", "BL-WEIGHTS", false, -1, false)
diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp
index 6caaf338ffea9..228e01cbf5ad4 100644
--- a/src/coreclr/jit/fgopt.cpp
+++ b/src/coreclr/jit/fgopt.cpp
@@ -4639,14 +4639,22 @@ bool Compiler::fgExpandRarelyRunBlocks()
 
 //-----------------------------------------------------------------------------
 // fgReorderBlocks: reorder blocks to favor frequent fall through paths,
-//     move rare blocks to the end of the method/eh region, and move
-//     funclets to the ends of methods.
+//   move rare blocks to the end of the method/eh region, and move
+//   funclets to the ends of methods.
+//
+// Arguments:
+//   useProfile - if true, use profile data (if available) to more aggressively
+//     reorder the blocks.
 //
 // Returns:
-//     True if anything got reordered. Reordering blocks may require changing
-//     IR to reverse branch conditions.
+//   True if anything got reordered. Reordering blocks may require changing
+//   IR to reverse branch conditions.
 //
-bool Compiler::fgReorderBlocks()
+// Notes:
+//   We currently allow profile-driven switch opts even when useProfile is false,
+//   as they are unlikely to lead to reordering.
+//
+bool Compiler::fgReorderBlocks(bool useProfile)
 {
     noway_assert(opts.compDbgCode == false);
 
@@ -4726,7 +4734,7 @@ bool Compiler::fgReorderBlocks()
             continue;
         }
 
-        bool        reorderBlock   = true; // This is set to false if we decide not to reorder 'block'
+        bool        reorderBlock   = useProfile;
         bool        isRare         = block->isRunRarely();
         BasicBlock* bDest          = nullptr;
         bool        forwardBranch  = false;
@@ -4752,7 +4760,8 @@
 
         weight_t profHotWeight = -1;
 
-        if (bPrev->hasProfileWeight() && block->hasProfileWeight() && ((bDest == nullptr) || bDest->hasProfileWeight()))
+        if (useProfile && bPrev->hasProfileWeight() && block->hasProfileWeight() &&
+            ((bDest == nullptr) || bDest->hasProfileWeight()))
         {
             //
             // All blocks have profile information
@@ -5682,13 +5691,21 @@
         if (bPrev->bbJumpKind == BBJ_COND)
         {
             /* Reverse the bPrev jump condition */
-            Statement* condTestStmt = bPrev->lastStmt();
+            Statement* const condTestStmt = bPrev->lastStmt();
+            GenTree* const   condTest     = condTestStmt->GetRootNode();
 
-            GenTree* condTest = condTestStmt->GetRootNode();
             noway_assert(condTest->gtOper == GT_JTRUE);
-
             condTest->AsOp()->gtOp1 = gtReverseCond(condTest->AsOp()->gtOp1);
 
+            // may need to rethread
+            //
+            if (fgStmtListThreaded)
+            {
+                JITDUMP("Rethreading " FMT_STMT "\n", condTestStmt->GetID());
+                gtSetStmtInfo(condTestStmt);
+                fgSetStmtSeq(condTestStmt);
+            }
+
             if (bStart2 == nullptr)
             {
                 /* Set the new jump dest for bPrev to the rarely run or uncommon block(s) */
diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp
index a512f918efb74..0f70f0d619df5 100644
--- a/src/coreclr/jit/flowgraph.cpp
+++ b/src/coreclr/jit/flowgraph.cpp
@@ -175,7 +175,7 @@ PhaseStatus Compiler::fgInsertGCPolls()
         if (createdPollBlocks)
         {
             noway_assert(opts.OptimizationEnabled());
-            fgReorderBlocks();
+            fgReorderBlocks(/* useProfile */ false);
             constexpr bool computePreds = true;
             constexpr bool computeDoms  = false;
             fgUpdateChangedFlowGraph(computePreds, computeDoms);
diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp
index 6006907e6933e..e58f1c908bcd9 100644
--- a/src/coreclr/jit/optimizer.cpp
+++ b/src/coreclr/jit/optimizer.cpp
@@ -2330,6 +2330,51 @@ class LoopSearch
                 // prevent assertions from complaining about it.
                 block->bbFlags |= BBF_KEEP_BBJ_ALWAYS;
             }
+
+            // If block is newNext's only predecessor, move the IR from block to newNext,
+            // but keep the now-empty block around.
+            //
+            // We move the IR because loop recognition has a very limited search capability and
+            // won't walk from one block's statements to another, even if the blocks form
+            // a linear chain. So this IR move enhances counted loop recognition.
+            //
+            // The logic here is essentially echoing fgCompactBlocks... but we don't call
+            // that here because we don't want to delete block and do the necessary updates
+            // to all the other data in flight, and we'd also prefer that newNext be the
+            // survivor, not block.
+            //
+            if ((newNext->bbRefs == 1) && comp->fgCanCompactBlocks(block, newNext))
+            {
+                JITDUMP("Moving stmts from " FMT_BB " to " FMT_BB "\n", block->bbNum, newNext->bbNum);
+                Statement* stmtList1 = block->firstStmt();
+                Statement* stmtList2 = newNext->firstStmt();
+
+                // Is there anything to move?
+                //
+                if (stmtList1 != nullptr)
+                {
+                    // Append newNext stmts to block's stmts.
+                    //
+                    if (stmtList2 != nullptr)
+                    {
+                        Statement* stmtLast1 = block->lastStmt();
+                        Statement* stmtLast2 = newNext->lastStmt();
+
+                        stmtLast1->SetNextStmt(stmtList2);
+                        stmtList2->SetPrevStmt(stmtLast1);
+                        stmtList1->SetPrevStmt(stmtLast2);
+                    }
+
+                    // Move block's stmts to newNext
+                    //
+                    newNext->bbStmtList = stmtList1;
+                    block->bbStmtList   = nullptr;
+
+                    // Update newNext's block flags
+                    //
+                    newNext->bbFlags |= (block->bbFlags & BBF_COMPACT_UPD);
+                }
+            }
         }
 
         // Make sure we don't leave around a goto-next unless it's marked KEEP_BBJ_ALWAYS.
@@ -4797,22 +4842,56 @@ PhaseStatus Compiler::optInvertLoops()
     return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
 }
 
+//-----------------------------------------------------------------------------
+// optOptimizeFlow: simplify flow graph
+//
+// Returns:
+//   suitable phase status
+//
+// Notes:
+//   Does not do profile-based reordering, to try and ensure that
+//   we recognize and represent as many loops as possible.
+//
+PhaseStatus Compiler::optOptimizeFlow()
+{
+    noway_assert(opts.OptimizationEnabled());
+    noway_assert(fgModified == false);
+
+    bool madeChanges = false;
+
+    madeChanges |= fgUpdateFlowGraph(/* allowTailDuplication */ true);
+    madeChanges |= fgReorderBlocks(/* useProfile */ false);
+    madeChanges |= fgUpdateFlowGraph();
+
+    // fgReorderBlocks can cause IR changes even if it does not modify
+    // the flow graph. It calls gtPrepareCost which can cause operand swapping.
+    // Work around this for now.
+    //
+    // Note phase status only impacts dumping and checking done post-phase,
+    // it has no impact on a release build.
+    //
+    madeChanges = true;
+
+    return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
+}
+
 //-----------------------------------------------------------------------------
 // optOptimizeLayout: reorder blocks to reduce cost of control flow
 //
 // Returns:
 //   suitable phase status
 //
+// Notes:
+//   Reorders using profile data, if available.
+//
 PhaseStatus Compiler::optOptimizeLayout()
 {
     noway_assert(opts.OptimizationEnabled());
-    noway_assert(fgModified == false);
 
-    bool       madeChanges          = false;
-    const bool allowTailDuplication = true;
+    bool madeChanges = false;
 
-    madeChanges |= fgUpdateFlowGraph(allowTailDuplication);
-    madeChanges |= fgReorderBlocks();
+    madeChanges |= fgUpdateFlowGraph(/* allowTailDuplication */ false);
+    madeChanges |= fgReorderBlocks(/* useProfile */ true);
     madeChanges |= fgUpdateFlowGraph();
 
     // fgReorderBlocks can cause IR changes even if it does not modify
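
A note on the rethreading added to fgReorderBlocks: when fgStmtListThreaded is set, each statement carries a side list of its nodes in execution order, so an in-place edit of the tree (here, reversing the branch condition) invalidates that order and the statement must be re-sequenced. The sketch below illustrates only the idea; Node and Resequence are simplified stand-ins, not the JIT's GenTree or fgSetStmtSeq.

    #include <vector>

    struct Node
    {
        std::vector<Node*> operands; // evaluated left to right, before this node
    };

    // Rebuild the execution-order list after a tree has been edited in place:
    // a post-order walk visits operands before their operator, which is the
    // order in which the nodes execute.
    void Resequence(Node* root, std::vector<Node*>& order)
    {
        for (Node* op : root->operands)
        {
            Resequence(op, order);
        }
        order.push_back(root);
    }

    int main()
    {
        Node x, y, cmp;
        cmp.operands = {&x, &y};

        std::vector<Node*> order;
        Resequence(&cmp, order); // order is now { &x, &y, &cmp }
        return (order.size() == 3) ? 0 : 1;
    }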
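
The statement move in the LoopSearch change is a splice of two doubly-linked statement lists. The detail worth calling out is the third link update: the JIT caches a statement list's tail in the head's prev link (that is how lastStmt() is found in constant time), so an append must also repoint the head's prev at the new tail. Below is a minimal standalone sketch of just that list operation, using simplified stand-in types; Stmt and AppendStmtList are not the JIT's Statement API, but the three link updates mirror the SetNextStmt/SetPrevStmt calls in the diff.

    #include <cassert>

    struct Stmt
    {
        Stmt* next = nullptr;
        Stmt* prev = nullptr; // for a list head, points at the tail
    };

    // Append the list headed by head2 onto the list headed by head1 and
    // return the head of the combined list.
    Stmt* AppendStmtList(Stmt* head1, Stmt* head2)
    {
        if (head1 == nullptr)
        {
            return head2;
        }
        if (head2 == nullptr)
        {
            return head1;
        }

        Stmt* last1 = head1->prev; // head's prev caches the tail
        Stmt* last2 = head2->prev;

        last1->next = head2; // stmtLast1->SetNextStmt(stmtList2)
        head2->prev = last1; // stmtList2->SetPrevStmt(stmtLast1)
        head1->prev = last2; // stmtList1->SetPrevStmt(stmtLast2): cache the new tail
        return head1;
    }

    int main()
    {
        Stmt a, b, c;
        a.next = &b; // list1: a -> b
        b.prev = &a;
        a.prev = &b; // head caches tail
        c.prev = &c; // list2: just c

        Stmt* merged = AppendStmtList(&a, &c);
        assert(merged == &a);
        assert((b.next == &c) && (c.prev == &b));
        assert(a.prev == &c); // head's prev now points at the new tail
        return 0;
    }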