dotnet · EgorBo · Sep 19, 2024 · Sep 14, 2024 · Sep 14, 2024 · Sep 15, 2024
diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
@@ -5170,14 +5170,14 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
         //
         DoPhase(this, PHASE_IF_CONVERSION, &Compiler::optIfConversion);
 
-        // Optimize block order
-        //
-        DoPhase(this, PHASE_OPTIMIZE_LAYOUT, &Compiler::optOptimizeLayout);
-
         // Conditional to Switch conversion
         //
         DoPhase(this, PHASE_SWITCH_RECOGNITION, &Compiler::optSwitchRecognition);
 
+        // Optimize block order
+        //
+        DoPhase(this, PHASE_OPTIMIZE_LAYOUT, &Compiler::optOptimizeLayout);
+
         // Determine start of cold region if we are hot/cold splitting
         //
         DoPhase(this, PHASE_DETERMINE_FIRST_COLD_BLOCK, &Compiler::fgDetermineFirstColdBlock);

diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
@@ -7053,6 +7053,7 @@ class Compiler
     PhaseStatus optSwitchRecognition();
     bool optSwitchConvert(BasicBlock* firstBlock, int testsCount, ssize_t* testValues, weight_t falseLikelihood, GenTree* nodeToTest);
     bool optSwitchDetectAndConvert(BasicBlock* firstBlock);
+    bool optExtendSwitch(BasicBlock* block);
 
     PhaseStatus optInvertLoops();    // Invert loops so they're entered at top and tested at bottom.
     PhaseStatus optOptimizeFlow();   // Simplify flow graph and do tail duplication
@@ -10509,6 +10510,7 @@ class Compiler
         STRESS_MODE(IF_CONVERSION_INNER_LOOPS)                                                  \
         STRESS_MODE(POISON_IMPLICIT_BYREFS)                                                     \
         STRESS_MODE(STORE_BLOCK_UNROLLING)                                                      \
+        STRESS_MODE(DONT_LIMIT_JUMP_TABLE)                                                      \
         STRESS_MODE(COUNT)
 
     enum                compStressArea

diff --git a/src/coreclr/jit/switchrecognition.cpp b/src/coreclr/jit/switchrecognition.cpp
@@ -22,26 +22,33 @@
 //
 PhaseStatus Compiler::optSwitchRecognition()
 {
-// Limit to XARCH, ARM is already doing a great job with such comparisons using
-// a series of ccmp instruction (see ifConvert phase).
-#ifdef TARGET_XARCH
     bool modified = false;
     for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->Next())
     {
-        // block->KindIs(BBJ_COND) check is for better throughput.
+#ifdef TARGET_XARCH
+        // Limit to XARCH, ARM is already doing a great job with such comparisons using
+        // a series of ccmp instruction (see ifConvert phase).
         if (block->KindIs(BBJ_COND) && !block->isRunRarely() && optSwitchDetectAndConvert(block))
         {
             JITDUMP("Converted block " FMT_BB " to switch\n", block->bbNum)
             modified = true;
         }
+#endif
+
+        // See if we can merge BBJ_COND into an existing switch block
+        while (block->KindIs(BBJ_SWITCH) && optExtendSwitch(block))
+        {
+            modified = true;
+        }
     }
 
     if (modified)
     {
+        // Remove unreachable blocks
+        fgUpdateFlowGraph();
         fgRenumberBlocks();
         return PhaseStatus::MODIFIED_EVERYTHING;
     }
-#endif
     return PhaseStatus::MODIFIED_NOTHING;
 }
 
@@ -63,8 +70,8 @@ PhaseStatus Compiler::optSwitchRecognition()
 //
 bool IsConstantTestCondBlock(const BasicBlock* block,
                              bool              allowSideEffects,
-                             BasicBlock**      trueTarget,
-                             BasicBlock**      falseTarget,
+                             FlowEdge**        trueEdge,
+                             FlowEdge**        falseEdge,
                              bool*             isReversed,
                              GenTree**         variableNode = nullptr,
                              ssize_t*          cns          = nullptr)
@@ -102,9 +109,9 @@ bool IsConstantTestCondBlock(const BasicBlock* block,
                     return false;
                 }
 
-                *isReversed  = rootNode->gtGetOp1()->OperIs(GT_NE);
-                *trueTarget  = *isReversed ? block->GetFalseTarget() : block->GetTrueTarget();
-                *falseTarget = *isReversed ? block->GetTrueTarget() : block->GetFalseTarget();
+                *isReversed = rootNode->gtGetOp1()->OperIs(GT_NE);
+                *trueEdge   = *isReversed ? block->GetFalseEdge() : block->GetTrueEdge();
+                *falseEdge  = *isReversed ? block->GetTrueEdge() : block->GetFalseEdge();
 
                 if (block->FalseTargetIs(block) || block->TrueTargetIs(block))
                 {
@@ -132,6 +139,215 @@ bool IsConstantTestCondBlock(const BasicBlock* block,
     return false;
 }
 
+//------------------------------------------------------------------------------
+// GetSwitchValueOp: Try to extract BBJ_SWITCH's variable (and the offset)
+//
+// Arguments:
+//    switchBlock  - BBJ_SWITCH block to get the switch value from
+//    variableNode - [out] Switch value node
+//    offset       - [out] Offset of the switch value
+//
+// Return Value:
+//    True if switch value was successfully extracted, false otherwise
+//
+static bool GetSwitchValueOp(const BasicBlock* switchBlock, GenTree** variableNode, ssize_t* offset)
+{
+    assert(switchBlock->KindIs(BBJ_SWITCH));
+
+    GenTree* switchNode = switchBlock->lastStmt()->GetRootNode()->gtEffectiveVal();
+    assert(switchNode->OperIs(GT_SWITCH));
+
+    GenTree* switchOp = switchNode->gtGetOp1();
+
+    // In most cases the switch value is either ADD(LCL_VAR, CNS) or LCL_VAR
+    if (switchOp->OperIs(GT_ADD))
+    {
+        // We use gtEffectiveVal() to skip COMMA nodes, those will be evaluated before
+        // any other case node in the switch block anyway.
+        GenTree* op1 = switchOp->gtGetOp1()->gtEffectiveVal();
+        GenTree* op2 = switchOp->gtGetOp2();
+
+        // TODO-JumpTable: We should be able to accept any side-effect-free op1 here
+        if (op1->OperIs(GT_LCL_VAR) && op2->IsCnsIntOrI())
+        {
+            *variableNode = op1;
+            *offset       = op2->AsIntCon()->IconValue();
+            return true;
+        }
+    }
+    else
+    {
+        // We use gtEffectiveVal() to skip COMMA nodes, those will be evaluated before
+        // any other case node in the switch block anyway.
+        GenTree* op1 = switchOp->gtEffectiveVal();
+        if (op1->OperIs(GT_LCL_VAR))
+        {
+            *variableNode = op1;
+            *offset       = 0;
+            return true;
+        }
+    }
+    return false;
+}
+
+//------------------------------------------------------------------------------
+// optExtendSwitch: If one of the switch's targets is a value test block,
+//    and the switch's variable is the same as the value test's variable - try to merge
+//    that test (BBJ_COND) into the switch block. It's useful to assist Roslyn, which
+//    uses fairly conservative heuristics to generate switch blocks and may give up on
+//    some tests that could be merged into the switch.
+//
+// Arguments:
+//    block - BBJ_SWITCH block to extend
+//
+// Return Value:
+//    True if switch was successfully extended, false otherwise
+//
+bool Compiler::optExtendSwitch(BasicBlock* block)
+{
+    assert(block->KindIs(BBJ_SWITCH));
+    JITDUMP("Considering expanding switch " FMT_BB "\n", block->bbNum);
+
+    if (opts.IsInstrumented())
+    {
+        JITDUMP("Instrumentation is enabled - bail out.\n");
+        return false;
+    }
+
+    assert(opts.OptimizationEnabled());
+
+    GenTree* switchOp           = nullptr;
+    ssize_t  switchTargetOffset = 0;
+    if (!GetSwitchValueOp(block, &switchOp, &switchTargetOffset))
+    {
+        JITDUMP("Couldn't extract the switch's variable - bail out.\n");
+        return false;
+    }
+
+    // TODO-JumpTable: Currently, we only expand the default target, but it should be possible
+    // to traverse all non-default edges as well (if they point to similar value tests).
+    // It shouldn't be too hard to implement (might be TP costly, though).
+
+    BBswtDesc* switchTargets = block->GetSwitchTargets();
+    if (!switchTargets->bbsHasDefault)
+    {
+        JITDUMP("Switch doesn't have a default target - bail out.\n");
+        return false;
+    }
+
+    // Now let's see if we can merge the value test in the edgeToExpand's destination block
+    // with the switch block.
+    FlowEdge*   edgeToExpand    = switchTargets->getDefault();
+    BasicBlock* edgeToExpandDst = edgeToExpand->getDestinationBlock();
+
+    // IsConstantTestCondBlock will tell us if the default target is a value test block (e.g. "x == cns")
+    // TODO-JumpTable: Add support for more complex tests like "x > 10 && x < 20"
+    // Also, we should be able to merge two BBJ_SWITCH on the same variable. Roslyn may create such shapes.
+    FlowEdge* testPasses;
+    FlowEdge* testFails;
+    bool      isReversed;
+    GenTree*  variableNode;
+    ssize_t   cns;
+    if (!IsConstantTestCondBlock(edgeToExpandDst, false, &testPasses, &testFails, &isReversed, &variableNode, &cns) ||
+        !edgeToExpandDst->hasSingleStmt())
+    {
+        JITDUMP("The default target is not performing a value test - bail out.\n");
+        return false;
+    }
+
+    BasicBlock* testPassesBb = testPasses->getDestinationBlock();
+    BasicBlock* testFailsBb  = testFails->getDestinationBlock();
+
+    if (!GenTree::Compare(variableNode, switchOp))
+    {
+        JITDUMP("The default target is performing a value test on a different variable - bail out.\n");
+        return false;
+    }
+
+    // We're less conservative than Roslyn, but we still have some limits (unless stress mode is enabled)
+
+    if ((static_cast<size_t>(cns + switchTargetOffset) > SWITCH_MAX_DISTANCE) &&
+        !compStressCompile(STRESS_DONT_LIMIT_JUMP_TABLE, 50))
+    {
+        JITDUMP("Switch value is out of range - bail out.\n");
+        return false;
+    }
+
+    // We need to normalize the test value to the switch's range
+    const ssize_t newTestValue = cns + switchTargetOffset;
+
+    unsigned   oldJumpCnt = switchTargets->bbsCount;
+    unsigned   newJumpCnt = max(oldJumpCnt, (unsigned)(newTestValue + 1 + (switchTargets->bbsHasDefault ? 1 : 0)));
+    FlowEdge** oldJumpTab = switchTargets->bbsDstTab;
+    FlowEdge** newJumpTab = new (this, CMK_FlowEdge) FlowEdge*[newJumpCnt];
+
+    //
+    // Update likelihoods
+    //
+    weight_t edgeToExpandLikelihood = edgeToExpand->getLikelihood();
+    weight_t newTestLikelihood      = edgeToExpandLikelihood * testPasses->getLikelihood();
+    weight_t newDefLikelihood       = edgeToExpandLikelihood * testFails->getLikelihood();
+
+    // However, it is tricky if testPasses or testFails are already presented in the jump table.
+    // Switch block currently doesn't support different likelihoods for edges pointing to the same block.
+    // So we need to adjust the existing likelihoods.
+    for (unsigned i = 0; i < oldJumpCnt; i++)
+    {
+        BasicBlock* edgeDstBb      = oldJumpTab[i]->getDestinationBlock();
+        weight_t    edgeLikelihood = oldJumpTab[i]->getLikelihood();
+
+        if (edgeDstBb == testPassesBb)
+        {
+            newTestLikelihood = edgeLikelihood + edgeToExpandLikelihood * testPasses->getLikelihood();
+        }
+        if (edgeDstBb == testFailsBb)
+        {
+            newDefLikelihood = edgeLikelihood + edgeToExpandLikelihood * testFails->getLikelihood();
+        }
+    }
+
+    //
+    // Create the new jump table
+    //
+    for (unsigned i = 0; i < newJumpCnt; i++)
+    {
+        if (i < oldJumpCnt)
+        {
+            // Not only the default case can point to edgeToExpandDst
+            if (oldJumpTab[i]->getDestinationBlock() == edgeToExpandDst)
+            {
+                fgRemoveRefPred(oldJumpTab[i]);
+            }
+            else
+            {
+                // Just copy the old edge
+                newJumpTab[i] = oldJumpTab[i];
+                continue;
+            }
+        }
+
+        if (i == (unsigned)newTestValue)
+        {
+            newJumpTab[i] = fgAddRefPred(testPassesBb, block);
+            newJumpTab[i]->setLikelihood(newTestLikelihood);
+        }
+        else
+        {
+            newJumpTab[i] = fgAddRefPred(testFailsBb, block);
+            newJumpTab[i]->setLikelihood(newDefLikelihood);
+        }
+    }
+
+    switchTargets->bbsCount  = newJumpCnt;
+    switchTargets->bbsDstTab = newJumpTab;
+
+    // m_switchDescMap cache is invalidated by this change, so it has to be recomputed
+    InvalidateUniqueSwitchSuccMap();
+
+    JITDUMP("Successfully merged the default target's value test with the switch.\n");
+    return true;
+}
+
 //------------------------------------------------------------------------------
 // optSwitchDetectAndConvert : Try to detect a series of conditional blocks which
 //    can be converted into a switch (jump-table) construct. See optSwitchConvert
@@ -147,10 +363,10 @@ bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock)
 {
     assert(firstBlock->KindIs(BBJ_COND));
 
-    GenTree*    variableNode = nullptr;
-    ssize_t     cns          = 0;
-    BasicBlock* trueTarget   = nullptr;
-    BasicBlock* falseTarget  = nullptr;
+    GenTree*  variableNode = nullptr;
+    ssize_t   cns          = 0;
+    FlowEdge* trueTarget   = nullptr;
+    FlowEdge* falseTarget  = nullptr;
 
     // The algorithm is simple - we check that the given block is a constant test block
     // and then try to accumulate as many constant test blocks as possible. Once we hit
@@ -180,10 +396,10 @@ bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock)
         // Now walk the next blocks and see if they are basically the same type of test
         for (const BasicBlock* currBb = firstBlock->Next(); currBb != nullptr; currBb = currBb->Next())
         {
-            GenTree*    currVariableNode = nullptr;
-            ssize_t     currCns          = 0;
-            BasicBlock* currTrueTarget   = nullptr;
-            BasicBlock* currFalseTarget  = nullptr;
+            GenTree*  currVariableNode = nullptr;
+            ssize_t   currCns          = 0;
+            FlowEdge* currTrueTarget   = nullptr;
+            FlowEdge* currFalseTarget  = nullptr;
 
             if (!currBb->hasSingleStmt())
             {
@@ -196,7 +412,7 @@ bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock)
             if (IsConstantTestCondBlock(currBb, false, &currTrueTarget, &currFalseTarget, &isReversed,
                                         &currVariableNode, &currCns))
             {
-                if (currTrueTarget != trueTarget)
+                if (currTrueTarget->getDestinationBlock() != trueTarget->getDestinationBlock())
                 {
                     // This blocks jumps to a different target, stop searching and process what we already have.
                     return optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode);
@@ -329,12 +545,15 @@ bool Compiler::optSwitchConvert(
         lastBlock = lastBlock->Next();
     }
 
-    BasicBlock* blockIfTrue  = nullptr;
-    BasicBlock* blockIfFalse = nullptr;
-    bool        isReversed   = false;
-    const bool  isTest       = IsConstantTestCondBlock(lastBlock, false, &blockIfTrue, &blockIfFalse, &isReversed);
+    FlowEdge*  blockIfTrueEdge  = nullptr;
+    FlowEdge*  blockIfFalseEdge = nullptr;
+    bool       isReversed       = false;
+    const bool isTest = IsConstantTestCondBlock(lastBlock, false, &blockIfTrueEdge, &blockIfFalseEdge, &isReversed);
     assert(isTest);
 
+    BasicBlock* blockIfTrue  = blockIfTrueEdge->getDestinationBlock();
+    BasicBlock* blockIfFalse = blockIfFalseEdge->getDestinationBlock();
+
     assert(firstBlock->TrueTargetIs(blockIfTrue));
     FlowEdge* const trueEdge  = firstBlock->GetTrueEdge();
     FlowEdge* const falseEdge = firstBlock->GetFalseEdge();