diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 2b9425e6fe502..0f6cf2d95edcf 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -5170,14 +5170,14 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // DoPhase(this, PHASE_IF_CONVERSION, &Compiler::optIfConversion); - // Optimize block order - // - DoPhase(this, PHASE_OPTIMIZE_LAYOUT, &Compiler::optOptimizeLayout); - // Conditional to Switch conversion // DoPhase(this, PHASE_SWITCH_RECOGNITION, &Compiler::optSwitchRecognition); + // Optimize block order + // + DoPhase(this, PHASE_OPTIMIZE_LAYOUT, &Compiler::optOptimizeLayout); + // Determine start of cold region if we are hot/cold splitting // DoPhase(this, PHASE_DETERMINE_FIRST_COLD_BLOCK, &Compiler::fgDetermineFirstColdBlock); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index a23a650a805f2..44c3cf1a9da81 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7053,6 +7053,7 @@ class Compiler PhaseStatus optSwitchRecognition(); bool optSwitchConvert(BasicBlock* firstBlock, int testsCount, ssize_t* testValues, weight_t falseLikelihood, GenTree* nodeToTest); bool optSwitchDetectAndConvert(BasicBlock* firstBlock); + bool optExtendSwitch(BasicBlock* block); PhaseStatus optInvertLoops(); // Invert loops so they're entered at top and tested at bottom. PhaseStatus optOptimizeFlow(); // Simplify flow graph and do tail duplication @@ -10509,6 +10510,7 @@ class Compiler STRESS_MODE(IF_CONVERSION_INNER_LOOPS) \ STRESS_MODE(POISON_IMPLICIT_BYREFS) \ STRESS_MODE(STORE_BLOCK_UNROLLING) \ + STRESS_MODE(DONT_LIMIT_JUMP_TABLE) \ STRESS_MODE(COUNT) enum compStressArea diff --git a/src/coreclr/jit/switchrecognition.cpp b/src/coreclr/jit/switchrecognition.cpp index b0d2fe9455230..7642ac836f33d 100644 --- a/src/coreclr/jit/switchrecognition.cpp +++ b/src/coreclr/jit/switchrecognition.cpp @@ -22,26 +22,33 @@ // PhaseStatus Compiler::optSwitchRecognition() { -// Limit to XARCH, ARM is already doing a great job with such comparisons using -// a series of ccmp instruction (see ifConvert phase). -#ifdef TARGET_XARCH bool modified = false; for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->Next()) { - // block->KindIs(BBJ_COND) check is for better throughput. +#ifdef TARGET_XARCH + // Limit to XARCH, ARM is already doing a great job with such comparisons using + // a series of ccmp instruction (see ifConvert phase). if (block->KindIs(BBJ_COND) && !block->isRunRarely() && optSwitchDetectAndConvert(block)) { JITDUMP("Converted block " FMT_BB " to switch\n", block->bbNum) modified = true; } +#endif + + // See if we can merge BBJ_COND into an existing switch block + while (block->KindIs(BBJ_SWITCH) && optExtendSwitch(block)) + { + modified = true; + } } if (modified) { + // Remove unreachable blocks + fgUpdateFlowGraph(); fgRenumberBlocks(); return PhaseStatus::MODIFIED_EVERYTHING; } -#endif return PhaseStatus::MODIFIED_NOTHING; } @@ -63,8 +70,8 @@ PhaseStatus Compiler::optSwitchRecognition() // bool IsConstantTestCondBlock(const BasicBlock* block, bool allowSideEffects, - BasicBlock** trueTarget, - BasicBlock** falseTarget, + FlowEdge** trueEdge, + FlowEdge** falseEdge, bool* isReversed, GenTree** variableNode = nullptr, ssize_t* cns = nullptr) @@ -102,9 +109,9 @@ bool IsConstantTestCondBlock(const BasicBlock* block, return false; } - *isReversed = rootNode->gtGetOp1()->OperIs(GT_NE); - *trueTarget = *isReversed ? block->GetFalseTarget() : block->GetTrueTarget(); - *falseTarget = *isReversed ? block->GetTrueTarget() : block->GetFalseTarget(); + *isReversed = rootNode->gtGetOp1()->OperIs(GT_NE); + *trueEdge = *isReversed ? block->GetFalseEdge() : block->GetTrueEdge(); + *falseEdge = *isReversed ? block->GetTrueEdge() : block->GetFalseEdge(); if (block->FalseTargetIs(block) || block->TrueTargetIs(block)) { @@ -132,6 +139,215 @@ bool IsConstantTestCondBlock(const BasicBlock* block, return false; } +//------------------------------------------------------------------------------ +// GetSwitchValueOp: Try to extract BBJ_SWITCH's variable (and the offset) +// +// Arguments: +// switchBlock - BBJ_SWITCH block to get the switch value from +// variableNode - [out] Switch value node +// offset - [out] Offset of the switch value +// +// Return Value: +// True if switch value was successfully extracted, false otherwise +// +static bool GetSwitchValueOp(const BasicBlock* switchBlock, GenTree** variableNode, ssize_t* offset) +{ + assert(switchBlock->KindIs(BBJ_SWITCH)); + + GenTree* switchNode = switchBlock->lastStmt()->GetRootNode()->gtEffectiveVal(); + assert(switchNode->OperIs(GT_SWITCH)); + + GenTree* switchOp = switchNode->gtGetOp1(); + + // In most cases the switch value is either ADD(LCL_VAR, CNS) or LCL_VAR + if (switchOp->OperIs(GT_ADD)) + { + // We use gtEffectiveVal() to skip COMMA nodes, those will be evaluated before + // any other case node in the switch block anyway. + GenTree* op1 = switchOp->gtGetOp1()->gtEffectiveVal(); + GenTree* op2 = switchOp->gtGetOp2(); + + // TODO-JumpTable: We should be able to accept any side-effect-free op1 here + if (op1->OperIs(GT_LCL_VAR) && op2->IsCnsIntOrI()) + { + *variableNode = op1; + *offset = op2->AsIntCon()->IconValue(); + return true; + } + } + else + { + // We use gtEffectiveVal() to skip COMMA nodes, those will be evaluated before + // any other case node in the switch block anyway. + GenTree* op1 = switchOp->gtEffectiveVal(); + if (op1->OperIs(GT_LCL_VAR)) + { + *variableNode = op1; + *offset = 0; + return true; + } + } + return false; +} + +//------------------------------------------------------------------------------ +// optExtendSwitch: If one of the switch's targets is a value test block, +// and the switch's variable is the same as the value test's variable - try to merge +// that test (BBJ_COND) into the switch block. It's useful to assist Roslyn, which +// uses fairly conservative heuristics to generate switch blocks and may give up on +// some tests that could be merged into the switch. +// +// Arguments: +// block - BBJ_SWITCH block to extend +// +// Return Value: +// True if switch was successfully extended, false otherwise +// +bool Compiler::optExtendSwitch(BasicBlock* block) +{ + assert(block->KindIs(BBJ_SWITCH)); + JITDUMP("Considering expanding switch " FMT_BB "\n", block->bbNum); + + if (opts.IsInstrumented()) + { + JITDUMP("Instrumentation is enabled - bail out.\n"); + return false; + } + + assert(opts.OptimizationEnabled()); + + GenTree* switchOp = nullptr; + ssize_t switchTargetOffset = 0; + if (!GetSwitchValueOp(block, &switchOp, &switchTargetOffset)) + { + JITDUMP("Couldn't extract the switch's variable - bail out.\n"); + return false; + } + + // TODO-JumpTable: Currently, we only expand the default target, but it should be possible + // to traverse all non-default edges as well (if they point to similar value tests). + // It shouldn't be too hard to implement (might be TP costly, though). + + BBswtDesc* switchTargets = block->GetSwitchTargets(); + if (!switchTargets->bbsHasDefault) + { + JITDUMP("Switch doesn't have a default target - bail out.\n"); + return false; + } + + // Now let's see if we can merge the value test in the edgeToExpand's destination block + // with the switch block. + FlowEdge* edgeToExpand = switchTargets->getDefault(); + BasicBlock* edgeToExpandDst = edgeToExpand->getDestinationBlock(); + + // IsConstantTestCondBlock will tell us if the default target is a value test block (e.g. "x == cns") + // TODO-JumpTable: Add support for more complex tests like "x > 10 && x < 20" + // Also, we should be able to merge two BBJ_SWITCH on the same variable. Roslyn may create such shapes. + FlowEdge* testPasses; + FlowEdge* testFails; + bool isReversed; + GenTree* variableNode; + ssize_t cns; + if (!IsConstantTestCondBlock(edgeToExpandDst, false, &testPasses, &testFails, &isReversed, &variableNode, &cns) || + !edgeToExpandDst->hasSingleStmt()) + { + JITDUMP("The default target is not performing a value test - bail out.\n"); + return false; + } + + BasicBlock* testPassesBb = testPasses->getDestinationBlock(); + BasicBlock* testFailsBb = testFails->getDestinationBlock(); + + if (!GenTree::Compare(variableNode, switchOp)) + { + JITDUMP("The default target is performing a value test on a different variable - bail out.\n"); + return false; + } + + // We're less conservative than Roslyn, but we still have some limits (unless stress mode is enabled) + + if ((static_cast(cns + switchTargetOffset) > SWITCH_MAX_DISTANCE) && + !compStressCompile(STRESS_DONT_LIMIT_JUMP_TABLE, 50)) + { + JITDUMP("Switch value is out of range - bail out.\n"); + return false; + } + + // We need to normalize the test value to the switch's range + const ssize_t newTestValue = cns + switchTargetOffset; + + unsigned oldJumpCnt = switchTargets->bbsCount; + unsigned newJumpCnt = max(oldJumpCnt, (unsigned)(newTestValue + 1 + (switchTargets->bbsHasDefault ? 1 : 0))); + FlowEdge** oldJumpTab = switchTargets->bbsDstTab; + FlowEdge** newJumpTab = new (this, CMK_FlowEdge) FlowEdge*[newJumpCnt]; + + // + // Update likelihoods + // + weight_t edgeToExpandLikelihood = edgeToExpand->getLikelihood(); + weight_t newTestLikelihood = edgeToExpandLikelihood * testPasses->getLikelihood(); + weight_t newDefLikelihood = edgeToExpandLikelihood * testFails->getLikelihood(); + + // However, it is tricky if testPasses or testFails are already presented in the jump table. + // Switch block currently doesn't support different likelihoods for edges pointing to the same block. + // So we need to adjust the existing likelihoods. + for (unsigned i = 0; i < oldJumpCnt; i++) + { + BasicBlock* edgeDstBb = oldJumpTab[i]->getDestinationBlock(); + weight_t edgeLikelihood = oldJumpTab[i]->getLikelihood(); + + if (edgeDstBb == testPassesBb) + { + newTestLikelihood = edgeLikelihood + edgeToExpandLikelihood * testPasses->getLikelihood(); + } + if (edgeDstBb == testFailsBb) + { + newDefLikelihood = edgeLikelihood + edgeToExpandLikelihood * testFails->getLikelihood(); + } + } + + // + // Create the new jump table + // + for (unsigned i = 0; i < newJumpCnt; i++) + { + if (i < oldJumpCnt) + { + // Not only the default case can point to edgeToExpandDst + if (oldJumpTab[i]->getDestinationBlock() == edgeToExpandDst) + { + fgRemoveRefPred(oldJumpTab[i]); + } + else + { + // Just copy the old edge + newJumpTab[i] = oldJumpTab[i]; + continue; + } + } + + if (i == (unsigned)newTestValue) + { + newJumpTab[i] = fgAddRefPred(testPassesBb, block); + newJumpTab[i]->setLikelihood(newTestLikelihood); + } + else + { + newJumpTab[i] = fgAddRefPred(testFailsBb, block); + newJumpTab[i]->setLikelihood(newDefLikelihood); + } + } + + switchTargets->bbsCount = newJumpCnt; + switchTargets->bbsDstTab = newJumpTab; + + // m_switchDescMap cache is invalidated by this change, so it has to be recomputed + InvalidateUniqueSwitchSuccMap(); + + JITDUMP("Successfully merged the default target's value test with the switch.\n"); + return true; +} + //------------------------------------------------------------------------------ // optSwitchDetectAndConvert : Try to detect a series of conditional blocks which // can be converted into a switch (jump-table) construct. See optSwitchConvert @@ -147,10 +363,10 @@ bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock) { assert(firstBlock->KindIs(BBJ_COND)); - GenTree* variableNode = nullptr; - ssize_t cns = 0; - BasicBlock* trueTarget = nullptr; - BasicBlock* falseTarget = nullptr; + GenTree* variableNode = nullptr; + ssize_t cns = 0; + FlowEdge* trueTarget = nullptr; + FlowEdge* falseTarget = nullptr; // The algorithm is simple - we check that the given block is a constant test block // and then try to accumulate as many constant test blocks as possible. Once we hit @@ -180,10 +396,10 @@ bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock) // Now walk the next blocks and see if they are basically the same type of test for (const BasicBlock* currBb = firstBlock->Next(); currBb != nullptr; currBb = currBb->Next()) { - GenTree* currVariableNode = nullptr; - ssize_t currCns = 0; - BasicBlock* currTrueTarget = nullptr; - BasicBlock* currFalseTarget = nullptr; + GenTree* currVariableNode = nullptr; + ssize_t currCns = 0; + FlowEdge* currTrueTarget = nullptr; + FlowEdge* currFalseTarget = nullptr; if (!currBb->hasSingleStmt()) { @@ -196,7 +412,7 @@ bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock) if (IsConstantTestCondBlock(currBb, false, &currTrueTarget, &currFalseTarget, &isReversed, &currVariableNode, &currCns)) { - if (currTrueTarget != trueTarget) + if (currTrueTarget->getDestinationBlock() != trueTarget->getDestinationBlock()) { // This blocks jumps to a different target, stop searching and process what we already have. return optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode); @@ -329,12 +545,15 @@ bool Compiler::optSwitchConvert( lastBlock = lastBlock->Next(); } - BasicBlock* blockIfTrue = nullptr; - BasicBlock* blockIfFalse = nullptr; - bool isReversed = false; - const bool isTest = IsConstantTestCondBlock(lastBlock, false, &blockIfTrue, &blockIfFalse, &isReversed); + FlowEdge* blockIfTrueEdge = nullptr; + FlowEdge* blockIfFalseEdge = nullptr; + bool isReversed = false; + const bool isTest = IsConstantTestCondBlock(lastBlock, false, &blockIfTrueEdge, &blockIfFalseEdge, &isReversed); assert(isTest); + BasicBlock* blockIfTrue = blockIfTrueEdge->getDestinationBlock(); + BasicBlock* blockIfFalse = blockIfFalseEdge->getDestinationBlock(); + assert(firstBlock->TrueTargetIs(blockIfTrue)); FlowEdge* const trueEdge = firstBlock->GetTrueEdge(); FlowEdge* const falseEdge = firstBlock->GetFalseEdge();