Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JIT: Extend jump tables #107831

Merged
merged 8 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5170,14 +5170,14 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
//
DoPhase(this, PHASE_IF_CONVERSION, &Compiler::optIfConversion);

// Optimize block order
//
DoPhase(this, PHASE_OPTIMIZE_LAYOUT, &Compiler::optOptimizeLayout);

// Conditional to Switch conversion
//
DoPhase(this, PHASE_SWITCH_RECOGNITION, &Compiler::optSwitchRecognition);

// Optimize block order
//
DoPhase(this, PHASE_OPTIMIZE_LAYOUT, &Compiler::optOptimizeLayout);

// Determine start of cold region if we are hot/cold splitting
//
DoPhase(this, PHASE_DETERMINE_FIRST_COLD_BLOCK, &Compiler::fgDetermineFirstColdBlock);
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -7053,6 +7053,7 @@ class Compiler
PhaseStatus optSwitchRecognition();
bool optSwitchConvert(BasicBlock* firstBlock, int testsCount, ssize_t* testValues, weight_t falseLikelihood, GenTree* nodeToTest);
bool optSwitchDetectAndConvert(BasicBlock* firstBlock);
bool optExtendSwitch(BasicBlock* block);

PhaseStatus optInvertLoops(); // Invert loops so they're entered at top and tested at bottom.
PhaseStatus optOptimizeFlow(); // Simplify flow graph and do tail duplication
Expand Down Expand Up @@ -10509,6 +10510,7 @@ class Compiler
STRESS_MODE(IF_CONVERSION_INNER_LOOPS) \
STRESS_MODE(POISON_IMPLICIT_BYREFS) \
STRESS_MODE(STORE_BLOCK_UNROLLING) \
STRESS_MODE(DONT_LIMIT_JUMP_TABLE) \
STRESS_MODE(COUNT)

enum compStressArea
Expand Down
265 changes: 242 additions & 23 deletions src/coreclr/jit/switchrecognition.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,26 +22,33 @@
//
PhaseStatus Compiler::optSwitchRecognition()
{
// Limit to XARCH, ARM is already doing a great job with such comparisons using
// a series of ccmp instruction (see ifConvert phase).
#ifdef TARGET_XARCH
bool modified = false;
for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->Next())
{
// block->KindIs(BBJ_COND) check is for better throughput.
#ifdef TARGET_XARCH
// Limit to XARCH, ARM is already doing a great job with such comparisons using
// a series of ccmp instruction (see ifConvert phase).
if (block->KindIs(BBJ_COND) && !block->isRunRarely() && optSwitchDetectAndConvert(block))
{
JITDUMP("Converted block " FMT_BB " to switch\n", block->bbNum)
modified = true;
}
#endif

// See if we can merge BBJ_COND into an existing switch block
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I noticed for tier1-instr code this causes a lot of code expansion -- maybe skip doing this if we're instrumenting?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed

while (block->KindIs(BBJ_SWITCH) && optExtendSwitch(block))
{
modified = true;
}
}

if (modified)
{
// Remove unreachable blocks
fgUpdateFlowGraph();
fgRenumberBlocks();
return PhaseStatus::MODIFIED_EVERYTHING;
}
#endif
return PhaseStatus::MODIFIED_NOTHING;
}

Expand All @@ -63,8 +70,8 @@ PhaseStatus Compiler::optSwitchRecognition()
//
bool IsConstantTestCondBlock(const BasicBlock* block,
bool allowSideEffects,
BasicBlock** trueTarget,
BasicBlock** falseTarget,
FlowEdge** trueEdge,
FlowEdge** falseEdge,
bool* isReversed,
GenTree** variableNode = nullptr,
ssize_t* cns = nullptr)
Expand Down Expand Up @@ -102,9 +109,9 @@ bool IsConstantTestCondBlock(const BasicBlock* block,
return false;
}

*isReversed = rootNode->gtGetOp1()->OperIs(GT_NE);
*trueTarget = *isReversed ? block->GetFalseTarget() : block->GetTrueTarget();
*falseTarget = *isReversed ? block->GetTrueTarget() : block->GetFalseTarget();
*isReversed = rootNode->gtGetOp1()->OperIs(GT_NE);
*trueEdge = *isReversed ? block->GetFalseEdge() : block->GetTrueEdge();
*falseEdge = *isReversed ? block->GetTrueEdge() : block->GetFalseEdge();

if (block->FalseTargetIs(block) || block->TrueTargetIs(block))
{
Expand Down Expand Up @@ -132,6 +139,215 @@ bool IsConstantTestCondBlock(const BasicBlock* block,
return false;
}

//------------------------------------------------------------------------------
// GetSwitchValueOp: Try to extract BBJ_SWITCH's variable (and the offset)
//
// Arguments:
// switchBlock - BBJ_SWITCH block to get the switch value from
// variableNode - [out] Switch value node
// offset - [out] Offset of the switch value
//
// Return Value:
// True if switch value was successfully extracted, false otherwise
//
static bool GetSwitchValueOp(const BasicBlock* switchBlock, GenTree** variableNode, ssize_t* offset)
{
assert(switchBlock->KindIs(BBJ_SWITCH));

GenTree* switchNode = switchBlock->lastStmt()->GetRootNode()->gtEffectiveVal();
assert(switchNode->OperIs(GT_SWITCH));

GenTree* switchOp = switchNode->gtGetOp1();

// In most cases the switch value is either ADD(LCL_VAR, CNS) or LCL_VAR
if (switchOp->OperIs(GT_ADD))
{
// We use gtEffectiveVal() to skip COMMA nodes, those will be evaluated before
// any other case node in the switch block anyway.
GenTree* op1 = switchOp->gtGetOp1()->gtEffectiveVal();
GenTree* op2 = switchOp->gtGetOp2();

// TODO-JumpTable: We should be able to accept any side-effect-free op1 here
if (op1->OperIs(GT_LCL_VAR) && op2->IsCnsIntOrI())
{
*variableNode = op1;
*offset = op2->AsIntCon()->IconValue();
return true;
}
}
else
{
// We use gtEffectiveVal() to skip COMMA nodes, those will be evaluated before
// any other case node in the switch block anyway.
GenTree* op1 = switchOp->gtEffectiveVal();
if (op1->OperIs(GT_LCL_VAR))
{
*variableNode = op1;
*offset = 0;
return true;
}
}
return false;
}

//------------------------------------------------------------------------------
// optExtendSwitch: If one of the switch's targets is a value test block,
// and the switch's variable is the same as the value test's variable - try to merge
// that test (BBJ_COND) into the switch block. It's useful to assist Roslyn, which
// uses fairly conservative heuristics to generate switch blocks and may give up on
// some tests that could be merged into the switch.
//
// Arguments:
// block - BBJ_SWITCH block to extend
//
// Return Value:
// True if switch was successfully extended, false otherwise
//
bool Compiler::optExtendSwitch(BasicBlock* block)
{
assert(block->KindIs(BBJ_SWITCH));
JITDUMP("Considering expanding switch " FMT_BB "\n", block->bbNum);

if (opts.IsInstrumented())
{
JITDUMP("Instrumentation is enabled - bail out.\n");
return false;
}

assert(opts.OptimizationEnabled());

GenTree* switchOp = nullptr;
ssize_t switchTargetOffset = 0;
if (!GetSwitchValueOp(block, &switchOp, &switchTargetOffset))
{
JITDUMP("Couldn't extract the switch's variable - bail out.\n");
return false;
}

// TODO-JumpTable: Currently, we only expand the default target, but it should be possible
// to traverse all non-default edges as well (if they point to similar value tests).
// It shouldn't be too hard to implement (might be TP costly, though).

BBswtDesc* switchTargets = block->GetSwitchTargets();
if (!switchTargets->bbsHasDefault)
{
JITDUMP("Switch doesn't have a default target - bail out.\n");
return false;
}

// Now let's see if we can merge the value test in the edgeToExpand's destination block
// with the switch block.
FlowEdge* edgeToExpand = switchTargets->getDefault();
BasicBlock* edgeToExpandDst = edgeToExpand->getDestinationBlock();

// IsConstantTestCondBlock will tell us if the default target is a value test block (e.g. "x == cns")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you also need to check if edgeToExpandDst has other statements, and if so, bail out.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, oops, good point! I thought that IsConstantTestCondBlock checked for that but it asks caller for that

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed

// TODO-JumpTable: Add support for more complex tests like "x > 10 && x < 20"
// Also, we should be able to merge two BBJ_SWITCH on the same variable. Roslyn may create such shapes.
FlowEdge* testPasses;
FlowEdge* testFails;
bool isReversed;
GenTree* variableNode;
ssize_t cns;
if (!IsConstantTestCondBlock(edgeToExpandDst, false, &testPasses, &testFails, &isReversed, &variableNode, &cns) ||
!edgeToExpandDst->hasSingleStmt())
{
JITDUMP("The default target is not performing a value test - bail out.\n");
return false;
}

BasicBlock* testPassesBb = testPasses->getDestinationBlock();
BasicBlock* testFailsBb = testFails->getDestinationBlock();

if (!GenTree::Compare(variableNode, switchOp))
{
JITDUMP("The default target is performing a value test on a different variable - bail out.\n");
return false;
}

// We're less conservative than Roslyn, but we still have some limits (unless stress mode is enabled)

if ((static_cast<size_t>(cns + switchTargetOffset) > SWITCH_MAX_DISTANCE) &&
!compStressCompile(STRESS_DONT_LIMIT_JUMP_TABLE, 50))
{
JITDUMP("Switch value is out of range - bail out.\n");
return false;
}

// We need to normalize the test value to the switch's range
const ssize_t newTestValue = cns + switchTargetOffset;

unsigned oldJumpCnt = switchTargets->bbsCount;
unsigned newJumpCnt = max(oldJumpCnt, (unsigned)(newTestValue + 1 + (switchTargets->bbsHasDefault ? 1 : 0)));
FlowEdge** oldJumpTab = switchTargets->bbsDstTab;
FlowEdge** newJumpTab = new (this, CMK_FlowEdge) FlowEdge*[newJumpCnt];

//
// Update likelihoods
//
weight_t edgeToExpandLikelihood = edgeToExpand->getLikelihood();
weight_t newTestLikelihood = edgeToExpandLikelihood * testPasses->getLikelihood();
weight_t newDefLikelihood = edgeToExpandLikelihood * testFails->getLikelihood();

// However, it is tricky if testPasses or testFails are already presented in the jump table.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This might create a new dominant case for switch peeling (which happens later during block layout). If the default case was the most likely case before, and the new non-default case is now the most likely case and is reachable by just one case value, consider setting bbsHasDominantCase and bbsDominantCase.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This might create a new dominant case for switch peeling (which happens later during block layout)

I don't think this is true. We currently run switch recognition after optOptimizeLayout, and the late reordering pass in the backend only reorders blocks, without any of the additional opt passes like fgOptimizeSwitchJumps. Perhaps we should consider moving switch recognition before layout?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps we should consider moving switch recognition before layout?

Afair, the order of when to do it was driven by diffs, I can check again

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW, I plan to move layout completely to the backend, and leave all the flow opts (like switch peeling) where they are now, so it might be easier to flip optOptimizeLayout and optSwitchRecognition after that change goes in.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! I've re-ordered the phases, diffs didn't change much

// Switch block currently doesn't support different likelihoods for edges pointing to the same block.
// So we need to adjust the existing likelihoods.
for (unsigned i = 0; i < oldJumpCnt; i++)
{
BasicBlock* edgeDstBb = oldJumpTab[i]->getDestinationBlock();
weight_t edgeLikelihood = oldJumpTab[i]->getLikelihood();

if (edgeDstBb == testPassesBb)
{
newTestLikelihood = edgeLikelihood + edgeToExpandLikelihood * testPasses->getLikelihood();
}
if (edgeDstBb == testFailsBb)
{
newDefLikelihood = edgeLikelihood + edgeToExpandLikelihood * testFails->getLikelihood();
}
}

//
// Create the new jump table
//
for (unsigned i = 0; i < newJumpCnt; i++)
{
if (i < oldJumpCnt)
{
// Not only the default case can point to edgeToExpandDst
if (oldJumpTab[i]->getDestinationBlock() == edgeToExpandDst)
{
fgRemoveRefPred(oldJumpTab[i]);
}
else
{
// Just copy the old edge
newJumpTab[i] = oldJumpTab[i];
continue;
}
}

if (i == (unsigned)newTestValue)
{
newJumpTab[i] = fgAddRefPred(testPassesBb, block);
newJumpTab[i]->setLikelihood(newTestLikelihood);
}
else
{
newJumpTab[i] = fgAddRefPred(testFailsBb, block);
newJumpTab[i]->setLikelihood(newDefLikelihood);
}
}

switchTargets->bbsCount = newJumpCnt;
switchTargets->bbsDstTab = newJumpTab;

// m_switchDescMap cache is invalidated by this change, so it has to be recomputed
InvalidateUniqueSwitchSuccMap();

JITDUMP("Successfully merged the default target's value test with the switch.\n");
return true;
}

//------------------------------------------------------------------------------
// optSwitchDetectAndConvert : Try to detect a series of conditional blocks which
// can be converted into a switch (jump-table) construct. See optSwitchConvert
Expand All @@ -147,10 +363,10 @@ bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock)
{
assert(firstBlock->KindIs(BBJ_COND));

GenTree* variableNode = nullptr;
ssize_t cns = 0;
BasicBlock* trueTarget = nullptr;
BasicBlock* falseTarget = nullptr;
GenTree* variableNode = nullptr;
ssize_t cns = 0;
FlowEdge* trueTarget = nullptr;
FlowEdge* falseTarget = nullptr;

// The algorithm is simple - we check that the given block is a constant test block
// and then try to accumulate as many constant test blocks as possible. Once we hit
Expand Down Expand Up @@ -180,10 +396,10 @@ bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock)
// Now walk the next blocks and see if they are basically the same type of test
for (const BasicBlock* currBb = firstBlock->Next(); currBb != nullptr; currBb = currBb->Next())
{
GenTree* currVariableNode = nullptr;
ssize_t currCns = 0;
BasicBlock* currTrueTarget = nullptr;
BasicBlock* currFalseTarget = nullptr;
GenTree* currVariableNode = nullptr;
ssize_t currCns = 0;
FlowEdge* currTrueTarget = nullptr;
FlowEdge* currFalseTarget = nullptr;

if (!currBb->hasSingleStmt())
{
Expand All @@ -196,7 +412,7 @@ bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock)
if (IsConstantTestCondBlock(currBb, false, &currTrueTarget, &currFalseTarget, &isReversed,
&currVariableNode, &currCns))
{
if (currTrueTarget != trueTarget)
if (currTrueTarget->getDestinationBlock() != trueTarget->getDestinationBlock())
{
// This blocks jumps to a different target, stop searching and process what we already have.
return optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode);
Expand Down Expand Up @@ -329,12 +545,15 @@ bool Compiler::optSwitchConvert(
lastBlock = lastBlock->Next();
}

BasicBlock* blockIfTrue = nullptr;
BasicBlock* blockIfFalse = nullptr;
bool isReversed = false;
const bool isTest = IsConstantTestCondBlock(lastBlock, false, &blockIfTrue, &blockIfFalse, &isReversed);
FlowEdge* blockIfTrueEdge = nullptr;
FlowEdge* blockIfFalseEdge = nullptr;
bool isReversed = false;
const bool isTest = IsConstantTestCondBlock(lastBlock, false, &blockIfTrueEdge, &blockIfFalseEdge, &isReversed);
assert(isTest);

BasicBlock* blockIfTrue = blockIfTrueEdge->getDestinationBlock();
BasicBlock* blockIfFalse = blockIfFalseEdge->getDestinationBlock();

assert(firstBlock->TrueTargetIs(blockIfTrue));
FlowEdge* const trueEdge = firstBlock->GetTrueEdge();
FlowEdge* const falseEdge = firstBlock->GetFalseEdge();
Expand Down
Loading