Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JIT: more likelihood checking #99541

Merged
merged 1 commit into from
Mar 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4850,11 +4850,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
DoPhase(this, PHASE_COMPUTE_DOMINATORS, &Compiler::fgComputeDominators);
}

// Disable profile checks now.
// Over time we will move this further and further back in the phase list, as we fix issues.
//
activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE;

#ifdef DEBUG
fgDebugCheckLinks();
#endif
Expand Down Expand Up @@ -5135,6 +5130,11 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
}
#endif // TARGET_ARM

// Disable profile checks now.
// Over time we will move this further and further back in the phase list, as we fix issues.
//
activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE;

// Assign registers to variables, etc.

// Create LinearScan before Lowering, so that Lowering can call LinearScan methods
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -6807,7 +6807,7 @@ class Compiler
public:
PhaseStatus optOptimizeBools();
PhaseStatus optSwitchRecognition();
bool optSwitchConvert(BasicBlock* firstBlock, int testsCount, ssize_t* testValues, GenTree* nodeToTest);
bool optSwitchConvert(BasicBlock* firstBlock, int testsCount, ssize_t* testValues, weight_t falseLikelihood, GenTree* nodeToTest);
bool optSwitchDetectAndConvert(BasicBlock* firstBlock);

PhaseStatus optInvertLoops(); // Invert loops so they're entered at top and tested at bottom.
Expand Down
10 changes: 6 additions & 4 deletions src/coreclr/jit/fgbasic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5420,13 +5420,15 @@ BasicBlock* Compiler::fgConnectFallThrough(BasicBlock* bSrc, BasicBlock* bDst)
if (bSrc->KindIs(BBJ_COND) && bSrc->FalseTargetIs(bDst) && !bSrc->NextIs(bDst))
{
// Add a new block after bSrc which jumps to 'bDst'
jmpBlk = fgNewBBafter(BBJ_ALWAYS, bSrc, true);
FlowEdge* oldEdge = bSrc->GetFalseEdge();
jmpBlk = fgNewBBafter(BBJ_ALWAYS, bSrc, true);
FlowEdge* const oldEdge = bSrc->GetFalseEdge();
// Access the likelihood of oldEdge before
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch, I probably should've paid more attention to the diffs this likelihood weirdness would've caused when I changed SetTargetEdge to set the likelihood.

// it gets reset by SetTargetEdge below.
//
FlowEdge* const newEdge = fgAddRefPred(jmpBlk, bSrc, oldEdge);
fgReplacePred(oldEdge, jmpBlk);
jmpBlk->SetTargetEdge(oldEdge);
assert(jmpBlk->TargetIs(bDst));

FlowEdge* newEdge = fgAddRefPred(jmpBlk, bSrc, oldEdge);
bSrc->SetFalseEdge(newEdge);

// When adding a new jmpBlk we will set the bbWeight and bbFlags
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/flowgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,8 @@ BasicBlock* Compiler::fgCreateGCPoll(GCPollType pollType, BasicBlock* block)
// Bottom has Top and Poll as its predecessors. Poll has just Top as a predecessor.
FlowEdge* const trueEdge = fgAddRefPred(bottom, top);
FlowEdge* const falseEdge = fgAddRefPred(poll, top);
trueEdge->setLikelihood(1.0);
falseEdge->setLikelihood(0.0);

FlowEdge* const newEdge = fgAddRefPred(bottom, poll);
poll->SetTargetEdge(newEdge);
Expand Down
106 changes: 93 additions & 13 deletions src/coreclr/jit/helperexpansion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,8 @@ bool Compiler::fgExpandRuntimeLookupsForCall(BasicBlock** pBlock, Statement* stm
FlowEdge* const falseEdge = fgAddRefPred(nullcheckBb, sizeCheckBb);
sizeCheckBb->SetTrueEdge(trueEdge);
sizeCheckBb->SetFalseEdge(falseEdge);
trueEdge->setLikelihood(0.2);
falseEdge->setLikelihood(0.8);
}

// fallbackBb is reachable from both nullcheckBb and sizeCheckBb
Expand All @@ -414,10 +416,13 @@ bool Compiler::fgExpandRuntimeLookupsForCall(BasicBlock** pBlock, Statement* stm
FlowEdge* const falseEdge = fgAddRefPred(fastPathBb, nullcheckBb);
nullcheckBb->SetTrueEdge(trueEdge);
nullcheckBb->SetFalseEdge(falseEdge);
trueEdge->setLikelihood(0.2);
falseEdge->setLikelihood(0.8);

//
// Re-distribute weights (see '[weight: X]' on the diagrams above)
// TODO: consider marking fallbackBb as rarely-taken
// TODO: derive block weights from edge likelihoods.
//
block->inheritWeight(prevBb);
if (needsSizeCheck)
Expand Down Expand Up @@ -720,6 +725,8 @@ bool Compiler::fgExpandThreadLocalAccessForCallNativeAOT(BasicBlock** pBlock, St
FlowEdge* const falseEdge = fgAddRefPred(fallbackBb, tlsRootNullCondBB);
tlsRootNullCondBB->SetTrueEdge(trueEdge);
tlsRootNullCondBB->SetFalseEdge(falseEdge);
trueEdge->setLikelihood(1.0);
falseEdge->setLikelihood(0.0);

{
FlowEdge* const newEdge = fgAddRefPred(block, fallbackBb);
Expand Down Expand Up @@ -1091,13 +1098,17 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement*
FlowEdge* const falseEdge = fgAddRefPred(threadStaticBlockNullCondBB, maxThreadStaticBlocksCondBB);
maxThreadStaticBlocksCondBB->SetTrueEdge(trueEdge);
maxThreadStaticBlocksCondBB->SetFalseEdge(falseEdge);
trueEdge->setLikelihood(0.0);
falseEdge->setLikelihood(1.0);
}

{
FlowEdge* const trueEdge = fgAddRefPred(fastPathBb, threadStaticBlockNullCondBB);
FlowEdge* const falseEdge = fgAddRefPred(fallbackBb, threadStaticBlockNullCondBB);
threadStaticBlockNullCondBB->SetTrueEdge(trueEdge);
threadStaticBlockNullCondBB->SetFalseEdge(falseEdge);
trueEdge->setLikelihood(1.0);
falseEdge->setLikelihood(0.0);
}

{
Expand Down Expand Up @@ -1471,6 +1482,8 @@ bool Compiler::fgExpandStaticInitForCall(BasicBlock** pBlock, Statement* stmt, G
FlowEdge* const falseEdge = fgAddRefPred(helperCallBb, isInitedBb);
isInitedBb->SetTrueEdge(trueEdge);
isInitedBb->SetFalseEdge(falseEdge);
trueEdge->setLikelihood(1.0);
falseEdge->setLikelihood(0.0);
}

//
Expand Down Expand Up @@ -1700,7 +1713,7 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock,
//
// Block 1: lengthCheckBb (we check that dstLen < srcLen)
//
BasicBlock* lengthCheckBb = fgNewBBafter(BBJ_COND, prevBb, true);
BasicBlock* const lengthCheckBb = fgNewBBafter(BBJ_COND, prevBb, true);
lengthCheckBb->SetFlags(BBF_INTERNAL);

// Set bytesWritten -1 by default, if the fast path is not taken we'll return it as the result.
Expand Down Expand Up @@ -1786,6 +1799,10 @@ bool Compiler::fgVNBasedIntrinsicExpansionForCall_ReadUtf8(BasicBlock** pBlock,
FlowEdge* const falseEdge = fgAddRefPred(fastpathBb, lengthCheckBb);
lengthCheckBb->SetTrueEdge(trueEdge);
lengthCheckBb->SetFalseEdge(falseEdge);

// review: we assume length check always succeeds??
trueEdge->setLikelihood(1.0);
falseEdge->setLikelihood(0.0);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cc @dotnet/jit-contrib -- what do you think?

}

{
Expand Down Expand Up @@ -2489,46 +2506,107 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt,

fgRedirectTargetEdge(firstBb, nullcheckBb);

// We assume obj is 50%/50% null/not-null (TODO-InlineCast: rely on PGO)
// and rely on profile for the slow path.
//
// Alternatively we could profile nulls in the reservoir sample and
// treat that as another "option".
//
// True out of the null check means obj is null.
//
const weight_t nullcheckTrueLikelihood = 0.5;
const weight_t nullcheckFalseLikelihood = 0.5;

{
FlowEdge* const trueEdge = fgAddRefPred(lastBb, nullcheckBb);
nullcheckBb->SetTrueEdge(trueEdge);
trueEdge->setLikelihood(nullcheckTrueLikelihood);
}

if (typeCheckNotNeeded)
{
FlowEdge* const falseEdge = fgAddRefPred(fallbackBb, nullcheckBb);
nullcheckBb->SetFalseEdge(falseEdge);
falseEdge->setLikelihood(nullcheckFalseLikelihood);
}
else
{
FlowEdge* const falseEdge = fgAddRefPred(typeChecksBbs[0], nullcheckBb);
nullcheckBb->SetFalseEdge(falseEdge);
falseEdge->setLikelihood(nullcheckFalseLikelihood);

FlowEdge* const newEdge = fgAddRefPred(lastBb, typeCheckSucceedBb);
typeCheckSucceedBb->SetTargetEdge(newEdge);
}

//
// Re-distribute weights
// Re-distribute weights and set edge likelihoods.
//
// We have the likelihood estimate for each class to test against.
// As with multi-guess GDV, once we've failed the first check, the
// likelihood of the other checks will increase.
//
// For instance, suppose we have 3 classes A, B, C, with likelihoods 0.5, 0.2, 0.1,
// and a residual 0.2 likelihood for none of the above.
//
// We first test for A, this is a 0.5 likelihood of success.
//
// If the test for A fails, we test for B. Because we only reach this second
// test with relative likelihood 0.5, we need to divide (scale up) the remaining
// likelihoods by 0.5, so we end up with 0.4, 0.2, (none of the above: 0.4).
//
// If the test for B also fails, we test for C. Because we only reach
// this third test with relative likelihood 0.6, we need to divide (scale up)
// the remaining likelihoods by 0.6, so we end up with 0.33, (none of the above: 0.67).
//
// In the code below, instead of updating all the likelihoods each time,
// we remember how much we need to divide by to fix the next likelihood. This divisor is
// 1.0 - (sumOfPreviousLikelihoods)
//
// So for the above, we have
//
// Test | Likelihood | Sum of Previous | Rel. Likelihood
// A | 0.5 | 0.0 | 0.5 / (1 - 0.0) = 0.50
// B | 0.2 | 0.5 | 0.2 / (1 - 0.5) = 0.40
// C | 0.1 | 0.7 | 0.1 / (1 - 0.7) = 0.33
// n/a | 0.2 | 0.8 | 0.2 / (1 - 0.8) = 1.00
//
// The same goes for inherited weights -- the block where we test for B will have
// the weight of A times the likelihood that A's test fails, etc.
//
nullcheckBb->inheritWeight(firstBb);
unsigned totalLikelihood = 0;
weight_t sumOfPreviousLikelihood = 0;
for (int candidateId = 0; candidateId < numOfCandidates; candidateId++)
{
unsigned likelihood = likelihoods[candidateId];
BasicBlock* curTypeCheckBb = typeChecksBbs[candidateId];
if (candidateId == 0)
{
// We assume obj is 50%/50% null/not-null (TODO-InlineCast: rely on PGO)
// and rely on profile for the slow path.
curTypeCheckBb->inheritWeightPercentage(nullcheckBb, 50);
// Predecessor is the nullcheck, control reaches on false.
//
curTypeCheckBb->inheritWeight(nullcheckBb);
curTypeCheckBb->scaleBBWeight(nullcheckBb->GetFalseEdge()->getLikelihood());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Once edge likelihood propagation is done, we'll probably want to combine these two methods into one, right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will probably add a float overload for inherit weight. I might try and redo the old GDV likelihoods so they're [0..1] instead of [0..100].

}
else
{
BasicBlock* prevTypeCheckBb = typeChecksBbs[candidateId - 1];
curTypeCheckBb->inheritWeightPercentage(prevTypeCheckBb, likelihood);
// Predecessor is the prior type check, control reaches on false.
//
BasicBlock* const prevTypeCheckBb = typeChecksBbs[candidateId - 1];
weight_t prevCheckFailedLikelihood = prevTypeCheckBb->GetFalseEdge()->getLikelihood();
curTypeCheckBb->inheritWeight(prevTypeCheckBb);
curTypeCheckBb->scaleBBWeight(prevCheckFailedLikelihood);
}
totalLikelihood += likelihood;

// Fix likelihood of block's outgoing edges.
//
weight_t likelihood = (weight_t)likelihoods[candidateId] / 100;
weight_t relLikelihood = likelihood / (1.0 - sumOfPreviousLikelihood);

JITDUMP("Candidate %d: likelihood " FMT_WT " relative likelihood " FMT_WT "\n", candidateId, likelihood,
relLikelihood);

curTypeCheckBb->GetTrueEdge()->setLikelihood(relLikelihood);
curTypeCheckBb->GetFalseEdge()->setLikelihood(1.0 - relLikelihood);
sumOfPreviousLikelihood += likelihood;
}

if (fallbackBb->KindIs(BBJ_THROW))
Expand All @@ -2540,13 +2618,15 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt,
assert(fallbackBb->KindIs(BBJ_ALWAYS));
FlowEdge* const newEdge = fgAddRefPred(lastBb, fallbackBb);
fallbackBb->SetTargetEdge(newEdge);

fallbackBb->inheritWeightPercentage(lastTypeCheckBb, 100 - totalLikelihood);
fallbackBb->inheritWeight(lastTypeCheckBb);
weight_t lastTypeCheckFailedLikelihood = lastTypeCheckBb->GetFalseEdge()->getLikelihood();
fallbackBb->scaleBBWeight(lastTypeCheckFailedLikelihood);
}

if (!typeCheckNotNeeded)
{
typeCheckSucceedBb->inheritWeightPercentage(typeChecksBbs[0], totalLikelihood);
typeCheckSucceedBb->inheritWeight(typeChecksBbs[0]);
typeCheckSucceedBb->scaleBBWeight(sumOfPreviousLikelihood);
}

lastBb->inheritWeight(firstBb);
Expand Down
18 changes: 4 additions & 14 deletions src/coreclr/jit/loopcloning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -859,10 +859,7 @@ BasicBlock* LoopCloneContext::CondToStmtInBlock(Compiler*
// For "normal" cloning this is probably ok. For GDV cloning this
// may be inaccurate. We should key off the type test likelihood(s).
//
// TODO: this is a bit of out sync with what we do for block weights.
// Reconcile.
//
const weight_t fastLikelihood = 0.999;
const weight_t fastLikelihood = fastPathWeightScaleFactor;

// Choose how to generate the conditions
const bool generateOneConditionPerBlock = true;
Expand Down Expand Up @@ -1961,13 +1958,6 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex
// taking the max with the head block's weight.
ambientWeight = max(ambientWeight, preheader->bbWeight);

// We assume that the fast path will run 99% of the time, and thus should get 99% of the block weights.
// The slow path will, correspondingly, get only 1% of the block weights. It could be argued that we should
// mark the slow path as "run rarely", since it really shouldn't execute (given the currently optimized loop
// conditions) except under exceptional circumstances.
const weight_t fastPathWeightScaleFactor = 0.99;
const weight_t slowPathWeightScaleFactor = 1.0 - fastPathWeightScaleFactor;

// We're going to transform this loop:
//
// preheader --> header
Expand Down Expand Up @@ -2023,18 +2013,18 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex
BasicBlock* slowPreheader = fgNewBBafter(BBJ_ALWAYS, newPred, /*extendRegion*/ true);
JITDUMP("Adding " FMT_BB " after " FMT_BB "\n", slowPreheader->bbNum, newPred->bbNum);
slowPreheader->bbWeight = newPred->isRunRarely() ? BB_ZERO_WEIGHT : ambientWeight;
slowPreheader->scaleBBWeight(slowPathWeightScaleFactor);
slowPreheader->scaleBBWeight(LoopCloneContext::slowPathWeightScaleFactor);
newPred = slowPreheader;

// Now we'll clone the blocks of the loop body. These cloned blocks will be the slow path.

BlockToBlockMap* blockMap = new (getAllocator(CMK_LoopClone)) BlockToBlockMap(getAllocator(CMK_LoopClone));

loop->Duplicate(&newPred, blockMap, slowPathWeightScaleFactor);
loop->Duplicate(&newPred, blockMap, LoopCloneContext::slowPathWeightScaleFactor);

// Scale old blocks to the fast path weight.
loop->VisitLoopBlocks([=](BasicBlock* block) {
block->scaleBBWeight(fastPathWeightScaleFactor);
block->scaleBBWeight(LoopCloneContext::fastPathWeightScaleFactor);
return BasicBlockVisit::Continue;
});

Expand Down
8 changes: 8 additions & 0 deletions src/coreclr/jit/loopcloning.h
Original file line number Diff line number Diff line change
Expand Up @@ -814,6 +814,14 @@ struct NaturalLoopIterInfo;
*/
struct LoopCloneContext
{
// We assume that the fast path will run 99% of the time, and thus should get 99% of the block weights.
// The slow path will, correspondingly, get only 1% of the block weights. It could be argued that we should
// mark the slow path as "run rarely", since it really shouldn't execute (given the currently optimized loop
// conditions) except under exceptional circumstances.
//
static constexpr weight_t fastPathWeightScaleFactor = 0.99;
static constexpr weight_t slowPathWeightScaleFactor = 1.0 - fastPathWeightScaleFactor;

CompAllocator alloc; // The allocator

// The array of optimization opportunities found in each loop. (loop x optimization-opportunities)
Expand Down
Loading
Loading