From 247b81479b0f1e2e24107ed90887be829686c033 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Mon, 22 Apr 2024 07:25:48 -0700 Subject: [PATCH] JIT: Move profile checking back until just before inlining (#101011) Fixes the following areas with proper profile updates: * GDV chaining * instrumentation-introduces flow * OSR step blocks * fgSplitEdge (used by instrumentation) Adds checking bypasses for: * callfinally pair tails * original method entries in OSR methods Contributes to #93020 --- src/coreclr/jit/compiler.cpp | 10 ++-- src/coreclr/jit/fgbasic.cpp | 54 +++++++++++++++++---- src/coreclr/jit/fgopt.cpp | 51 ++++++++++++------- src/coreclr/jit/fgprofile.cpp | 50 ++++++++++++++++++- src/coreclr/jit/indirectcalltransformer.cpp | 46 +++++++++++++++++- 5 files changed, 175 insertions(+), 36 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index a32867660a01c3..d0bda29cb30891 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4629,11 +4629,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // DoPhase(this, PHASE_IMPORTATION, &Compiler::fgImport); - // Drop back to just checking profile likelihoods. - // - activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE; - activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS; - // If this is a failed inline attempt, we're done. // if (compIsForInlining() && compInlineResult->IsFailure()) @@ -4688,6 +4683,11 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl return; } + // Drop back to just checking profile likelihoods. + // + activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE; + activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS; + // At this point in the phase list, all the inlinee phases have // been run, and inlinee compiles have exited, so we should only // get this far if we are jitting the root method. diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 808d591a77f5c8..fb717e1a5a50ea 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -214,10 +214,33 @@ bool Compiler::fgEnsureFirstBBisScratch() block = BasicBlock::New(this); - // If we have profile data the new block will inherit fgFirstBlock's weight + // If we have profile data determine the weight of the scratch BB + // if (fgFirstBB->hasProfileWeight()) { - block->inheritWeight(fgFirstBB); + // If current entry has preds, sum up those weights + // + weight_t nonEntryWeight = 0; + for (FlowEdge* const edge : fgFirstBB->PredEdges()) + { + nonEntryWeight += edge->getLikelyWeight(); + } + + // entry weight is weight not from any pred + // + weight_t const entryWeight = fgFirstBB->bbWeight - nonEntryWeight; + if (entryWeight <= 0) + { + // If the result is clearly nonsensical, just inherit + // + JITDUMP("\fgEnsureFirstBBisScratch: Profile data could not be locally repaired. Data %s inconsisent.\n", + fgPgoConsistent ? "is now" : "was already"); + block->inheritWeight(fgFirstBB); + } + else + { + block->setBBProfileWeight(entryWeight); + } } // The new scratch bb will fall through to the old first bb @@ -5062,17 +5085,28 @@ BasicBlock* Compiler::fgSplitEdge(BasicBlock* curr, BasicBlock* succ) fgReplaceJumpTarget(curr, succ, newBlock); // And 'succ' has 'newBlock' as a new predecessor. - FlowEdge* const newEdge = fgAddRefPred(succ, newBlock); - newBlock->SetTargetEdge(newEdge); + FlowEdge* const newSuccEdge = fgAddRefPred(succ, newBlock); + newBlock->SetTargetEdge(newSuccEdge); - // This isn't accurate, but it is complex to compute a reasonable number so just assume that we take the - // branch 50% of the time. - // - // TODO: leverage edge likelihood. + // Set weight for newBlock // - if (!curr->KindIs(BBJ_ALWAYS)) + if (curr->KindIs(BBJ_ALWAYS)) + { + newBlock->inheritWeight(curr); + } + else { - newBlock->inheritWeightPercentage(curr, 50); + if (curr->hasProfileWeight()) + { + FlowEdge* const currNewEdge = fgGetPredForBlock(newBlock, curr); + newBlock->setBBProfileWeight(currNewEdge->getLikelyWeight()); + } + else + { + // Todo: use likelihood even w/o profile? + // + newBlock->inheritWeightPercentage(curr, 50); + } } // The bbLiveIn and bbLiveOut are both equal to the bbLiveIn of 'succ' diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index c1b919d37830a2..ac008eb140f9d7 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -731,17 +731,8 @@ PhaseStatus Compiler::fgPostImportationCleanup() // auto addConditionalFlow = [this, entryStateVar, &entryJumpTarget, &addedBlocks](BasicBlock* fromBlock, BasicBlock* toBlock) { - // We may have previously though this try entry was unreachable, but now we're going to - // step through it on the way to the OSR entry. So ensure it has plausible profile weight. - // - if (fgHaveProfileWeights() && !fromBlock->hasProfileWeight()) - { - JITDUMP("Updating block weight for now-reachable try entry " FMT_BB " via " FMT_BB "\n", - fromBlock->bbNum, fgFirstBB->bbNum); - fromBlock->inheritWeight(fgFirstBB); - } - BasicBlock* const newBlock = fgSplitBlockAtBeginning(fromBlock); + newBlock->inheritWeight(fromBlock); fromBlock->SetFlags(BBF_INTERNAL); newBlock->RemoveFlags(BBF_DONT_REMOVE); addedBlocks++; @@ -754,16 +745,40 @@ PhaseStatus Compiler::fgPostImportationCleanup() fgNewStmtAtBeg(fromBlock, jumpIfEntryStateZero); FlowEdge* const osrTryEntryEdge = fgAddRefPred(toBlock, fromBlock); - newBlock->inheritWeight(fromBlock); fromBlock->SetCond(osrTryEntryEdge, normalTryEntryEdge); - // Not sure what the correct edge likelihoods are just yet; - // for now we'll say the OSR path is the likely one. - // - // Todo: can we leverage profile data here to get a better answer? - // - osrTryEntryEdge->setLikelihood(0.9); - normalTryEntryEdge->setLikelihood(0.1); + if (fgHaveProfileWeights()) + { + // We are adding a path from (ultimately) the method entry to "fromBlock" + // Update the profile weight. + // + weight_t const entryWeight = fgFirstBB->bbWeight; + + JITDUMP("Updating block weight for now-reachable try entry " FMT_BB " via " FMT_BB "\n", + fromBlock->bbNum, fgFirstBB->bbNum); + fromBlock->setBBProfileWeight(fromBlock->bbWeight + entryWeight); + + // We updated the weight of fromBlock above. + // + // Set the likelihoods such that the additional weight flows to toBlock + // (and so the "normal path" profile out of fromBlock to newBlock is unaltered) + // + // In some stress cases we may have a zero-weight OSR entry. + // Tolerate this by capping the fromToLikelihood. + // + weight_t const fromWeight = fromBlock->bbWeight; + weight_t const fromToLikelihood = min(1.0, entryWeight / fromWeight); + + osrTryEntryEdge->setLikelihood(fromToLikelihood); + normalTryEntryEdge->setLikelihood(1.0 - fromToLikelihood); + } + else + { + // Just set likelihoods arbitrarily + // + osrTryEntryEdge->setLikelihood(0.9); + normalTryEntryEdge->setLikelihood(0.1); + } entryJumpTarget = fromBlock; }; diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index 79cd63e3ac3b51..05f97ffd14ef59 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -1689,16 +1689,42 @@ void EfficientEdgeCountInstrumentor::RelocateProbes() { BasicBlock* intermediary = m_comp->fgNewBBbefore(BBJ_ALWAYS, block, /* extendRegion */ true); intermediary->SetFlags(BBF_IMPORTED); - intermediary->inheritWeight(block); FlowEdge* const newEdge = m_comp->fgAddRefPred(block, intermediary); intermediary->SetTargetEdge(newEdge); NewRelocatedProbe(intermediary, probe->source, probe->target, &leader); SetModifiedFlow(); + // Redirect flow and figure out profile impact. + // + // We don't expect to see mixtures of profiled and unprofiled preds, + // but if we do, fall back to our old default behavior. + // + weight_t weight = 0; + bool allPredsHaveProfile = true; + while (criticalPreds.Height() > 0) { BasicBlock* const pred = criticalPreds.Pop(); m_comp->fgReplaceJumpTarget(pred, block, intermediary); + + if (pred->hasProfileWeight()) + { + FlowEdge* const predIntermediaryEdge = m_comp->fgGetPredForBlock(intermediary, pred); + weight += predIntermediaryEdge->getLikelyWeight(); + } + else + { + allPredsHaveProfile = false; + } + } + + if (allPredsHaveProfile) + { + intermediary->setBBProfileWeight(weight); + } + else + { + intermediary->inheritWeight(block); } } } @@ -4773,6 +4799,19 @@ bool Compiler::fgDebugCheckProfileWeights(ProfileChecks checks) verifyIncoming = false; } + // Original entries in OSR methods that also are + // loop headers. + // + // These will frequently have a profile imbalance as + // synthesis will have injected profile weight for + // method entry, but when we transform flow for OSR, + // only the loop back edges remain. + // + if (block == fgEntryBB) + { + verifyIncoming = false; + } + // Handler entries // if (block->hasEHBoundaryIn()) @@ -4935,6 +4974,15 @@ bool Compiler::fgDebugCheckIncomingProfileData(BasicBlock* block, ProfileChecks foundEHPreds = false; } + // We almost certainly won't get the likelihoods on a BBJ_EHFINALLYRET right, + // so special-case BBJ_CALLFINALLYRET incoming flow. + // + if (block->isBBCallFinallyPairTail()) + { + incomingLikelyWeight = block->Prev()->bbWeight; + foundEHPreds = false; + } + bool likelyWeightsValid = true; // If we have EH preds we may not have consistent incoming flow. diff --git a/src/coreclr/jit/indirectcalltransformer.cpp b/src/coreclr/jit/indirectcalltransformer.cpp index fad60de969d4c3..b8a2b2162fd26f 100644 --- a/src/coreclr/jit/indirectcalltransformer.cpp +++ b/src/coreclr/jit/indirectcalltransformer.cpp @@ -1052,7 +1052,7 @@ class IndirectCallTransformer // thenBlock = CreateAndInsertBasicBlock(BBJ_ALWAYS, checkBlock); thenBlock->CopyFlags(currBlock, BBF_SPLIT_GAINED); - thenBlock->inheritWeight(currBlock); + thenBlock->inheritWeight(checkBlock); thenBlock->scaleBBWeight(adjustedThenLikelihood); FlowEdge* const thenRemainderEdge = compiler->fgAddRefPred(remainderBlock, thenBlock); thenBlock->SetTargetEdge(thenRemainderEdge); @@ -1214,10 +1214,52 @@ class IndirectCallTransformer checkStmt = nextStmt; } - // Finally, rewire the cold block to jump to the else block, + // Rewire the cold block to jump to the else block, // not fall through to the check block. // compiler->fgRedirectTargetEdge(coldBlock, elseBlock); + + // Update the profile data + // + if (coldBlock->hasProfileWeight()) + { + // Check block + // + FlowEdge* const coldElseEdge = compiler->fgGetPredForBlock(elseBlock, coldBlock); + weight_t newCheckWeight = checkBlock->bbWeight - coldElseEdge->getLikelyWeight(); + + if (newCheckWeight < 0) + { + // If weights were consistent, we expect at worst a slight underflow. + // + if (compiler->fgPgoConsistent) + { + bool const isReasonableUnderflow = Compiler::fgProfileWeightsEqual(newCheckWeight, 0.0); + assert(isReasonableUnderflow); + + if (!isReasonableUnderflow) + { + compiler->fgPgoConsistent = false; + } + } + + // No matter what, the minimum weight is zero + // + newCheckWeight = 0; + } + checkBlock->setBBProfileWeight(newCheckWeight); + + // Else block + // + FlowEdge* const checkElseEdge = compiler->fgGetPredForBlock(elseBlock, checkBlock); + weight_t const newElseWeight = checkElseEdge->getLikelyWeight() + coldElseEdge->getLikelyWeight(); + elseBlock->setBBProfileWeight(newElseWeight); + + // Then block + // + FlowEdge* const checkThenEdge = compiler->fgGetPredForBlock(thenBlock, checkBlock); + thenBlock->setBBProfileWeight(checkThenEdge->getLikelyWeight()); + } } // When the current candidate has sufficiently high likelihood, scan