Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JIT: make profile data available to inlinees #42277

Merged
merged 2 commits into from
Sep 16, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 43 additions & 40 deletions src/coreclr/src/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2485,7 +2485,6 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
{
// The following flags are lost when inlining. (They are removed in
// Compiler::fgInvokeInlineeCompiler().)
assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT));
assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR));
assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_PROF_ENTERLEAVE));
assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_EnC));
Expand Down Expand Up @@ -2740,6 +2739,49 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
opts.compFastTailCalls = true;
#endif // FEATURE_FASTTAILCALL

// Profile data
//
fgBlockCounts = nullptr;
fgProfileData_ILSizeMismatch = false;
fgNumProfileRuns = 0;
if (jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT))
{
HRESULT hr;
hr = info.compCompHnd->getMethodBlockCounts(info.compMethodHnd, &fgBlockCountsCount, &fgBlockCounts,
&fgNumProfileRuns);

JITDUMP("BBOPT set -- VM query for profile data for %s returned: hr=%0x; counts at %p, %d blocks, %d runs\n",
info.compFullName, hr, fgBlockCounts, fgBlockCountsCount, fgNumProfileRuns);

// a failed result that also has a non-NULL fgBlockCounts
// indicates that the ILSize for the method no longer matches
// the ILSize for the method when profile data was collected.
//
// We will discard the IBC data in this case
//
if (FAILED(hr) && (fgBlockCounts != nullptr))
{
fgProfileData_ILSizeMismatch = true;
fgBlockCounts = nullptr;
}
#ifdef DEBUG
// A successful result implies a non-NULL fgBlockCounts
//
if (SUCCEEDED(hr))
{
assert(fgBlockCounts != nullptr);
}

// A failed result implies a NULL fgBlockCounts
// see implementation of Compiler::fgHaveProfileData()
//
if (FAILED(hr))
{
assert(fgBlockCounts == nullptr);
}
#endif
}

if (compIsForInlining())
{
return;
Expand Down Expand Up @@ -3147,45 +3189,6 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
#endif
}

fgBlockCounts = nullptr;
fgProfileData_ILSizeMismatch = false;
fgNumProfileRuns = 0;
if (jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT))
{
assert(!compIsForInlining());
HRESULT hr;
hr = info.compCompHnd->getMethodBlockCounts(info.compMethodHnd, &fgBlockCountsCount, &fgBlockCounts,
&fgNumProfileRuns);

// a failed result that also has a non-NULL fgBlockCounts
// indicates that the ILSize for the method no longer matches
// the ILSize for the method when profile data was collected.
//
// We will discard the IBC data in this case
//
if (FAILED(hr) && (fgBlockCounts != nullptr))
{
fgProfileData_ILSizeMismatch = true;
fgBlockCounts = nullptr;
}
#ifdef DEBUG
// A successful result implies a non-NULL fgBlockCounts
//
if (SUCCEEDED(hr))
{
assert(fgBlockCounts != nullptr);
}

// A failed result implies a NULL fgBlockCounts
// see implementation of Compiler::fgHaveProfileData()
//
if (FAILED(hr))
{
assert(fgBlockCounts == nullptr);
}
#endif
}

#ifdef DEBUG
// Now, set compMaxUncheckedOffsetForNullObject for STRESS_NULL_OBJECT_CHECK
if (compStressCompile(STRESS_NULL_OBJECT_CHECK, 30))
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/src/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -5384,6 +5384,7 @@ class Compiler
}

bool fgHaveProfileData();
void fgComputeProfileScale();
bool fgGetProfileWeightForBasicBlock(IL_OFFSET offset, unsigned* weight);
void fgInstrumentMethod();

Expand Down
176 changes: 166 additions & 10 deletions src/coreclr/src/jit/flowgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,19 +176,153 @@ void Compiler::fgInit()
fgPreviousCandidateSIMDFieldAsgStmt = nullptr;
#endif

fgHasSwitch = false;
fgHasSwitch = false;
fgBlockCounts = nullptr;
}

//------------------------------------------------------------------------
// fgHaveProfileData: check if profile data is available
//
// Returns:
// true if so; that is, when fgBlockCounts is non-null and this is not
// an import-only compilation.
//
// Note:
// This now returns true for inlinees. We might consider preserving the
// old behavior for crossgen, since crossgen BBINSTRs still do inlining
// and don't instrument the inlinees.
//
// Thus if BBINSTR and BBOPT do the same inlines (which can happen),
// profile data for an inlinee (if available) will not fully reflect
// the behavior of the inlinee when called from this method.
//
// If this inlinee was not inlined by the BBINSTR run then the
// profile data for the inlinee will reflect this method's influence.
//
// * for ALWAYS_INLINE and FORCE_INLINE cases it is unlikely we'll find
// any profile data, as BBINSTR and BBOPT callers will both inline;
// only indirect callers will invoke the instrumented version to run.
// * for DISCRETIONARY_INLINE cases we may or may not find relevant
// data, depending, but chances are the data is relevant.
//
// TieredPGO data comes from Tier0 methods, which currently do not do
// any inlining; thus inlinee profile data should be available and
// representative.
//
bool Compiler::fgHaveProfileData()
{
// NOTE(review): this text is a diff rendering without +/- markers. The
// next two `if` lines appear to be the old and new forms of one check;
// per the note above ("This now returns true for inlinees"), the
// replacement drops the compIsForInlining() test -- confirm against the
// merged file before relying on the literal text here.
if (compIsForInlining() || compIsForImportOnly())
if (compIsForImportOnly())
{
return false;
}

// Profile data is considered present exactly when a block count table
// has been recorded in fgBlockCounts.
return (fgBlockCounts != nullptr);
}

//------------------------------------------------------------------------
// fgComputeProfileScale: determine how much scaling to apply
//   to raw profile count data.
//
// Notes:
//   Scaling is only needed for inlinees, and the results of this
//   computation are recorded in fields of impInlineInfo:
//
//   * profileScaleState becomes KNOWN, with profileScaleFactor set to
//     (call site weight / callee entry weight), or
//   * profileScaleState becomes UNAVAILABLE and profileScaleFactor is
//     left untouched.
//
//   The work is done only while the state is UNDETERMINED; once a verdict
//   has been recorded, later calls return immediately.
//
//   The scale is UNAVAILABLE when the call site block has no profile
//   weight or a zero weight, when the callee has no profile data (or none
//   for IL offset 0), or when the callee entry count is smaller than the
//   call site count (which would require scaling up).
//
void Compiler::fgComputeProfileScale()
{
    // Only applicable to inlinees
    assert(compIsForInlining());

    // Have we already determined the scale?
    if (impInlineInfo->profileScaleState != InlineInfo::ProfileScaleState::UNDETERMINED)
    {
        return;
    }

    // No, not yet -- try and compute the scale.
    JITDUMP("Computing inlinee profile scale:\n");

    // Call site has profile weight?
    //
    // Todo: handle case of unprofiled caller invoking profiled callee.
    //
    const BasicBlock* callSiteBlock = impInlineInfo->iciBlock;
    if (!callSiteBlock->hasProfileWeight())
    {
        JITDUMP(" ... call site not profiled\n");
        impInlineInfo->profileScaleState = InlineInfo::ProfileScaleState::UNAVAILABLE;
        return;
    }

    const BasicBlock::weight_t callSiteWeight = callSiteBlock->bbWeight;

    // Call site has zero count?
    //
    // Todo: perhaps retain some semblance of callee profile data,
    // possibly scaled down severely.
    //
    if (callSiteWeight == 0)
    {
        JITDUMP(" ... zero call site count\n");
        impInlineInfo->profileScaleState = InlineInfo::ProfileScaleState::UNAVAILABLE;
        return;
    }

    // Callee has profile data?
    //
    if (!fgHaveProfileData())
    {
        JITDUMP(" ... no callee profile data\n");
        impInlineInfo->profileScaleState = InlineInfo::ProfileScaleState::UNAVAILABLE;
        return;
    }

    // Find callee's unscaled entry weight.
    //
    // Ostensibly this should be fgCalledCount for the callee, but that's not available
    // as it requires some analysis.
    //
    // For most callees it will be the same as the entry block count.
    //
    BasicBlock::weight_t calleeWeight = 0;

    // IL offset 0 is the callee's entry block.
    if (!fgGetProfileWeightForBasicBlock(0, &calleeWeight))
    {
        JITDUMP(" ... no callee profile data for entry block\n");
        impInlineInfo->profileScaleState = InlineInfo::ProfileScaleState::UNAVAILABLE;
        return;
    }

    // We should generally be able to assume calleeWeight >= callSiteWeight.
    // If this isn't so, perhaps something is wrong with the profile data
    // collection or retrieval.
    //
    // For now, ignore callee data if we'd need to upscale.
    //
    if (calleeWeight < callSiteWeight)
    {
        // Both weights are unsigned counts (calleeWeight is filled in through an
        // unsigned* out-parameter), so print them with %u rather than %d.
        JITDUMP(" ... callee entry count %u is less than call site count %u\n", calleeWeight, callSiteWeight);
        impInlineInfo->profileScaleState = InlineInfo::ProfileScaleState::UNAVAILABLE;
        return;
    }

    // Hence, scale is always in the range (0.0...1.0] -- we are always scaling down callee counts.
    //
    // calleeWeight cannot be zero here: callSiteWeight is nonzero and
    // calleeWeight >= callSiteWeight, so the division is safe.
    //
    const double scale                = ((double)callSiteWeight) / calleeWeight;
    impInlineInfo->profileScaleFactor = scale;
    impInlineInfo->profileScaleState  = InlineInfo::ProfileScaleState::KNOWN;

    JITDUMP(" call site count %u callee entry count %u scale %f\n", callSiteWeight, calleeWeight, scale);
}

//------------------------------------------------------------------------
// fgGetProfileWeightForBasicBlock: obtain profile data for a block
//
// Arguments:
// offset - IL offset of the block
// weightWB - [OUT] weight obtained
//
// Returns:
// true if data was found
//
bool Compiler::fgGetProfileWeightForBasicBlock(IL_OFFSET offset, unsigned* weightWB)
{
noway_assert(weightWB != nullptr);
Expand Down Expand Up @@ -229,19 +363,16 @@ bool Compiler::fgGetProfileWeightForBasicBlock(IL_OFFSET offset, unsigned* weigh
}
#endif // DEBUG

if (fgHaveProfileData() == false)
if (!fgHaveProfileData())
{
return false;
}

noway_assert(!compIsForInlining());
for (UINT32 i = 0; i < fgBlockCountsCount; i++)
{
if (fgBlockCounts[i].ILOffset == offset)
{
weight = fgBlockCounts[i].ExecutionCount;

*weightWB = weight;
*weightWB = fgBlockCounts[i].ExecutionCount;
return true;
}
}
Expand Down Expand Up @@ -5621,6 +5752,11 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F
}
}

if (compIsForInlining())
{
fgComputeProfileScale();
}

do
{
unsigned jmpAddr = DUMMY_INIT(BAD_IL_OFFSET);
Expand Down Expand Up @@ -6043,9 +6179,19 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F
curBBdesc->bbCodeOffsEnd = nxtBBoffs;

unsigned profileWeight;

if (fgGetProfileWeightForBasicBlock(curBBoffs, &profileWeight))
{
if (compIsForInlining())
{
if (impInlineInfo->profileScaleState == InlineInfo::ProfileScaleState::KNOWN)
{
profileWeight = (unsigned)(impInlineInfo->profileScaleFactor * profileWeight);
}
}

curBBdesc->setBBProfileWeight(profileWeight);

if (profileWeight == 0)
{
curBBdesc->bbSetRunRarely();
Expand Down Expand Up @@ -7182,7 +7328,7 @@ unsigned Compiler::fgGetNestingLevel(BasicBlock* block, unsigned* pFinallyNestin
}

//------------------------------------------------------------------------
// fgImport: read the IL forf the method and create jit IR
// fgImport: read the IL for the method and create jit IR
//
// Returns:
// phase status
Expand Down Expand Up @@ -23229,6 +23375,8 @@ void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call, InlineResult* inlineRe
inlineInfo.retExprClassHnd = nullptr;
inlineInfo.retExprClassHndIsExact = false;
inlineInfo.inlineResult = inlineResult;
inlineInfo.profileScaleState = InlineInfo::ProfileScaleState::UNDETERMINED;
inlineInfo.profileScaleFactor = 0.0;
#ifdef FEATURE_SIMD
inlineInfo.hasSIMDTypeArgLocalOrReturn = false;
#endif // FEATURE_SIMD
Expand Down Expand Up @@ -23302,7 +23450,6 @@ void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call, InlineResult* inlineRe

// The following flags are lost when inlining.
// (This is checked in Compiler::compInitOptions().)
compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_BBOPT);
compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_BBINSTR);
compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_PROF_ENTERLEAVE);
compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_DEBUG_EnC);
Expand Down Expand Up @@ -23508,7 +23655,9 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
const unsigned __int64 inlineeBlockFlags = InlineeCompiler->fgFirstBB->bbFlags;
noway_assert((inlineeBlockFlags & BBF_HAS_JMP) == 0);
noway_assert((inlineeBlockFlags & BBF_KEEP_BBJ_ALWAYS) == 0);
iciBlock->bbFlags |= inlineeBlockFlags;

// Todo: we may want to exclude other flags here.
iciBlock->bbFlags |= (inlineeBlockFlags & ~BBF_RUN_RARELY);

#ifdef DEBUG
if (verbose)
Expand Down Expand Up @@ -23667,6 +23816,13 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
block->bbJumpKind = BBJ_NONE;
}
}

// Update profile weight for callee blocks, if we didn't do it already.
if (pInlineInfo->profileScaleState == InlineInfo::ProfileScaleState::KNOWN)
{
continue;
}

if (inheritWeight)
{
block->inheritWeight(iciBlock);
Expand Down
11 changes: 11 additions & 0 deletions src/coreclr/src/jit/inline.h
Original file line number Diff line number Diff line change
Expand Up @@ -614,6 +614,17 @@ struct InlineInfo
GenTreeCall* iciCall; // The GT_CALL node to be inlined.
Statement* iciStmt; // The statement iciCall is in.
BasicBlock* iciBlock; // The basic block iciStmt is in.

// Profile support
//
// Records whether a scale factor for the inlinee's raw profile counts
// has been worked out (see Compiler::fgComputeProfileScale).
enum class ProfileScaleState
{
UNDETERMINED, // no attempt made yet; the state set up by fgInvokeInlineeCompiler
KNOWN,        // profileScaleFactor holds the scale to apply to callee counts
UNAVAILABLE   // an attempt was made but no usable scale could be derived
};

ProfileScaleState profileScaleState;
// Multiplier applied to callee block counts; only consulted when
// profileScaleState is KNOWN.
double profileScaleFactor;
};

// InlineContext tracks the inline history in a method.
Expand Down
Loading