Skip to content

Commit

Permalink
[ctx_prof] Fix the pre-thinlink "use" case
Browse files Browse the repository at this point in the history
Didn't notice in llvm#101338 that the instrumentation in
`llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll` was actually
incorrect.
  • Loading branch information
mtrofin committed Aug 8, 2024
1 parent 37a94b7 commit 18163ad
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ class Type;
class PGOCtxProfLoweringPass : public PassInfoMixin<PGOCtxProfLoweringPass> {
public:
explicit PGOCtxProfLoweringPass() = default;
static bool isContextualIRPGOEnabled();
// True if contextual instrumentation is enabled.
static bool isCtxIRPGOInstrEnabled();

PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
};
Expand Down
13 changes: 7 additions & 6 deletions llvm/lib/Passes/PassBuilderPipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1173,13 +1173,12 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
// We don't want to mix pgo ctx gen and pgo gen; we also don't currently
// enable ctx profiling from the frontend.
assert(
!(IsPGOInstrGen && PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) &&
"Enabling both instrumented FDO and contextual instrumentation is not "
"supported.");
assert(!(IsPGOInstrGen && PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled()) &&
"Enabling both instrumented FDO and contextual instrumentation is not "
"supported.");
// Enable contextual profiling instrumentation.
const bool IsCtxProfGen = !IsPGOInstrGen && IsPreLink &&
PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled();
const bool IsCtxProfUse = !UseCtxProfile.empty() && !PGOOpt &&
Phase == ThinOrFullLTOPhase::ThinLTOPreLink;

Expand Down Expand Up @@ -1670,8 +1669,10 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
// In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
// thinlto use the contextual info to perform imports; then use the contextual
// profile in the post-thinlink phase.
if (!UseCtxProfile.empty() && !PGOOpt)
if (!UseCtxProfile.empty() && !PGOOpt) {
addRequiredLTOPreLinkPasses(MPM);
return MPM;
}

// Run partial inlining pass to partially inline functions that have
// large bodies.
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ static cl::list<std::string> ContextRoots(
"root of an interesting graph, which will be profiled independently "
"from other similar graphs."));

bool PGOCtxProfLoweringPass::isContextualIRPGOEnabled() {
bool PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled() {
return !ContextRoots.empty();
}

Expand Down
17 changes: 10 additions & 7 deletions llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,7 @@ static cl::opt<unsigned> PGOFunctionCriticalEdgeThreshold(
" greater than this threshold."));

extern cl::opt<unsigned> MaxNumVTableAnnotations;
extern cl::opt<std::string> UseCtxProfile;

namespace llvm {
// Command line option to turn on CFG dot dump after profile annotation.
Expand All @@ -338,18 +339,20 @@ extern cl::opt<bool> EnableVTableProfileUse;
extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate;
} // namespace llvm

bool shouldInstrumentForCtxProf() {
return PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled() ||
!UseCtxProfile.empty();
}
bool shouldInstrumentEntryBB() {
return PGOInstrumentEntry ||
PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
return PGOInstrumentEntry || shouldInstrumentForCtxProf();
}

// FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls. Ctx
// profiling implicitly captures indirect call cases, but not other values.
// Supporting other values is relatively straight-forward - just another counter
// range within the context.
bool isValueProfilingDisabled() {
return DisableValueProfiling ||
PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
return DisableValueProfiling || shouldInstrumentForCtxProf();
}

// Return a string describing the branch condition that can be
Expand Down Expand Up @@ -902,7 +905,7 @@ static void instrumentOneFunc(
unsigned NumCounters =
InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();

if (PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) {
if (shouldInstrumentForCtxProf()) {
auto *CSIntrinsic =
Intrinsic::getDeclaration(M, Intrinsic::instrprof_callsite);
// We want to count the instrumentable callsites, then instrument them. This
Expand Down Expand Up @@ -1861,7 +1864,7 @@ static bool InstrumentAllFunctions(
function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
// For the context-sensitve instrumentation, we should have a separated pass
// (before LTO/ThinLTO linking) to create these variables.
if (!IsCS && !PGOCtxProfLoweringPass::isContextualIRPGOEnabled())
if (!IsCS && !shouldInstrumentForCtxProf())
createIRLevelProfileFlagVar(M, /*IsCS=*/false);

Triple TT(M.getTargetTriple());
Expand Down Expand Up @@ -2112,7 +2115,7 @@ static bool annotateAllFunctions(
bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
if (PGOInstrumentEntry.getNumOccurrences() > 0)
InstrumentFuncEntry = PGOInstrumentEntry;
InstrumentFuncEntry |= PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
InstrumentFuncEntry |= shouldInstrumentForCtxProf();

bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
for (auto &F : M) {
Expand Down
14 changes: 10 additions & 4 deletions llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; There is no profile, but that's OK because the prelink does not care about
; the content of the profile, just that we intend to use one.
; There is no scenario currently of doing ctx profile use without thinlto.
Expand All @@ -7,19 +7,22 @@

declare void @bar()

;.
; CHECK: @__profn_foo = private constant [3 x i8] c"foo"
;.
define void @foo(i32 %a, ptr %fct) {
; CHECK-LABEL: define void @foo(
; CHECK-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) local_unnamed_addr {
; CHECK-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0)
; CHECK-NEXT: [[T:%.*]] = icmp eq i32 [[A]], 0
; CHECK-NEXT: br i1 [[T]], label %[[YES:.*]], label %[[NO:.*]]
; CHECK: [[YES]]:
; CHECK-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1)
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[FCT]] to i64
; CHECK-NEXT: call void @llvm.instrprof.value.profile(ptr @__profn_foo, i64 728453322856651412, i64 [[TMP1]], i32 0, i32 0)
; CHECK-NEXT: call void @llvm.instrprof.callsite(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0, ptr [[FCT]])
; CHECK-NEXT: call void [[FCT]](i32 0)
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[NO]]:
; CHECK-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0)
; CHECK-NEXT: call void @llvm.instrprof.callsite(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1, ptr @bar)
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
Expand All @@ -36,3 +39,6 @@ no:
exit:
ret void
}
;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind }
;.

0 comments on commit 18163ad

Please sign in to comment.