diff --git a/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h index 5256aff56205bab..f127d16b8de124d 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h +++ b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h @@ -19,7 +19,8 @@ class Type; class PGOCtxProfLoweringPass : public PassInfoMixin { public: explicit PGOCtxProfLoweringPass() = default; - static bool isContextualIRPGOEnabled(); + // True if contextual instrumentation is enabled. + static bool isCtxIRPGOInstrEnabled(); PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); }; diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index c175ee898098495..67f3464f745a160 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1173,13 +1173,12 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty(); // We don't want to mix pgo ctx gen and pgo gen; we also don't currently // enable ctx profiling from the frontend. - assert( - !(IsPGOInstrGen && PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) && - "Enabling both instrumented FDO and contextual instrumentation is not " - "supported."); + assert(!(IsPGOInstrGen && PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled()) && + "Enabling both instrumented PGO and contextual instrumentation is not " + "supported."); // Enable contextual profiling instrumentation. const bool IsCtxProfGen = !IsPGOInstrGen && IsPreLink && - PGOCtxProfLoweringPass::isContextualIRPGOEnabled(); + PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled(); const bool IsCtxProfUse = !UseCtxProfile.empty() && !PGOOpt && Phase == ThinOrFullLTOPhase::ThinLTOPreLink; @@ -1670,8 +1669,10 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) { // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let // thinlto use the contextual info to perform imports; then use the contextual // profile in the post-thinlink phase. - if (!UseCtxProfile.empty() && !PGOOpt) + if (!UseCtxProfile.empty() && !PGOOpt) { + addRequiredLTOPreLinkPasses(MPM); return MPM; + } // Run partial inlining pass to partially inline functions that have // large bodies. diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp index de1d4d2381c06ee..d6ba12465bb3283 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp @@ -30,7 +30,7 @@ static cl::list ContextRoots( "root of an interesting graph, which will be profiled independently " "from other similar graphs.")); -bool PGOCtxProfLoweringPass::isContextualIRPGOEnabled() { +bool PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled() { return !ContextRoots.empty(); } diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 1ce8f58c1aa1408..41618194d12ed7c 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -321,6 +321,7 @@ static cl::opt PGOFunctionCriticalEdgeThreshold( " greater than this threshold.")); extern cl::opt MaxNumVTableAnnotations; +extern cl::opt UseCtxProfile; namespace llvm { // Command line option to turn on CFG dot dump after profile annotation. @@ -338,9 +339,12 @@ extern cl::opt EnableVTableProfileUse; extern cl::opt ProfileCorrelate; } // namespace llvm +bool shouldInstrumentForCtxProf() { + return PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled() || + !UseCtxProfile.empty(); +} bool shouldInstrumentEntryBB() { - return PGOInstrumentEntry || - PGOCtxProfLoweringPass::isContextualIRPGOEnabled(); + return PGOInstrumentEntry || shouldInstrumentForCtxProf(); } // FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls. Ctx @@ -348,8 +352,7 @@ bool shouldInstrumentEntryBB() { // Supporting other values is relatively straight-forward - just another counter // range within the context. bool isValueProfilingDisabled() { - return DisableValueProfiling || - PGOCtxProfLoweringPass::isContextualIRPGOEnabled(); + return DisableValueProfiling || shouldInstrumentForCtxProf(); } // Return a string describing the branch condition that can be @@ -902,7 +905,7 @@ static void instrumentOneFunc( unsigned NumCounters = InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); - if (PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) { + if (shouldInstrumentForCtxProf()) { auto *CSIntrinsic = Intrinsic::getDeclaration(M, Intrinsic::instrprof_callsite); // We want to count the instrumentable callsites, then instrument them. This @@ -1861,7 +1864,7 @@ static bool InstrumentAllFunctions( function_ref LookupBFI, bool IsCS) { // For the context-sensitve instrumentation, we should have a separated pass // (before LTO/ThinLTO linking) to create these variables. - if (!IsCS && !PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) + if (!IsCS && !shouldInstrumentForCtxProf()) createIRLevelProfileFlagVar(M, /*IsCS=*/false); Triple TT(M.getTargetTriple()); @@ -2112,7 +2115,7 @@ static bool annotateAllFunctions( bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled(); if (PGOInstrumentEntry.getNumOccurrences() > 0) InstrumentFuncEntry = PGOInstrumentEntry; - InstrumentFuncEntry |= PGOCtxProfLoweringPass::isContextualIRPGOEnabled(); + InstrumentFuncEntry |= shouldInstrumentForCtxProf(); bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage(); for (auto &F : M) { diff --git a/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll b/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll index b50a815be5abf5b..18ac2f92aa39d46 100644 --- a/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll +++ b/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 ; There is no profile, but that's OK because the prelink does not care about ; the content of the profile, just that we intend to use one. ; There is no scenario currently of doing ctx profile use without thinlto. @@ -7,19 +7,22 @@ declare void @bar() +;. +; CHECK: @__profn_foo = private constant [3 x i8] c"foo" +;. define void @foo(i32 %a, ptr %fct) { ; CHECK-LABEL: define void @foo( ; CHECK-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) local_unnamed_addr { +; CHECK-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0) ; CHECK-NEXT: [[T:%.*]] = icmp eq i32 [[A]], 0 ; CHECK-NEXT: br i1 [[T]], label %[[YES:.*]], label %[[NO:.*]] ; CHECK: [[YES]]: ; CHECK-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1) -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[FCT]] to i64 -; CHECK-NEXT: call void @llvm.instrprof.value.profile(ptr @__profn_foo, i64 728453322856651412, i64 [[TMP1]], i32 0, i32 0) +; CHECK-NEXT: call void @llvm.instrprof.callsite(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0, ptr [[FCT]]) ; CHECK-NEXT: call void [[FCT]](i32 0) ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[NO]]: -; CHECK-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0) +; CHECK-NEXT: call void @llvm.instrprof.callsite(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1, ptr @bar) ; CHECK-NEXT: call void @bar() ; CHECK-NEXT: br label %[[EXIT]] ; CHECK: [[EXIT]]: @@ -36,3 +39,6 @@ no: exit: ret void } +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind } +;.