Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MemProf] Support cloning for indirect calls with ThinLTO #110625

Merged
merged 7 commits into from
Oct 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions llvm/include/llvm/IR/ModuleSummaryIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,12 @@ struct ValueInfo {
return getRef()->second.SummaryList;
}

// Even if the index is built with GVs available, we may not have one for
// summary entries synthesized for profiled indirect call targets.
bool hasName() const { return !haveGVs() || getValue(); }

StringRef name() const {
assert(!haveGVs() || getRef()->second.U.GV);
return haveGVs() ? getRef()->second.U.GV->getName()
: getRef()->second.U.Name;
}
Expand Down
45 changes: 44 additions & 1 deletion llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@
#ifndef LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H
#define LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H

#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <functional>

namespace llvm {
Expand All @@ -36,15 +38,56 @@ class MemProfContextDisambiguation
/// the IR.
bool applyImport(Module &M);

// Builds the symtab and analysis used for ICP during ThinLTO backends.
bool initializeIndirectCallPromotionInfo(Module &M);

// Data structure for saving indirect call profile info for use in ICP with
// cloning.
struct ICallAnalysisData {
CallBase *CB;
std::vector<InstrProfValueData> CandidateProfileData;
uint32_t NumCandidates;
uint64_t TotalCount;
size_t CallsiteInfoStartIndex;
};

// Record information needed for ICP of an indirect call, depending on its
// profile information and the clone information recorded in the corresponding
// CallsiteInfo records. The SI iterator point to the current iteration point
// through AllCallsites in this function, and will be updated in this method
// as we iterate through profiled targets. The number of clones recorded for
// this indirect call is returned. The necessary information is recorded in
// the ICallAnalysisInfo list for later ICP.
unsigned recordICPInfo(CallBase *CB, ArrayRef<CallsiteInfo> AllCallsites,
ArrayRef<CallsiteInfo>::iterator &SI,
SmallVector<ICallAnalysisData> &ICallAnalysisInfo);

// Actually performs any needed ICP in the function, using the information
// recorded in the ICallAnalysisInfo list.
void performICP(Module &M, ArrayRef<CallsiteInfo> AllCallsites,
ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
ArrayRef<ICallAnalysisData> ICallAnalysisInfo,
OptimizationRemarkEmitter &ORE);

/// Import summary containing cloning decisions for the ThinLTO backend.
const ModuleSummaryIndex *ImportSummary;

// Owns the import summary specified by internal options for testing the
// ThinLTO backend via opt (to simulate distributed ThinLTO).
std::unique_ptr<ModuleSummaryIndex> ImportSummaryForTesting;

// Whether we are building with SamplePGO. This is needed for correctly
// updating profile metadata on speculatively promoted calls.
bool isSamplePGO;

// Used when performing indirect call analysis and promotion when cloning in
// the ThinLTO backend during applyImport.
std::unique_ptr<InstrProfSymtab> Symtab;
std::unique_ptr<ICallPromotionAnalysis> ICallAnalysis;

public:
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary = nullptr);
MemProfContextDisambiguation(const ModuleSummaryIndex *Summary = nullptr,
bool isSamplePGO = false);

PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);

Expand Down
59 changes: 36 additions & 23 deletions llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ static cl::opt<std::string> ModuleSummaryDotFile(
"module-summary-dot-file", cl::Hidden, cl::value_desc("filename"),
cl::desc("File to emit dot graph of new summary into"));

static cl::opt<bool> EnableMemProfIndirectCallSupport(
"enable-memprof-indirect-call-support", cl::init(true), cl::Hidden,
cl::desc(
"Enable MemProf support for summarizing and cloning indirect calls"));

extern cl::opt<bool> ScalePartialSampleProfileWorkingSetSize;

extern cl::opt<unsigned> MaxNumVTableAnnotations;
Expand Down Expand Up @@ -404,6 +409,11 @@ static void computeFunctionSummary(
if (HasLocalsInUsedOrAsm && CI && CI->isInlineAsm())
HasInlineAsmMaybeReferencingInternal = true;

// Compute this once per indirect call.
uint32_t NumCandidates = 0;
uint64_t TotalCount = 0;
MutableArrayRef<InstrProfValueData> CandidateProfileData;

auto *CalledValue = CB->getCalledOperand();
auto *CalledFunction = CB->getCalledFunction();
if (CalledValue && !CalledFunction) {
Expand Down Expand Up @@ -481,9 +491,7 @@ static void computeFunctionSummary(
}
}

uint32_t NumCandidates;
uint64_t TotalCount;
auto CandidateProfileData =
CandidateProfileData =
ICallAnalysis.getPromotionCandidatesForInstruction(&I, TotalCount,
NumCandidates);
for (const auto &Candidate : CandidateProfileData)
Expand All @@ -495,16 +503,6 @@ static void computeFunctionSummary(
if (!IsThinLTO)
continue;

// TODO: Skip indirect calls for now. Need to handle these better, likely
// by creating multiple Callsites, one per target, then speculatively
// devirtualize while applying clone info in the ThinLTO backends. This
// will also be important because we will have a different set of clone
// versions per target. This handling needs to match that in the ThinLTO
// backend so we handle things consistently for matching of callsite
// summaries to instructions.
if (!CalledFunction)
continue;

// Ensure we keep this analysis in sync with the handling in the ThinLTO
// backend (see MemProfContextDisambiguation::applyImport). Save this call
// so that we can skip it in checking the reverse case later.
Expand Down Expand Up @@ -555,13 +553,24 @@ static void computeFunctionSummary(
SmallVector<unsigned> StackIdIndices;
for (auto StackId : InstCallsite)
StackIdIndices.push_back(Index.addOrGetStackIdIndex(StackId));
// Use the original CalledValue, in case it was an alias. We want
// to record the call edge to the alias in that case. Eventually
// an alias summary will be created to associate the alias and
// aliasee.
auto CalleeValueInfo =
Index.getOrInsertValueInfo(cast<GlobalValue>(CalledValue));
Callsites.push_back({CalleeValueInfo, StackIdIndices});
if (CalledFunction) {
// Use the original CalledValue, in case it was an alias. We want
// to record the call edge to the alias in that case. Eventually
// an alias summary will be created to associate the alias and
// aliasee.
auto CalleeValueInfo =
Index.getOrInsertValueInfo(cast<GlobalValue>(CalledValue));
Callsites.push_back({CalleeValueInfo, StackIdIndices});
} else if (EnableMemProfIndirectCallSupport) {
// For indirect callsites, create multiple Callsites, one per target.
// This enables having a different set of clone versions per target,
// and we will apply the cloning decisions while speculatively
// devirtualizing in the ThinLTO backends.
for (const auto &Candidate : CandidateProfileData) {
auto CalleeValueInfo = Index.getOrInsertValueInfo(Candidate.Value);
Callsites.push_back({CalleeValueInfo, StackIdIndices});
}
}
}
}
}
Expand Down Expand Up @@ -1214,9 +1223,13 @@ bool llvm::mayHaveMemprofSummary(const CallBase *CB) {
if (CI && CalledFunction->isIntrinsic())
return false;
} else {
// TODO: For now skip indirect calls. See comments in
// computeFunctionSummary for what is needed to handle this.
return false;
// Skip inline assembly calls.
if (CI && CI->isInlineAsm())
return false;
// Skip direct calls via Constant.
if (!CalledValue || isa<Constant>(CalledValue))
return false;
return true;
}
return true;
}
4 changes: 2 additions & 2 deletions llvm/lib/IR/AsmWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3613,7 +3613,7 @@ void AssemblyWriter::printSummary(const GlobalValueSummary &Summary) {

void AssemblyWriter::printSummaryInfo(unsigned Slot, const ValueInfo &VI) {
Out << "^" << Slot << " = gv: (";
if (!VI.name().empty())
if (VI.hasName() && !VI.name().empty())
Out << "name: \"" << VI.name() << "\"";
else
Out << "guid: " << VI.getGUID();
Expand All @@ -3627,7 +3627,7 @@ void AssemblyWriter::printSummaryInfo(unsigned Slot, const ValueInfo &VI) {
Out << ")";
}
Out << ")";
if (!VI.name().empty())
if (VI.hasName() && !VI.name().empty())
Out << " ; guid = " << VI.getGUID();
Out << "\n";
}
Expand Down
7 changes: 5 additions & 2 deletions llvm/lib/Passes/PassBuilderPipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1714,7 +1714,8 @@ ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
// For ThinLTO we must apply the context disambiguation decisions early, to
// ensure we can correctly match the callsites to summary data.
if (EnableMemProfContextDisambiguation)
MPM.addPass(MemProfContextDisambiguation(ImportSummary));
MPM.addPass(MemProfContextDisambiguation(
ImportSummary, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));

// These passes import type identifier resolutions for whole-program
// devirtualization and CFI. They must run early because other passes may
Expand Down Expand Up @@ -1927,7 +1928,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// amount of additional cloning required to distinguish the allocation
// contexts.
if (EnableMemProfContextDisambiguation)
MPM.addPass(MemProfContextDisambiguation());
MPM.addPass(MemProfContextDisambiguation(
/*Summary=*/nullptr,
PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));

// Optimize globals again after we ran the inliner.
MPM.addPass(GlobalOptPass());
Expand Down
Loading
Loading