Skip to content

Commit

Permalink
[llvm-profgen] Add support for Linux kernel profile (#92831)
Browse files Browse the repository at this point in the history
Add support for handling Linux kernel perf files. The functionality is
enabled by the -kernel option. Note that currently only the main kernel
(in vmlinux) is handled; kernel modules are not handled.

---------

Co-authored-by: Han Shen <shenhan@google.com>
  • Loading branch information
xur-llvm and shenhanc78 authored Jun 13, 2024
1 parent e387299 commit 2fa6eaf
Show file tree
Hide file tree
Showing 4 changed files with 123 additions and 66 deletions.
137 changes: 88 additions & 49 deletions llvm/tools/llvm-profgen/PerfReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ bool VirtualUnwinder::unwind(const PerfSample *Sample, uint64_t Repeat) {

std::unique_ptr<PerfReaderBase>
PerfReaderBase::create(ProfiledBinary *Binary, PerfInputFile &PerfInput,
std::optional<uint32_t> PIDFilter) {
std::optional<int32_t> PIDFilter) {
std::unique_ptr<PerfReaderBase> PerfReader;

if (PerfInput.Format == PerfFormat::UnsymbolizedProfile) {
Expand All @@ -331,9 +331,10 @@ PerfReaderBase::create(ProfiledBinary *Binary, PerfInputFile &PerfInput,
}

// For perf data input, we need to convert them into perf script first.
// If this is a kernel perf file, there is no need for retrieving PIDs.
if (PerfInput.Format == PerfFormat::PerfData)
PerfInput =
PerfScriptReader::convertPerfDataToTrace(Binary, PerfInput, PIDFilter);
PerfInput = PerfScriptReader::convertPerfDataToTrace(
Binary, Binary->isKernel(), PerfInput, PIDFilter);

assert((PerfInput.Format == PerfFormat::PerfScript) &&
"Should be a perfscript!");
Expand All @@ -353,59 +354,69 @@ PerfReaderBase::create(ProfiledBinary *Binary, PerfInputFile &PerfInput,
}

PerfInputFile
PerfScriptReader::convertPerfDataToTrace(ProfiledBinary *Binary,
PerfScriptReader::convertPerfDataToTrace(ProfiledBinary *Binary, bool SkipPID,
PerfInputFile &File,
std::optional<uint32_t> PIDFilter) {
std::optional<int32_t> PIDFilter) {
StringRef PerfData = File.InputFile;
// Run perf script to retrieve PIDs matching binary we're interested in.
auto PerfExecutable = sys::Process::FindInEnvPath("PATH", "perf");
if (!PerfExecutable) {
exitWithError("Perf not found.");
}
std::string PerfPath = *PerfExecutable;

SmallString<128> PerfTraceFile;
sys::fs::createUniquePath("perf-script-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%.tmp",
PerfTraceFile, /*MakeAbsolute=*/true);
std::string ErrorFile = std::string(PerfTraceFile) + ".err";
StringRef ScriptMMapArgs[] = {PerfPath, "script", "--show-mmap-events",
"-F", "comm,pid", "-i",
PerfData};
std::optional<StringRef> Redirects[] = {std::nullopt, // Stdin
StringRef(PerfTraceFile), // Stdout
StringRef(ErrorFile)}; // Stderr
sys::ExecuteAndWait(PerfPath, ScriptMMapArgs, std::nullopt, Redirects);

PerfScriptReader::TempFileCleanups.emplace_back(PerfTraceFile);
PerfScriptReader::TempFileCleanups.emplace_back(ErrorFile);

// Collect the PIDs
TraceStream TraceIt(PerfTraceFile);
std::string PIDs;
std::unordered_set<uint32_t> PIDSet;
while (!TraceIt.isAtEoF()) {
MMapEvent MMap;
if (isMMap2Event(TraceIt.getCurrentLine()) &&
extractMMap2EventForBinary(Binary, TraceIt.getCurrentLine(), MMap)) {
auto It = PIDSet.emplace(MMap.PID);
if (It.second && (!PIDFilter || MMap.PID == *PIDFilter)) {
if (!PIDs.empty()) {
PIDs.append(",");
if (!SkipPID) {
StringRef ScriptMMapArgs[] = {PerfPath, "script", "--show-mmap-events",
"-F", "comm,pid", "-i",
PerfData};
sys::ExecuteAndWait(PerfPath, ScriptMMapArgs, std::nullopt, Redirects);

// Collect the PIDs
TraceStream TraceIt(PerfTraceFile);
std::unordered_set<int32_t> PIDSet;
while (!TraceIt.isAtEoF()) {
MMapEvent MMap;
if (isMMapEvent(TraceIt.getCurrentLine()) &&
extractMMapEventForBinary(Binary, TraceIt.getCurrentLine(), MMap)) {
auto It = PIDSet.emplace(MMap.PID);
if (It.second && (!PIDFilter || MMap.PID == *PIDFilter)) {
if (!PIDs.empty()) {
PIDs.append(",");
}
PIDs.append(utostr(MMap.PID));
}
PIDs.append(utostr(MMap.PID));
}
TraceIt.advance();
}
TraceIt.advance();
}

if (PIDs.empty()) {
exitWithError("No relevant mmap event is found in perf data.");
if (PIDs.empty()) {
exitWithError("No relevant mmap event is found in perf data.");
}
}

// Run perf script again to retrieve events for PIDs collected above
StringRef ScriptSampleArgs[] = {PerfPath, "script", "--show-mmap-events",
"-F", "ip,brstack", "--pid",
PIDs, "-i", PerfData};
SmallVector<StringRef, 8> ScriptSampleArgs;
ScriptSampleArgs.push_back(PerfPath);
ScriptSampleArgs.push_back("script");
ScriptSampleArgs.push_back("--show-mmap-events");
ScriptSampleArgs.push_back("-F");
ScriptSampleArgs.push_back("ip,brstack");
ScriptSampleArgs.push_back("-i");
ScriptSampleArgs.push_back(PerfData);
if (!PIDs.empty()) {
ScriptSampleArgs.push_back("--pid");
ScriptSampleArgs.push_back(PIDs);
}
sys::ExecuteAndWait(PerfPath, ScriptSampleArgs, std::nullopt, Redirects);

return {std::string(PerfTraceFile), PerfFormat::PerfScript,
Expand All @@ -428,7 +439,10 @@ static StringRef filename(StringRef Path, bool UseBackSlash) {
void PerfScriptReader::updateBinaryAddress(const MMapEvent &Event) {
// Drop the event which doesn't belong to user-provided binary
StringRef BinaryName = filename(Event.BinaryPath, Binary->isCOFF());
if (Binary->getName() != BinaryName)
bool IsKernel = Binary->isKernel();
if (!IsKernel && Binary->getName() != BinaryName)
return;
if (IsKernel && !Binary->isKernelImageName(BinaryName))
return;

// Drop the event if process does not match pid filter
Expand All @@ -441,7 +455,7 @@ void PerfScriptReader::updateBinaryAddress(const MMapEvent &Event) {
return;
}

if (Event.Offset == Binary->getTextSegmentOffset()) {
if (IsKernel || Event.Offset == Binary->getTextSegmentOffset()) {
// A binary image could be unloaded and then reloaded at different
// place, so update binary load address.
// Only update for the first executable segment and assume all other
Expand Down Expand Up @@ -950,16 +964,23 @@ void PerfScriptReader::parseSample(TraceStream &TraceIt) {
parseSample(TraceIt, Count);
}

bool PerfScriptReader::extractMMap2EventForBinary(ProfiledBinary *Binary,
StringRef Line,
MMapEvent &MMap) {
// Parse a line like:
bool PerfScriptReader::extractMMapEventForBinary(ProfiledBinary *Binary,
StringRef Line,
MMapEvent &MMap) {
// Parse a MMap2 line like:
// PERF_RECORD_MMAP2 2113428/2113428: [0x7fd4efb57000(0x204000) @ 0
// 08:04 19532229 3585508847]: r-xp /usr/lib64/libdl-2.17.so
constexpr static const char *const Pattern =
"PERF_RECORD_MMAP2 ([0-9]+)/[0-9]+: "
constexpr static const char *const MMap2Pattern =
"PERF_RECORD_MMAP2 (-?[0-9]+)/[0-9]+: "
"\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ "
"(0x[a-f0-9]+|0) .*\\]: [-a-z]+ (.*)";
// Parse a MMap line like
// PERF_RECORD_MMAP -1/0: [0xffffffff81e00000(0x3e8fa000) @ \
// 0xffffffff81e00000]: x [kernel.kallsyms]_text
constexpr static const char *const MMapPattern =
"PERF_RECORD_MMAP (-?[0-9]+)/[0-9]+: "
"\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ "
"(0x[a-f0-9]+|0)\\]: [-a-z]+ (.*)";
// Field 0 - whole line
// Field 1 - PID
// Field 2 - base address
Expand All @@ -975,14 +996,25 @@ bool PerfScriptReader::extractMMap2EventForBinary(ProfiledBinary *Binary,
BINARY_PATH = 5
};

Regex RegMmap2(Pattern);
bool R = false;
SmallVector<StringRef, 6> Fields;
bool R = RegMmap2.match(Line, &Fields);
if (Line.contains("PERF_RECORD_MMAP2 ")) {
Regex RegMmap2(MMap2Pattern);
R = RegMmap2.match(Line, &Fields);
} else if (Line.contains("PERF_RECORD_MMAP ")) {
Regex RegMmap(MMapPattern);
R = RegMmap.match(Line, &Fields);
} else
llvm_unreachable("unexpected MMAP event entry");

if (!R) {
std::string WarningMsg = "Cannot parse mmap event: " + Line.str() + " \n";
WithColor::warning() << WarningMsg;
return false;
}
Fields[PID].getAsInteger(10, MMap.PID);
long long MMapPID = 0;
getAsSignedInteger(Fields[PID], 10, MMapPID);
MMap.PID = MMapPID;
Fields[MMAPPED_ADDRESS].getAsInteger(0, MMap.Address);
Fields[MMAPPED_SIZE].getAsInteger(0, MMap.Size);
Fields[PAGE_OFFSET].getAsInteger(0, MMap.Offset);
Expand All @@ -993,19 +1025,22 @@ bool PerfScriptReader::extractMMap2EventForBinary(ProfiledBinary *Binary,
}

StringRef BinaryName = filename(MMap.BinaryPath, Binary->isCOFF());
if (Binary->isKernel()) {
return Binary->isKernelImageName(BinaryName);
}
return Binary->getName() == BinaryName;
}

void PerfScriptReader::parseMMap2Event(TraceStream &TraceIt) {
void PerfScriptReader::parseMMapEvent(TraceStream &TraceIt) {
MMapEvent MMap;
if (extractMMap2EventForBinary(Binary, TraceIt.getCurrentLine(), MMap))
if (extractMMapEventForBinary(Binary, TraceIt.getCurrentLine(), MMap))
updateBinaryAddress(MMap);
TraceIt.advance();
}

void PerfScriptReader::parseEventOrSample(TraceStream &TraceIt) {
if (isMMap2Event(TraceIt.getCurrentLine()))
parseMMap2Event(TraceIt);
if (isMMapEvent(TraceIt.getCurrentLine()))
parseMMapEvent(TraceIt);
else
parseSample(TraceIt);
}
Expand All @@ -1032,17 +1067,17 @@ bool PerfScriptReader::isLBRSample(StringRef Line) {
return false;
}

bool PerfScriptReader::isMMap2Event(StringRef Line) {
bool PerfScriptReader::isMMapEvent(StringRef Line) {
// Shortcut to avoid the string search if possible.
if (Line.empty() || Line.size() < 50)
return false;

if (std::isdigit(Line[0]))
return false;

// PERF_RECORD_MMAP2 does not appear at the beginning of the line
// for ` perf script --show-mmap-events -i ...`
return Line.contains("PERF_RECORD_MMAP2");
// PERF_RECORD_MMAP2 or PERF_RECORD_MMAP does not appear at the beginning of
// the line for ` perf script --show-mmap-events -i ...`
return Line.contains("PERF_RECORD_MMAP");
}

// The raw hybrid sample is like
Expand Down Expand Up @@ -1208,6 +1243,10 @@ void PerfScriptReader::warnInvalidRange() {
void PerfScriptReader::parsePerfTraces() {
// Parse perf traces and do aggregation.
parseAndAggregateTrace();
if (Binary->isKernel() && !Binary->getIsLoadedByMMap()) {
exitWithError(
"Kernel is requested, but no kernel is found in mmap events.");
}

emitWarningSummary(NumLeafExternalFrame, NumTotalSample,
"of samples have leaf external frame in call stack.");
Expand Down
34 changes: 17 additions & 17 deletions llvm/tools/llvm-profgen/PerfReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,7 @@ class PerfReaderBase {
virtual ~PerfReaderBase() = default;
static std::unique_ptr<PerfReaderBase>
create(ProfiledBinary *Binary, PerfInputFile &PerfInput,
std::optional<uint32_t> PIDFilter);
std::optional<int32_t> PIDFilter);

// Entry of the reader to parse multiple perf traces
virtual void parsePerfTraces() = 0;
Expand All @@ -595,15 +595,15 @@ class PerfReaderBase {
class PerfScriptReader : public PerfReaderBase {
public:
PerfScriptReader(ProfiledBinary *B, StringRef PerfTrace,
std::optional<uint32_t> PID)
: PerfReaderBase(B, PerfTrace), PIDFilter(PID){};
std::optional<int32_t> PID)
: PerfReaderBase(B, PerfTrace), PIDFilter(PID) {};

// Entry of the reader to parse multiple perf traces
void parsePerfTraces() override;
// Generate perf script from perf data
static PerfInputFile
convertPerfDataToTrace(ProfiledBinary *Binary, PerfInputFile &File,
std::optional<uint32_t> PIDFilter);
static PerfInputFile convertPerfDataToTrace(ProfiledBinary *Binary,
bool SkipPID, PerfInputFile &File,
std::optional<int32_t> PIDFilter);
// Extract perf script type by peeking at the input
static PerfContent checkPerfScriptType(StringRef FileName);

Expand All @@ -615,7 +615,7 @@ class PerfScriptReader : public PerfReaderBase {
protected:
// The parsed MMap event
struct MMapEvent {
uint64_t PID = 0;
int64_t PID = 0;
uint64_t Address = 0;
uint64_t Size = 0;
uint64_t Offset = 0;
Expand All @@ -625,15 +625,15 @@ class PerfScriptReader : public PerfReaderBase {
// Check whether a given line is LBR sample
static bool isLBRSample(StringRef Line);
// Check whether a given line is MMAP event
static bool isMMap2Event(StringRef Line);
// Parse a single line of a PERF_RECORD_MMAP2 event looking for a
static bool isMMapEvent(StringRef Line);
// Parse a single line of a PERF_RECORD_MMAP event looking for a
// mapping between the binary name and its memory layout.
static bool extractMMap2EventForBinary(ProfiledBinary *Binary, StringRef Line,
MMapEvent &MMap);
static bool extractMMapEventForBinary(ProfiledBinary *Binary, StringRef Line,
MMapEvent &MMap);
// Update base address based on mmap events
void updateBinaryAddress(const MMapEvent &Event);
// Parse mmap event and update binary address
void parseMMap2Event(TraceStream &TraceIt);
void parseMMapEvent(TraceStream &TraceIt);
// Parse perf events/samples and do aggregation
void parseAndAggregateTrace();
// Parse either an MMAP event or a perf sample
Expand Down Expand Up @@ -669,7 +669,7 @@ class PerfScriptReader : public PerfReaderBase {
// Keep track of all invalid return addresses
std::set<uint64_t> InvalidReturnAddresses;
// PID for the process of interest
std::optional<uint32_t> PIDFilter;
std::optional<int32_t> PIDFilter;
};

/*
Expand All @@ -681,8 +681,8 @@ class PerfScriptReader : public PerfReaderBase {
class LBRPerfReader : public PerfScriptReader {
public:
LBRPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
std::optional<uint32_t> PID)
: PerfScriptReader(Binary, PerfTrace, PID){};
std::optional<int32_t> PID)
: PerfScriptReader(Binary, PerfTrace, PID) {};
// Parse the LBR only sample.
void parseSample(TraceStream &TraceIt, uint64_t Count) override;
};
Expand All @@ -699,8 +699,8 @@ class LBRPerfReader : public PerfScriptReader {
class HybridPerfReader : public PerfScriptReader {
public:
HybridPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
std::optional<uint32_t> PID)
: PerfScriptReader(Binary, PerfTrace, PID){};
std::optional<int32_t> PID)
: PerfScriptReader(Binary, PerfTrace, PID) {};
// Parse the hybrid sample including the call and LBR line
void parseSample(TraceStream &TraceIt, uint64_t Count) override;
void generateUnsymbolizedProfile() override;
Expand Down
7 changes: 7 additions & 0 deletions llvm/tools/llvm-profgen/ProfiledBinary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ static cl::list<std::string> DisassembleFunctions(
cl::desc("List of functions to print disassembly for. Accept demangled "
"names only. Only work with show-disassembly-only"));

// Boolean command-line option "-kernel". ProfiledBinary::load() copies its
// value into IsKernel to mark the binary as a Linux kernel image.
static cl::opt<bool>
KernelBinary("kernel",
cl::desc("Generate the profile for Linux kernel binary."));

extern cl::opt<bool> ShowDetailedWarning;
extern cl::opt<bool> InferMissingFrames;

Expand Down Expand Up @@ -221,6 +225,9 @@ void ProfiledBinary::load() {

LLVM_DEBUG(dbgs() << "Loading " << Path << "\n");

// Mark the binary as a kernel image.
IsKernel = KernelBinary;

// Find the preferred load address for text sections.
setPreferredTextSegmentAddresses(Obj);

Expand Down
11 changes: 11 additions & 0 deletions llvm/tools/llvm-profgen/ProfiledBinary.h
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,9 @@ class ProfiledBinary {
// Whether we need to symbolize all instructions to get function context size.
bool TrackFuncContextSize = false;

// Whether this is a kernel image.
bool IsKernel = false;

// Indicate if the base loading address is parsed from the mmap event or uses
// the preferred address
bool IsLoadedByMMap = false;
Expand Down Expand Up @@ -428,6 +431,14 @@ class ProfiledBinary {

bool usePseudoProbes() const { return UsePseudoProbes; }
bool useFSDiscriminator() const { return UseFSDiscriminator; }
bool isKernel() const { return IsKernel; }

// Returns true when BinaryName is one of the names this tool accepts for the
// main kernel image in an mmap event, e.g. "[kernel.kallsyms]_text".
static bool isKernelImageName(StringRef BinaryName) {
  if (BinaryName == "[kernel.kallsyms]")
    return true;
  if (BinaryName == "[kernel.kallsyms]_stext")
    return true;
  return BinaryName == "[kernel.kallsyms]_text";
}

// Get the index in CodeAddressVec for the address
// As we might get an address which is not the code
// here it would round to the next valid code address by
Expand Down

0 comments on commit 2fa6eaf

Please sign in to comment.