Skip to content

Commit

Permalink
merge main into amd-staging
Browse files Browse the repository at this point in the history
reverts: breaks hipBlender*
4eecf3c [SLP]Reorder buildvector/reduction vectorization and fuse

Change-Id: Ia0cb69bf51f5ff37a0f511db6c5365a7d84e501b
  • Loading branch information
ronlieb committed Jul 5, 2024
2 parents 2531512 + 3141c11 commit 25880f2
Show file tree
Hide file tree
Showing 315 changed files with 17,389 additions and 7,205 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/libcxx-build-and-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ jobs:
cxx: [ 'clang++-19' ]
include:
- config: 'generic-gcc'
cc: 'gcc-13'
cxx: 'g++-13'
cc: 'gcc-14'
cxx: 'g++-14'
steps:
- uses: actions/checkout@v4
- name: ${{ matrix.config }}.${{ matrix.cxx }}
Expand Down Expand Up @@ -101,8 +101,8 @@ jobs:
cxx: [ 'clang++-19' ]
include:
- config: 'generic-gcc-cxx11'
cc: 'gcc-13'
cxx: 'g++-13'
cc: 'gcc-14'
cxx: 'g++-14'
- config: 'generic-cxx23'
cc: 'clang-17'
cxx: 'clang++-17'
Expand Down
4 changes: 4 additions & 0 deletions bolt/docs/CommandLineArgumentReference.md
Original file line number Diff line number Diff line change
Expand Up @@ -688,6 +688,10 @@

Use a modified clustering algorithm geared towards minimizing branches

- `--name-similarity-function-matching-threshold=<uint>`

Match functions using namespace and edit distance.

- `--no-inline`

Disable all inlining (overrides other inlining options)
Expand Down
5 changes: 5 additions & 0 deletions bolt/docs/OptimizingLinux.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ $ perf2bolt -p perf.data -o perf.fdata vmlinux

Under a high load, `perf.data` should be several gigabytes in size and you should expect the converted `perf.fdata` not to exceed 100 MB.

Profiles collected from multiple workloads can be merged into a single profile using the `merge-fdata` utility:
```bash
$ merge-fdata perf.1.fdata perf.2.fdata ... perf.<N>.fdata > perf.merged.fdata
```

Two changes are required for the kernel build. The first one is optional but highly recommended. It introduces a BOLT-reserved space into `vmlinux` code section:


Expand Down
12 changes: 12 additions & 0 deletions bolt/include/bolt/Core/DebugData.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,15 @@ class DebugRangesSectionWriter {
/// Kind check used for type queries on a ranges writer: returns true when
/// \p Writer reports RangesWriterKind::DebugRangesWriter from getKind().
static bool classof(const DebugRangesSectionWriter *Writer) {
return Writer->getKind() == RangesWriterKind::DebugRangesWriter;
}

/// Appends the contents of \p CUBuffer to the main ranges buffer and updates
/// the section offset to the new size of that buffer.
void appendToRangeBuffer(const DebugBufferVector &CUBuffer);

/// Sets the Unit DIE to be updated for the CU. The pointer is stored as-is;
/// this writer does not take ownership of \p Die.
void setDie(DIE *Die) { this->Die = Die; }

/// Returns the Unit DIE to be updated for the CU, i.e. the pointer last
/// passed to setDie(), or null if none has been set.
DIE *getDie() const { return Die; }

/// Writes out range lists for the CU currently being processed.
/// This implementation does nothing; it is virtual so that other writer
/// kinds can provide their own finalization.
virtual void finalizeSection() {}
Expand All @@ -232,6 +241,9 @@ class DebugRangesSectionWriter {
static constexpr uint64_t EmptyRangesOffset{0};

private:
/// Unit DIE to be updated for the CU; null until setDie() is called.
DIE *Die{nullptr};

RangesWriterKind Kind;
};

Expand Down
4 changes: 4 additions & 0 deletions bolt/include/bolt/Core/DebugNames.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ class DWARF5AcceleratorTable {
uint64_t CurrentUnitOffset = 0;
const DWARFUnit *CurrentUnit = nullptr;
std::unordered_map<uint32_t, uint32_t> AbbrevTagToIndexMap;
/// Maps a TU hash to its index in the Foreign TU list.
/// This is used to reduce the size of the Foreign TU list, since there could
/// be multiple TUs with the same hash.
DenseMap<uint64_t, uint32_t> TUHashToIndexMap;

/// Represents a group of entries with identical name (and hence, hash value).
struct HashData {
Expand Down
3 changes: 3 additions & 0 deletions bolt/include/bolt/Profile/YAMLProfileReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ class YAMLProfileReader : public ProfileReaderBase {
ProfiledFunctions.emplace(&BF);
}

/// Matches not-yet-matched profiled functions to binary functions that share
/// the same derived namespace and basic block count, picking the binary
/// function whose demangled name has the smallest edit distance to the
/// profiled name. Returns the number of functions matched this way.
uint64_t matchWithNameSimilarity(BinaryContext &BC);

/// Check if the profile uses an event with a given \p Name.
bool usesEvent(StringRef Name) const;
};
Expand Down
4 changes: 4 additions & 0 deletions bolt/include/bolt/Rewrite/DWARFRewriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ class DWARFRewriter {
/// Store Rangelists writer for each DWO CU.
RangeListsDWOWriers RangeListsWritersByCU;

/// Stores the ranges writer for each DWO CU. Each writer is owned by this
/// map through a unique_ptr.
/// NOTE(review): the 64-bit key is presumably the DWO id -- confirm against
/// the code that populates this map.
std::unordered_map<uint64_t, std::unique_ptr<DebugRangesSectionWriter>>
LegacyRangesWritersByCU;

std::mutex LocListDebugInfoPatchesMutex;

/// Dwo id specific its RangesBase.
Expand Down
8 changes: 0 additions & 8 deletions bolt/lib/Core/BinaryContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2403,16 +2403,8 @@ BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
Streamer->emitLabel(SplitStartLabel);
emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true);
Streamer->emitLabel(SplitEndLabel);
// To avoid calling MCObjectStreamer::flushPendingLabels() which is
// private
Streamer->emitBytes(StringRef(""));
Streamer->switchSection(Section);
}

// To avoid calling MCObjectStreamer::flushPendingLabels() which is private or
// MCStreamer::Finish(), which does more than we want
Streamer->emitBytes(StringRef(""));

MCAssembler &Assembler =
static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
Assembler.layout();
Expand Down
6 changes: 6 additions & 0 deletions bolt/lib/Core/DebugData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,12 @@ uint64_t DebugRangesSectionWriter::getSectionOffset() {
return SectionOffset;
}

/// Streams the contents of \p CUBuffer into the ranges stream, then records
/// the current size of the ranges buffer as the section offset.
void DebugRangesSectionWriter::appendToRangeBuffer(
    const DebugBufferVector &CUBuffer) {
  auto &Stream = *RangesStream;
  Stream << CUBuffer;
  // The size is read only after the write, as in the statement order above.
  SectionOffset = RangesBuffer->size();
}

DebugAddrWriter *DebugRangeListsSectionWriter::AddrWriter = nullptr;

uint64_t DebugRangeListsSectionWriter::addRanges(
Expand Down
15 changes: 12 additions & 3 deletions bolt/lib/Core/DebugNames.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,11 @@ void DWARF5AcceleratorTable::addUnit(DWARFUnit &Unit,
auto Iter = CUOffsetsToPatch.insert({*DWOID, CUList.size()});
if (Iter.second)
CUList.push_back(BADCUOFFSET);
ForeignTUList.push_back(cast<DWARFTypeUnit>(&Unit)->getTypeHash());
const uint64_t TUHash = cast<DWARFTypeUnit>(&Unit)->getTypeHash();
if (!TUHashToIndexMap.count(TUHash)) {
TUHashToIndexMap.insert({TUHash, ForeignTUList.size()});
ForeignTUList.push_back(TUHash);
}
} else {
LocalTUList.push_back(CurrentUnitOffset);
}
Expand Down Expand Up @@ -231,8 +235,13 @@ DWARF5AcceleratorTable::addAccelTableEntry(
IsTU = Unit.isTypeUnit();
DieTag = Die.getTag();
if (IsTU) {
if (DWOID)
return ForeignTUList.size() - 1;
if (DWOID) {
const uint64_t TUHash = cast<DWARFTypeUnit>(&Unit)->getTypeHash();
auto Iter = TUHashToIndexMap.find(TUHash);
assert(Iter != TUHashToIndexMap.end() &&
"Could not find TU hash in map");
return Iter->second;
}
return LocalTUList.size() - 1;
}
return CUList.size() - 1;
Expand Down
1 change: 1 addition & 0 deletions bolt/lib/Profile/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ add_llvm_library(LLVMBOLTProfile
DISABLE_LLVM_LINK_LLVM_DYLIB

LINK_COMPONENTS
Demangle
Support
TransformUtils
)
Expand Down
121 changes: 121 additions & 0 deletions bolt/lib/Profile/YAMLProfileReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,11 @@
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Passes/MCF.h"
#include "bolt/Profile/ProfileYAMLMapping.h"
#include "bolt/Utils/NameResolver.h"
#include "bolt/Utils/Utils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/edit_distance.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/Support/CommandLine.h"
#include <cstdlib>

using namespace llvm;
Expand All @@ -24,6 +27,11 @@ extern cl::OptionCategory BoltOptCategory;
extern cl::opt<bool> InferStaleProfile;
extern cl::opt<bool> Lite;

// Maximum edit distance allowed between the demangled names of a profiled
// function and a binary function for them to be matched by name similarity.
// The default of 0 leaves name-similarity matching disabled: readProfile()
// only runs it when the value is greater than zero.
cl::opt<unsigned> NameSimilarityFunctionMatchingThreshold(
"name-similarity-function-matching-threshold",
cl::desc("Match functions using namespace and edit distance"), cl::init(0),
cl::Hidden, cl::cat(BoltOptCategory));

static llvm::cl::opt<bool>
IgnoreHash("profile-ignore-hash",
cl::desc("ignore hash while reading function profile"),
Expand Down Expand Up @@ -350,6 +358,111 @@ bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) {
return false;
}

/// Matches profiled functions that have not been matched yet to binary
/// functions with similar names.
///
/// A profiled function and a binary function are candidates for each other
/// when they share the same derived namespace and have the same number of
/// basic blocks. Among the candidates that are not already profiled, the one
/// whose demangled name has the smallest edit distance to the profiled name is
/// selected, and it is matched only if that distance does not exceed
/// opts::NameSimilarityFunctionMatchingThreshold.
///
/// \returns the number of profiled functions matched by this method.
uint64_t YAMLProfileReader::matchWithNameSimilarity(BinaryContext &BC) {
  uint64_t MatchedWithNameSimilarity = 0;
  ItaniumPartialDemangler Demangler;

  // Restores the original symbol name and demangles it.
  auto DemangleName = [](const std::string &FunctionName) {
    StringRef RestoredName = NameResolver::restore(FunctionName);
    return demangle(RestoredName);
  };

  // Derives the enclosing declaration context ("namespace") of a function
  // name. Returns an empty string when no context can be derived.
  // NOTE(review): partialDemangle() is documented to take a mangled name, but
  // it is handed the demangled text here -- confirm this is intended.
  auto DeriveNameSpace = [&Demangler](const std::string &DemangledName) {
    if (Demangler.partialDemangle(DemangledName.c_str()))
      return std::string();
    // Let the demangler allocate the output itself: like __cxa_demangle, it
    // may realloc() the buffer it is given, so the buffer must not be owned
    // by a std::vector, and the in/out size must be initialized.
    size_t BufferSize = 0;
    char *NameSpace =
        Demangler.getFunctionDeclContextName(nullptr, &BufferSize);
    // A null result means the parsed name is not a function.
    if (!NameSpace)
      return std::string();
    // The result is NUL-terminated; do not copy the terminator into the key.
    std::string Result(NameSpace);
    std::free(NameSpace);
    return Result;
  };

  // Maps namespaces to the block counts of the profiled functions in them, and
  // records each profiled function's demangled name and namespace (indexed
  // like YamlBP.Functions) so they are computed only once.
  StringMap<std::set<uint32_t>> NamespaceToProfiledBFSizes;
  std::vector<std::string> ProfileBFDemangledNames;
  ProfileBFDemangledNames.reserve(YamlBP.Functions.size());
  std::vector<std::string> ProfiledBFNamespaces;
  ProfiledBFNamespaces.reserve(YamlBP.Functions.size());

  for (auto &YamlBF : YamlBP.Functions) {
    ProfileBFDemangledNames.push_back(DemangleName(YamlBF.Name));
    ProfiledBFNamespaces.push_back(
        DeriveNameSpace(ProfileBFDemangledNames.back()));
    NamespaceToProfiledBFSizes[ProfiledBFNamespaces.back()].insert(
        YamlBF.NumBasicBlocks);
  }

  // Maps namespaces to binary functions, leaving out binary functions that
  // have no profiled function of equal size in the same namespace.
  StringMap<std::vector<BinaryFunction *>> NamespaceToBFs;
  for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
    std::string DemangledName = BF->getDemangledName();
    std::string Namespace = DeriveNameSpace(DemangledName);

    auto SizesIt = NamespaceToProfiledBFSizes.find(Namespace);
    // Skip if there are no profiled functions in this namespace.
    if (SizesIt == NamespaceToProfiledBFSizes.end())
      continue;
    // Skip if no profiled function in this namespace has the same number of
    // blocks.
    if (SizesIt->second.count(BF->size()) == 0)
      continue;
    NamespaceToBFs[Namespace].push_back(BF);
  }

  // For every unmatched profiled function, find the closest-named binary
  // function in the same namespace and match it if it is within the threshold.
  assert(YamlBP.Functions.size() == ProfiledBFNamespaces.size() &&
         ProfiledBFNamespaces.size() == ProfileBFDemangledNames.size());
  for (size_t I = 0; I < YamlBP.Functions.size(); ++I) {
    yaml::bolt::BinaryFunctionProfile &YamlBF = YamlBP.Functions[I];
    if (YamlBF.Used)
      continue;
    // Skip if there are no candidate binary functions in this namespace.
    auto It = NamespaceToBFs.find(ProfiledBFNamespaces[I]);
    if (It == NamespaceToBFs.end())
      continue;

    const std::string &YamlBFDemangledName = ProfileBFDemangledNames[I];
    // Bind by reference: copying the candidate list per profile is wasteful.
    const std::vector<BinaryFunction *> &BFs = It->second;
    unsigned MinEditDistance = UINT_MAX;
    BinaryFunction *ClosestNameBF = nullptr;

    for (BinaryFunction *BF : BFs) {
      // Functions matched earlier (including by this loop) are not reused.
      if (ProfiledFunctions.count(BF))
        continue;
      if (BF->size() != YamlBF.NumBasicBlocks)
        continue;
      std::string BFDemangledName = BF->getDemangledName();
      unsigned BFEditDistance =
          StringRef(BFDemangledName).edit_distance(YamlBFDemangledName);
      if (BFEditDistance < MinEditDistance) {
        MinEditDistance = BFEditDistance;
        ClosestNameBF = BF;
      }
    }

    if (ClosestNameBF &&
        MinEditDistance <= opts::NameSimilarityFunctionMatchingThreshold) {
      matchProfileToFunction(YamlBF, *ClosestNameBF);
      ++MatchedWithNameSimilarity;
    }
  }

  return MatchedWithNameSimilarity;
}

Error YAMLProfileReader::readProfile(BinaryContext &BC) {
if (opts::Verbosity >= 1) {
outs() << "BOLT-INFO: YAML profile with hash: ";
Expand Down Expand Up @@ -461,6 +574,12 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
if (!YamlBF.Used && BF && !ProfiledFunctions.count(BF))
matchProfileToFunction(YamlBF, *BF);

// Uses name similarity to match functions that were not matched by name.
uint64_t MatchedWithNameSimilarity =
opts::NameSimilarityFunctionMatchingThreshold > 0
? matchWithNameSimilarity(BC)
: 0;

for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
if (!YamlBF.Used && opts::Verbosity >= 1)
errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name
Expand All @@ -473,6 +592,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
<< " functions with hash\n";
outs() << "BOLT-INFO: matched " << MatchedWithLTOCommonName
<< " functions with matching LTO common names\n";
outs() << "BOLT-INFO: matched " << MatchedWithNameSimilarity
<< " functions with similar names\n";
}

// Set for parseFunctionProfile().
Expand Down
Loading

0 comments on commit 25880f2

Please sign in to comment.