Skip to content

Commit

Permalink
Merge remote-tracking branch 'rocm/amd-trunk-dev' into non_const_bounds
Browse files Browse the repository at this point in the history
  • Loading branch information
ergawy committed Jul 14, 2024
2 parents 7411f1f + 24980a6 commit 6abfaf3
Show file tree
Hide file tree
Showing 2,962 changed files with 119,125 additions and 43,009 deletions.
23 changes: 23 additions & 0 deletions .github/new-prs-labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -775,6 +775,29 @@ backend:AArch64:
- clang/include/clang/Sema/SemaARM.h
- clang/lib/Sema/SemaARM.cpp

# Path globs associated with the backend:Hexagon label.
backend:Hexagon:
- clang/include/clang/Basic/BuiltinsHexagon*.def
- clang/include/clang/Sema/SemaHexagon.h
- clang/lib/Basic/Targets/Hexagon.*
- clang/lib/CodeGen/Targets/Hexagon.cpp
- clang/lib/Driver/ToolChains/Hexagon.*
- clang/lib/Sema/SemaHexagon.cpp
- lld/ELF/Arch/Hexagon.cpp
- lldb/source/Plugins/ABI/Hexagon/**
- lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/**
- llvm/include/llvm/BinaryFormat/ELFRelocs/Hexagon.def
- llvm/include/llvm/IR/IntrinsicsHexagon*
- llvm/include/llvm/Support/Hexagon*
- llvm/lib/Support/Hexagon*
- llvm/lib/Target/Hexagon/**
- llvm/test/CodeGen/Hexagon/**
- llvm/test/CodeGen/*/Hexagon/**
- llvm/test/DebugInfo/*/Hexagon/**
# Trailing /** added so files inside the per-pass Hexagon test directories
# match, consistent with the other test-directory globs in this entry.
- llvm/test/Transforms/*/Hexagon/**
- llvm/test/MC/Disassembler/Hexagon/**
- llvm/test/MC/Hexagon/**
- llvm/test/tools/llvm-objdump/ELF/Hexagon/**

backend:loongarch:
- llvm/include/llvm/IR/IntrinsicsLoongArch.td
- llvm/test/MC/LoongArch/**
Expand Down
6 changes: 5 additions & 1 deletion .github/workflows/issue-write.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@ jobs:
permissions:
pull-requests: write
if: >
github.event.workflow_run.event == 'pull_request'
github.event.workflow_run.event == 'pull_request' &&
(
github.event.workflow_run.conclusion == 'success' ||
github.event.workflow_run.conclusion == 'failure'
)
steps:
- name: 'Download artifact'
uses: actions/download-artifact@6b208ae046db98c579e8a3aa621ab581ff575935 # v4.1.1
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/libcxx-build-and-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ jobs:
cxx: [ 'clang++-19' ]
include:
- config: 'generic-gcc'
cc: 'gcc-13'
cxx: 'g++-13'
cc: 'gcc-14'
cxx: 'g++-14'
steps:
- uses: actions/checkout@v4
- name: ${{ matrix.config }}.${{ matrix.cxx }}
Expand Down Expand Up @@ -101,8 +101,8 @@ jobs:
cxx: [ 'clang++-19' ]
include:
- config: 'generic-gcc-cxx11'
cc: 'gcc-13'
cxx: 'g++-13'
cc: 'gcc-14'
cxx: 'g++-14'
- config: 'generic-cxx23'
cc: 'clang-17'
cxx: 'clang++-17'
Expand Down
4 changes: 4 additions & 0 deletions bolt/docs/CommandLineArgumentReference.md
Original file line number Diff line number Diff line change
Expand Up @@ -688,6 +688,10 @@

Use a modified clustering algorithm geared towards minimizing branches

- `--name-similarity-function-matching-threshold=<uint>`

Match functions using namespace and edit distance.

- `--no-inline`

Disable all inlining (overrides other inlining options)
Expand Down
5 changes: 5 additions & 0 deletions bolt/docs/OptimizingLinux.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ $ perf2bolt -p perf.data -o perf.fdata vmlinux

Under a high load, `perf.data` should be several gigabytes in size and you should expect the converted `perf.fdata` not to exceed 100 MB.

Profiles collected from multiple workloads could be joined into a single profile using the `merge-fdata` utility:
```bash
$ merge-fdata perf.1.fdata perf.2.fdata ... perf.<N>.fdata > perf.merged.fdata
```

Two changes are required for the kernel build. The first one is optional but highly recommended. It introduces a BOLT-reserved space into `vmlinux` code section:


Expand Down
9 changes: 0 additions & 9 deletions bolt/include/bolt/Core/BinaryBasicBlock.h
Original file line number Diff line number Diff line change
Expand Up @@ -842,15 +842,6 @@ class BinaryBasicBlock {
bool analyzeBranch(const MCSymbol *&TBB, const MCSymbol *&FBB,
MCInst *&CondBranch, MCInst *&UncondBranch);

/// Return true if iterator \p I is pointing to the first instruction in
/// a pair that could be macro-fused.
bool isMacroOpFusionPair(const_iterator I) const;

/// If the basic block has a pair of instructions suitable for macro-fusion,
/// return iterator to the first instruction of the pair.
/// Otherwise return end().
const_iterator getMacroOpFusionPair() const;

/// Printer required for printing dominator trees.
void printAsOperand(raw_ostream &OS, bool PrintType = true) {
if (PrintType)
Expand Down
4 changes: 0 additions & 4 deletions bolt/include/bolt/Core/BinaryContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -698,10 +698,6 @@ class BinaryContext {

/// Binary-wide aggregated stats.
struct BinaryStats {
/// Stats for macro-fusion.
uint64_t MissedMacroFusionPairs{0};
uint64_t MissedMacroFusionExecCount{0};

/// Stats for stale profile matching:
/// the total number of basic blocks in the profile
uint32_t NumStaleBlocks{0};
Expand Down
4 changes: 0 additions & 4 deletions bolt/include/bolt/Core/BinaryFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -835,10 +835,6 @@ class BinaryFunction {
/// them.
void calculateLoopInfo();

/// Calculate missed macro-fusion opportunities and update BinaryContext
/// stats.
void calculateMacroOpFusionStats();

/// Reports whether a BinaryDominatorTree has already been built for this
/// function, i.e. whether BDT is non-null.
bool hasDomTree() const {
  return !(BDT == nullptr);
}

Expand Down
12 changes: 12 additions & 0 deletions bolt/include/bolt/Core/DebugData.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,15 @@ class DebugRangesSectionWriter {
/// Type-check helper: returns true when \p Writer reports the
/// RangesWriterKind::DebugRangesWriter kind.
static bool classof(const DebugRangesSectionWriter *Writer) {
  const auto WriterKind = Writer->getKind();
  return WriterKind == RangesWriterKind::DebugRangesWriter;
}

/// Append a range to the main buffer.
void appendToRangeBuffer(const DebugBufferVector &CUBuffer);

/// Records \p Die as the Unit DIE to be updated for the CU.
void setDie(DIE *Die) {
  this->Die = Die;
}

/// Returns the Unit DIE previously stored for the CU (the current value of
/// the Die member).
DIE *getDie() const {
  return this->Die;
}

/// Writes out range lists for a current CU being processed.
/// The implementation here is intentionally empty; being virtual, it can be
/// overridden by derived writers.
virtual void finalizeSection() {}
Expand All @@ -232,6 +241,9 @@ class DebugRangesSectionWriter {
static constexpr uint64_t EmptyRangesOffset{0};

private:
/// Stores Unit DIE to be updated for CU. Null until setDie() is called.
DIE *Die{nullptr};

RangesWriterKind Kind;
};

Expand Down
4 changes: 4 additions & 0 deletions bolt/include/bolt/Core/DebugNames.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ class DWARF5AcceleratorTable {
uint64_t CurrentUnitOffset = 0;
const DWARFUnit *CurrentUnit = nullptr;
std::unordered_map<uint32_t, uint32_t> AbbrevTagToIndexMap;
/// Contains a map of TU hashes to Foreign TU indices.
/// This is used to reduce the size of Foreign TU list since there could be
/// multiple TUs with the same hash.
DenseMap<uint64_t, uint32_t> TUHashToIndexMap;

/// Represents a group of entries with identical name (and hence, hash value).
struct HashData {
Expand Down
7 changes: 0 additions & 7 deletions bolt/include/bolt/Core/MCPlusBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -930,13 +930,6 @@ class MCPlusBuilder {
/// Return true if \p Inst is encoded using EVEX (AVX-512).
/// This default implementation always answers false.
virtual bool hasEVEXEncoding(const MCInst &Inst) const {
  return false;
}

/// Return true if the pair of instructions at the front of \p Insts could be
/// fused into a single uop.
///
/// This default implementation is a placeholder: it reports
/// "not implemented" through llvm_unreachable.
virtual bool isMacroOpFusionPair(ArrayRef<MCInst> Insts) const {
  llvm_unreachable("not implemented");
  return false; // Not reached; kept after the unreachable marker.
}

struct X86MemOperand {
unsigned BaseRegNum;
int64_t ScaleImm;
Expand Down
16 changes: 16 additions & 0 deletions bolt/include/bolt/Profile/YAMLProfileReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,29 @@ class YAMLProfileReader : public ProfileReaderBase {
bool parseFunctionProfile(BinaryFunction &Function,
const yaml::bolt::BinaryFunctionProfile &YamlBF);

/// Checks if a function profile matches a binary function.
bool profileMatches(const yaml::bolt::BinaryFunctionProfile &Profile,
const BinaryFunction &BF);

/// Infer function profile from stale data (collected on older binaries).
bool inferStaleProfile(BinaryFunction &Function,
const yaml::bolt::BinaryFunctionProfile &YamlBF);

/// Initialize maps for profile matching.
void buildNameMaps(BinaryContext &BC);

/// Matches functions using exact name.
size_t matchWithExactName();

/// Matches functions using LTO common name.
size_t matchWithLTOCommonName();

/// Matches functions using exact hash.
size_t matchWithHash(BinaryContext &BC);

/// Matches functions with similarly named profiled functions.
size_t matchWithNameSimilarity(BinaryContext &BC);

/// Update matched YAML -> BinaryFunction pair.
void matchProfileToFunction(yaml::bolt::BinaryFunctionProfile &YamlBF,
BinaryFunction &BF) {
Expand Down
7 changes: 5 additions & 2 deletions bolt/include/bolt/Rewrite/DWARFRewriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/DIE.h"
#include "llvm/DWP/DWP.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/ToolOutputFile.h"
#include <cstdint>
Expand Down Expand Up @@ -90,6 +89,10 @@ class DWARFRewriter {
/// Store Rangelists writer for each DWO CU.
RangeListsDWOWriers RangeListsWritersByCU;

/// Stores ranges writer for each DWO CU.
std::unordered_map<uint64_t, std::unique_ptr<DebugRangesSectionWriter>>
LegacyRangesWritersByCU;

std::mutex LocListDebugInfoPatchesMutex;

/// Dwo id specific its RangesBase.
Expand Down Expand Up @@ -183,7 +186,7 @@ class DWARFRewriter {
void updateDebugInfo();

/// Update stmt_list for CUs based on the new .debug_line \p Layout.
void updateLineTableOffsets(const MCAsmLayout &Layout);
void updateLineTableOffsets(const MCAssembler &Asm);

uint64_t getDwoRangesBase(uint64_t DWOId) { return DwoRangesBase[DWOId]; }

Expand Down
39 changes: 0 additions & 39 deletions bolt/lib/Core/BinaryBasicBlock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -404,45 +404,6 @@ bool BinaryBasicBlock::analyzeBranch(const MCSymbol *&TBB, const MCSymbol *&FBB,
CondBranch, UncondBranch);
}

/// Asks the target's MCPlusBuilder whether the instructions starting at
/// iterator \p I form a macro-fusion pair. The builder is handed the tail of
/// this block's instruction list beginning at \p I.
bool BinaryBasicBlock::isMacroOpFusionPair(const_iterator I) const {
  const auto Offset = I - begin();
  const ArrayRef<MCInst> AllInsts = Instructions;
  const ArrayRef<MCInst> Tail = AllInsts.slice(Offset);
  return Function->getBinaryContext().MIB->isMacroOpFusionPair(Tail);
}

/// Looks for a macro-fusion candidate at the end of this block.
///
/// Returns an iterator to the instruction immediately preceding the block's
/// conditional branch when isMacroOpFusionPair() accepts it; returns end() in
/// every other case (non-X86 binary, fewer than two non-pseudo instructions,
/// successor count other than two, no conditional branch found, or nothing
/// before the branch).
BinaryBasicBlock::const_iterator
BinaryBasicBlock::getMacroOpFusionPair() const {
  BinaryContext &BC = Function->getBinaryContext();
  if (!BC.isX86())
    return end();

  // Only blocks with at least two real instructions and exactly two
  // successors are considered.
  const bool HasCandidateShape = getNumNonPseudos() >= 2 && succ_size() == 2;
  if (!HasCandidateShape)
    return end();

  auto RevIt = getLastNonPseudo();
  assert(RevIt != rend() && "cannot have an empty block with 2 successors");

  // A trailing unconditional branch may follow the conditional one; step
  // over it.
  if (BC.MIB->isUnconditionalBranch(*RevIt))
    ++RevIt;

  if (!BC.MIB->isConditionalBranch(*RevIt))
    return end();

  // Move to the instruction that precedes the conditional branch.
  ++RevIt;
  if (RevIt == rend())
    return end();

  // Translate the reverse iterator into the matching forward iterator.
  const const_iterator Candidate = std::prev(RevIt.base());
  if (!isMacroOpFusionPair(Candidate))
    return end();

  return Candidate;
}

MCInst *BinaryBasicBlock::getTerminatorBefore(MCInst *Pos) {
BinaryContext &BC = Function->getBinaryContext();
auto Itr = rbegin();
Expand Down
23 changes: 7 additions & 16 deletions bolt/lib/Core/BinaryContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
Expand Down Expand Up @@ -2404,32 +2403,23 @@ BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
Streamer->emitLabel(SplitStartLabel);
emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true);
Streamer->emitLabel(SplitEndLabel);
// To avoid calling MCObjectStreamer::flushPendingLabels() which is
// private
Streamer->emitBytes(StringRef(""));
Streamer->switchSection(Section);
}

// To avoid calling MCObjectStreamer::flushPendingLabels() which is private or
// MCStreamer::Finish(), which does more than we want
Streamer->emitBytes(StringRef(""));

MCAssembler &Assembler =
static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
MCAsmLayout Layout(Assembler);
Assembler.layout(Layout);
Assembler.layout();

// Obtain fragment sizes.
std::vector<uint64_t> FragmentSizes;
// Main fragment size.
const uint64_t HotSize =
Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel);
const uint64_t HotSize = Assembler.getSymbolOffset(*EndLabel) -
Assembler.getSymbolOffset(*StartLabel);
FragmentSizes.push_back(HotSize);
// Split fragment sizes.
uint64_t ColdSize = 0;
for (const auto &Labels : SplitLabels) {
uint64_t Size = Layout.getSymbolOffset(*Labels.second) -
Layout.getSymbolOffset(*Labels.first);
uint64_t Size = Assembler.getSymbolOffset(*Labels.second) -
Assembler.getSymbolOffset(*Labels.first);
FragmentSizes.push_back(Size);
ColdSize += Size;
}
Expand All @@ -2439,7 +2429,8 @@ BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
for (FunctionFragment &FF : BF.getLayout().fragments()) {
BinaryBasicBlock *PrevBB = nullptr;
for (BinaryBasicBlock *BB : FF) {
const uint64_t BBStartOffset = Layout.getSymbolOffset(*(BB->getLabel()));
const uint64_t BBStartOffset =
Assembler.getSymbolOffset(*(BB->getLabel()));
BB->setOutputStartAddress(BBStartOffset);
if (PrevBB)
PrevBB->setOutputEndAddress(BBStartOffset);
Expand Down
Loading

0 comments on commit 6abfaf3

Please sign in to comment.