Skip to content

Commit

Permalink
Merged master:d380b582f7f0 into amd-gfx:0228fcc59645
Browse files Browse the repository at this point in the history
Local branch amd-gfx 0228fcc Merged master:e45b0708ae81 into amd-gfx:1e589fb96e5c
Remote branch master d380b58 [mlir][Linalg] Make LinalgBaseTilingPattern not delete the original operation.
  • Loading branch information
Sw authored and Sw committed Sep 11, 2020
2 parents 0228fcc + d380b58 commit b39f544
Show file tree
Hide file tree
Showing 23 changed files with 785 additions and 124 deletions.
36 changes: 21 additions & 15 deletions clang/docs/LanguageExtensions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2408,20 +2408,6 @@ with ``__has_feature(cxx_constexpr_string_builtins)``.
Memory builtins
---------------
* ``__builtin_memcpy_inline``
.. code-block:: c
void __builtin_memcpy_inline(void *dst, const void *src, size_t size);
``__builtin_memcpy_inline(dst, src, size)`` is identical to
``__builtin_memcpy(dst, src, size)`` except that the generated code is
guaranteed not to call any external functions. See LLVM IR `llvm.memcpy.inline
<https://llvm.org/docs/LangRef.html#llvm-memcpy-inline-intrinsic>`_ Intrinsic
for more information.
Note that the `size` argument must be a compile time constant.
Clang provides constant expression evaluation support for builtin forms of the
following functions from the C standard library headers
``<string.h>`` and ``<wchar.h>``:
Expand All @@ -2439,7 +2425,27 @@ are pointers to arrays with the same trivially copyable element type, and the
given size is an exact multiple of the element size that is no greater than
the number of elements accessible through the source and destination operands.
Constant evaluation support is not yet provided for ``__builtin_memcpy_inline``.
Guaranteed inlined copy
^^^^^^^^^^^^^^^^^^^^^^^
.. code-block:: c
void __builtin_memcpy_inline(void *dst, const void *src, size_t size);
``__builtin_memcpy_inline`` has been designed as a building block for efficient
``memcpy`` implementations. It is identical to ``__builtin_memcpy`` but also
guarantees not to call any external functions. See LLVM IR `llvm.memcpy.inline
<https://llvm.org/docs/LangRef.html#llvm-memcpy-inline-intrinsic>`_ Intrinsic
for more information.
This is useful to implement a custom version of ``memcpy``, implement a
``libc`` memcpy or work around the absence of a ``libc``.
Note that the `size` argument must be a compile time constant.
Note that this intrinsic cannot yet be called in a ``constexpr`` context.
Atomic Min/Max builtins with memory ordering
--------------------------------------------
Expand Down
8 changes: 8 additions & 0 deletions llvm/include/llvm/MC/MCWinEH.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ struct Instruction {

Instruction(unsigned Op, MCSymbol *L, unsigned Reg, unsigned Off)
: Label(L), Offset(Off), Register(Reg), Operation(Op) {}

/// Two instructions compare equal when they describe the same operation on
/// the same register with the same offset. The label is intentionally left
/// out of the comparison, so the same operation applied at a different spot
/// (i.e. pointing at a different label) still compares equal.
bool operator==(const Instruction &I) const {
  if (Operation != I.Operation)
    return false;
  if (Register != I.Register)
    return false;
  return Offset == I.Offset;
}
/// Negation of operator==.
bool operator!=(const Instruction &I) const {
  const bool Same = (*this == I);
  return !Same;
}
};

struct FrameInfo {
Expand Down
52 changes: 52 additions & 0 deletions llvm/include/llvm/Passes/StandardInstrumentations.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,19 @@

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/PassInstrumentation.h"
#include "llvm/IR/PassTimingInfo.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/CommandLine.h"

#include <string>
#include <utility>

namespace llvm {

class Module;
class Function;

/// Instrumentation to print IR before/after passes.
///
Expand Down Expand Up @@ -73,13 +77,61 @@ class PrintPassInstrumentation {
bool DebugLogging;
};

/// Instrumentation that keeps CFG snapshots (see GraphStackBefore) and exposes
/// the VerifyPreservedCFG option; the callbacks themselves are installed by
/// registerCallbacks(), which is defined elsewhere.
class PreservedCFGCheckerInstrumentation {
private:
  // A CFG snapshot: Graph maps every non-leaf basic block BB to the set of
  // pairs (Succ, Multiplicity), i.e. each successor of BB together with the
  // number of BB->Succ edges. The mapped sets are unordered, so successor
  // order is not part of the snapshot and a pass may swap a block's
  // successors without that being reported as a CFG change.
  //
  // Optionally the snapshot is guarded by tracking handles for the basic
  // blocks (BBGuards). If any guarded block is deleted or RAUWed the snapshot
  // is considered poisoned and none of the block pointers in Graph is used.
  struct CFG {
    // Tracking handle for a single basic block. Both the deletion and the
    // RAUW callbacks forward to CallbackVH::deleted(); the guard reports
    // itself as poisoned once it no longer holds a value pointer.
    struct BBGuard final : public CallbackVH {
      BBGuard(const BasicBlock *BB) : CallbackVH(BB) {}
      void deleted() override { CallbackVH::deleted(); }
      void allUsesReplacedWith(Value *) override { CallbackVH::deleted(); }
      bool isPoisoned() const { return getValPtr() == nullptr; }
    };

    // Present only when block lifetime tracking is in use.
    Optional<DenseMap<intptr_t, BBGuard>> BBGuards;
    // BB -> (Succ -> Multiplicity).
    DenseMap<const BasicBlock *, DenseMap<const BasicBlock *, unsigned>> Graph;

    CFG(const Function *F, bool TrackBBLifetime = false);

    // Snapshots are equal only if neither one is poisoned and their graphs
    // match; a poisoned snapshot never compares equal to anything.
    bool operator==(const CFG &G) const {
      if (isPoisoned() || G.isPoisoned())
        return false;
      return Graph == G.Graph;
    }

    // True when guards exist and at least one of them is poisoned.
    bool isPoisoned() const {
      if (!BBGuards)
        return false;
      for (auto &Entry : *BBGuards)
        if (Entry.second.isPoisoned())
          return true;
      return false;
    }

    static void printDiff(raw_ostream &out, const CFG &Before,
                          const CFG &After);
  };

  // Stack of (name, optional CFG snapshot) pairs.
  SmallVector<std::pair<StringRef, Optional<CFG>>, 8> GraphStackBefore;

public:
  static cl::opt<bool> VerifyPreservedCFG;
  void registerCallbacks(PassInstrumentationCallbacks &PIC);
};

/// This class provides an interface to register all the standard pass
/// instrumentations and manages their state (if any).
class StandardInstrumentations {
PrintIRInstrumentation PrintIR;
PrintPassInstrumentation PrintPass;
TimePassesHandler TimePasses;
OptNoneInstrumentation OptNone;
PreservedCFGCheckerInstrumentation PreservedCFGChecker;

public:
StandardInstrumentations(bool DebugLogging) : PrintPass(DebugLogging) {}
Expand Down
9 changes: 0 additions & 9 deletions llvm/lib/CodeGen/AsmPrinter/WinException.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -258,15 +258,6 @@ void WinException::endFuncletImpl() {
if (F.hasPersonalityFn())
Per = classifyEHPersonality(F.getPersonalityFn()->stripPointerCasts());

// On funclet exit, we emit a fake "function" end marker, so that the call
// to EmitWinEHHandlerData below can calculate the size of the funclet or
// function.
if (isAArch64) {
MCSection *XData = Asm->OutStreamer->getAssociatedXDataSection(
Asm->OutStreamer->getCurrentSectionOnly());
Asm->OutStreamer->SwitchSection(XData);
}

// Emit an UNWIND_INFO struct describing the prologue.
Asm->OutStreamer->EmitWinEHHandlerData();

Expand Down
118 changes: 114 additions & 4 deletions llvm/lib/MC/MCWin64EH.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,8 +264,7 @@ static int64_t GetAbsDifference(MCStreamer &Streamer, const MCSymbol *LHS,
return value;
}

static uint32_t
ARM64CountOfUnwindCodes(const std::vector<WinEH::Instruction> &Insns) {
static uint32_t ARM64CountOfUnwindCodes(ArrayRef<WinEH::Instruction> Insns) {
uint32_t Count = 0;
for (const auto &I : Insns) {
switch (static_cast<Win64EH::UnwindOpcodes>(I.Operation)) {
Expand Down Expand Up @@ -544,6 +543,109 @@ FindMatchingEpilog(const std::vector<WinEH::Instruction>& EpilogInstrs,
return nullptr;
}

// Rewrite the unwind opcodes in Instructions, in place, into shorter
// equivalent opcodes where one exists.
//
// The list is visited front to back, or back to front when Reverse is set.
// Prologues are processed forwards and epilogues backwards, i.e. always in
// the reverse of the order in which the opcodes are stored.
static void simplifyOpcodes(std::vector<WinEH::Instruction> &Instructions,
                            bool Reverse) {
  // Register and offset of the register pair saved by the previously visited
  // opcode; both hold -1 (as unsigned) when that opcode was not a pair save.
  unsigned LastPairRegister = -1;
  unsigned LastPairOffset = -1;

  // Convert 2-byte opcodes into equivalent 1-byte ones.
  auto ShrinkOpcode = [&](WinEH::Instruction &Inst) {
    const bool IsSaveRegP = Inst.Operation == Win64EH::UOP_SaveRegP;
    const bool IsSaveRegPX = Inst.Operation == Win64EH::UOP_SaveRegPX;

    if (IsSaveRegP && Inst.Register == 29) {
      Inst.Operation = Win64EH::UOP_SaveFPLR;
      Inst.Register = -1;
    } else if (IsSaveRegPX && Inst.Register == 29) {
      Inst.Operation = Win64EH::UOP_SaveFPLRX;
      Inst.Register = -1;
    } else if (IsSaveRegPX && Inst.Register == 19 && Inst.Offset <= 248) {
      Inst.Operation = Win64EH::UOP_SaveR19R20X;
      Inst.Register = -1;
    } else if (Inst.Operation == Win64EH::UOP_AddFP && Inst.Offset == 0) {
      Inst.Operation = Win64EH::UOP_SetFP;
    } else if (IsSaveRegP && Inst.Register == LastPairRegister + 2 &&
               Inst.Offset == LastPairOffset + 16) {
      // This pair directly follows the previously saved pair, both in
      // register number and in stack offset.
      Inst.Operation = Win64EH::UOP_SaveNext;
      Inst.Register = -1;
      Inst.Offset = 0;
      // UOP_SaveNext is deliberately not created for float register pairs:
      // current versions of Windows (up to at least 20.04) are buggy
      // regarding SaveNext for float pairs.
    }
  };

  // Remember what the (possibly rewritten) opcode saved, so the next visited
  // opcode can be checked for being expressible as UOP_SaveNext.
  auto RecordPair = [&](const WinEH::Instruction &Inst) {
    if (Inst.Operation == Win64EH::UOP_SaveR19R20X) {
      LastPairOffset = 0;
      LastPairRegister = 19;
      return;
    }
    if (Inst.Operation == Win64EH::UOP_SaveRegPX) {
      LastPairOffset = 0;
      LastPairRegister = Inst.Register;
      return;
    }
    if (Inst.Operation == Win64EH::UOP_SaveRegP) {
      LastPairOffset = Inst.Offset;
      LastPairRegister = Inst.Register;
      return;
    }
    if (Inst.Operation == Win64EH::UOP_SaveNext) {
      LastPairRegister += 2;
      LastPairOffset += 16;
      return;
    }
    // Any other opcode breaks the chain of consecutive pair saves.
    LastPairRegister = -1;
    LastPairOffset = -1;
  };

  auto Visit = [&](WinEH::Instruction &Inst) {
    ShrinkOpcode(Inst);
    RecordPair(Inst);
  };

  if (!Reverse) {
    for (WinEH::Instruction &Inst : Instructions)
      Visit(Inst);
    return;
  }

  for (auto It = Instructions.rbegin(), End = Instructions.rend(); It != End;
       ++It)
    Visit(*It);
}

// Decide whether the epilog of the function described by info can be
// expressed as a "packed" epilog, i.e. one that reuses the unwind codes of the
// prolog instead of getting a scope and codes of its own.
//
// streamer        - used to compute the distance between the epilog start and
//                   the end of the function/funclet.
// info            - frame info holding the prolog instructions and EpilogMap.
// PrologCodeBytes - size in bytes of the prolog's unwind codes.
//
// Returns -1 if the epilog cannot be packed. Otherwise returns the size in
// bytes of the unwind codes for the prolog instructions that are not shared
// with the epilog, and clears info->EpilogMap so that the epilog's opcodes
// are not processed separately afterwards.
static int checkPackedEpilog(MCStreamer &streamer, WinEH::FrameInfo *info,
                             int PrologCodeBytes) {
  // Can only pack if there's one single epilog
  if (info->EpilogMap.size() != 1)
    return -1;

  const std::vector<WinEH::Instruction> &Epilog =
      info->EpilogMap.begin()->second;
  const size_t EpilogSize = Epilog.size();
  const size_t PrologSize = info->Instructions.size();

  // Can pack if the epilog is a subset of the prolog but not vice versa
  if (EpilogSize > PrologSize)
    return -1;

  // Check that the epilog actually is a perfect match for the end (backwards)
  // of the prolog.
  for (size_t I = 0; I != EpilogSize; ++I) {
    if (info->Instructions[I] != Epilog[EpilogSize - 1 - I])
      return -1;
  }

  // Check that the epilog actually is at the very end of the function,
  // otherwise it can't be packed.
  uint32_t DistanceFromEnd = static_cast<uint32_t>(GetAbsDifference(
      streamer, info->FuncletOrFuncEnd, info->EpilogMap.begin()->first));
  if (DistanceFromEnd / 4 != EpilogSize)
    return -1;

  // Count the unwind code bytes of the prolog instructions that follow the
  // shared ones. Form the start pointer with data() + EpilogSize rather than
  // &Instructions[EpilogSize]: when the epilog is as long as the prolog the
  // latter would index one past the end of the vector, which operator[] does
  // not permit, whereas a one-past-the-end pointer with length 0 is valid.
  int Offset = ARM64CountOfUnwindCodes(ArrayRef<WinEH::Instruction>(
      info->Instructions.data() + EpilogSize, PrologSize - EpilogSize));

  // Check that the offset and prolog size fits in the first word; it's
  // unclear whether the epilog count in the extension word can be taken
  // as packed epilog offset.
  if (Offset > 31 || PrologCodeBytes > 124)
    return -1;

  info->EpilogMap.clear();
  return Offset;
}

// Populate the .xdata section. The format of .xdata on ARM64 is documented at
// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
Expand Down Expand Up @@ -572,6 +674,10 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
return;
}

simplifyOpcodes(info->Instructions, false);
for (auto &I : info->EpilogMap)
simplifyOpcodes(I.second, true);

MCContext &context = streamer.getContext();
MCSymbol *Label = context.createTempSymbol();

Expand Down Expand Up @@ -618,6 +724,8 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
uint32_t PrologCodeBytes = ARM64CountOfUnwindCodes(info->Instructions);
uint32_t TotalCodeBytes = PrologCodeBytes;

int PackedEpilogOffset = checkPackedEpilog(streamer, info, PrologCodeBytes);

// Process epilogs.
MapVector<MCSymbol *, uint32_t> EpilogInfo;
// Epilogs processed so far.
Expand Down Expand Up @@ -650,15 +758,17 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
uint32_t CodeWordsMod = TotalCodeBytes % 4;
if (CodeWordsMod)
CodeWords++;
uint32_t EpilogCount = info->EpilogMap.size();
uint32_t EpilogCount =
PackedEpilogOffset >= 0 ? PackedEpilogOffset : info->EpilogMap.size();
bool ExtensionWord = EpilogCount > 31 || TotalCodeBytes > 124;
if (!ExtensionWord) {
row1 |= (EpilogCount & 0x1F) << 22;
row1 |= (CodeWords & 0x1F) << 27;
}
// E is always 0 right now, TODO: packed epilog setup
if (info->HandlesExceptions) // X
row1 |= 1 << 20;
if (PackedEpilogOffset >= 0) // E
row1 |= 1 << 21;
row1 |= FuncLength & 0x3FFFF;
streamer.emitInt32(row1);

Expand Down
Loading

0 comments on commit b39f544

Please sign in to comment.