Skip to content

Commit

Permalink
Merged master:40b72c9c7920 into amd-gfx:b1dd8d87a662
Browse files: browse the repository at this point in the history
Local branch amd-gfx b1dd8d8 Merged master:f980ed4184f9 into amd-gfx:411720708275
Remote branch master 40b72c9 [ARM] Extra MLA reductions tests. NFC
  • Loading branch information
Sw authored and committed on Sep 11, 2020
2 parents b1dd8d8 + 40b72c9 commit ec51c0b
Show file tree
Hide file tree
Showing 11 changed files with 3,533 additions and 71 deletions.
1 change: 1 addition & 0 deletions clang/lib/Basic/Targets/PPC.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo {
SimdDefaultAlign = 128;
LongDoubleWidth = LongDoubleAlign = 128;
LongDoubleFormat = &llvm::APFloat::PPCDoubleDouble();
HasStrictFP = true;
}

// Set the language option for altivec based on our value.
Expand Down
2 changes: 0 additions & 2 deletions clang/test/CodeGen/builtins-ppc-fpconstrained.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,12 @@
// RUN: %clang_cc1 -triple powerpc64le-gnu-linux -target-feature +vsx \
// RUN: -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-UNCONSTRAINED %s
// RUN: %clang_cc1 -triple powerpc64le-gnu-linux -target-feature +vsx \
// RUN: -fexperimental-strict-floating-point \
// RUN: -ffp-exception-behavior=strict -emit-llvm %s -o - | FileCheck \
// RUN: --check-prefix=CHECK-CONSTRAINED -vv %s
// RUN: %clang_cc1 -triple powerpc64le-gnu-linux -target-feature +vsx \
// RUN: -fallow-half-arguments-and-returns -S -o - %s | \
// RUN: FileCheck --check-prefix=CHECK-ASM --check-prefix=NOT-FIXME-CHECK %s
// RUN: %clang_cc1 -triple powerpc64le-gnu-linux -target-feature +vsx \
// RUN: -fexperimental-strict-floating-point \
// RUN: -fallow-half-arguments-and-returns -S -ffp-exception-behavior=strict \
// RUN: -o - %s | FileCheck --check-prefix=CHECK-ASM \
// RUN: --check-prefix=FIXME-CHECK %s
Expand Down
14 changes: 6 additions & 8 deletions compiler-rt/test/dfsan/event_callbacks.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,6 @@
// RUN: %clang_dfsan -O2 -mllvm -dfsan-event-callbacks %s %t-callbacks.o -o %t
// RUN: %run %t FooBarBaz 2>&1 | FileCheck %s

// See PR47488, parts of this test get optimized out by a more aggressive
// dead store eliminator.
// XFAIL: *

// Tests that callbacks are inserted for store events when
// -dfsan-event-callbacks is specified.

Expand Down Expand Up @@ -118,14 +114,16 @@ int main(int Argc, char *Argv[]) {
LabelArgv = dfsan_create_label("Argv", 0);
dfsan_set_label(LabelArgv, Argv[1], LenArgv);

char SinkBuf[64];
assert(LenArgv < sizeof(SinkBuf) - 1);
char Buf[64];
assert(LenArgv < sizeof(Buf) - 1);

// CHECK: Label 4 copied to memory
memcpy(SinkBuf, Argv[1], LenArgv);
void *volatile SinkPtr = Buf;
memcpy(SinkPtr, Argv[1], LenArgv);

// CHECK: Label 4 copied to memory
memmove(&SinkBuf[1], SinkBuf, LenArgv);
SinkPtr = &Buf[1];
memmove(SinkPtr, Buf, LenArgv);

return 0;
}
Expand Down
26 changes: 14 additions & 12 deletions lld/ELF/Arch/PPC64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ enum DFormOpcd {
ADDI = 14
};

constexpr uint32_t NOP = 0x60000000;

enum class PPCLegacyInsn : uint32_t {
NOINSN = 0,
// Loads.
Expand Down Expand Up @@ -691,7 +693,7 @@ void PPC64::relaxGot(uint8_t *loc, const Relocation &rel, uint64_t val) const {
writePrefixedInstruction(loc, pcRelInsn |
((totalDisp & 0x3ffff0000) << 16) |
(totalDisp & 0xffff));
write32(loc + rel.addend, 0x60000000); // nop accessInsn.
write32(loc + rel.addend, NOP); // nop accessInsn.
break;
}
default:
Expand All @@ -718,15 +720,15 @@ void PPC64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,

switch (rel.type) {
case R_PPC64_GOT_TLSGD16_HA:
writeFromHalf16(loc, 0x60000000); // nop
writeFromHalf16(loc, NOP);
break;
case R_PPC64_GOT_TLSGD16:
case R_PPC64_GOT_TLSGD16_LO:
writeFromHalf16(loc, 0x3c6d0000); // addis r3, r13
relocateNoSym(loc, R_PPC64_TPREL16_HA, val);
break;
case R_PPC64_TLSGD:
write32(loc, 0x60000000); // nop
write32(loc, NOP);
write32(loc + 4, 0x38630000); // addi r3, r3
// Since we are relocating a half16 type relocation and Loc + 4 points to
// the start of an instruction we need to advance the buffer by an extra
Expand Down Expand Up @@ -758,13 +760,13 @@ void PPC64::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,

switch (rel.type) {
case R_PPC64_GOT_TLSLD16_HA:
writeFromHalf16(loc, 0x60000000); // nop
writeFromHalf16(loc, NOP);
break;
case R_PPC64_GOT_TLSLD16_LO:
writeFromHalf16(loc, 0x3c6d0000); // addis r3, r13, 0
break;
case R_PPC64_TLSLD:
write32(loc, 0x60000000); // nop
write32(loc, NOP);
write32(loc + 4, 0x38631000); // addi r3, r3, 4096
break;
case R_PPC64_DTPREL16:
Expand Down Expand Up @@ -829,7 +831,7 @@ void PPC64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
unsigned offset = (config->ekind == ELF64BEKind) ? 2 : 0;
switch (rel.type) {
case R_PPC64_GOT_TPREL16_HA:
write32(loc - offset, 0x60000000); // nop
write32(loc - offset, NOP);
break;
case R_PPC64_GOT_TPREL16_LO_DS:
case R_PPC64_GOT_TPREL16_DS: {
Expand Down Expand Up @@ -1128,7 +1130,7 @@ void PPC64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
case R_PPC64_REL16_HA:
case R_PPC64_TPREL16_HA:
if (config->tocOptimize && shouldTocOptimize && ha(val) == 0)
writeFromHalf16(loc, 0x60000000);
writeFromHalf16(loc, NOP);
else
write16(loc, ha(val));
break;
Expand Down Expand Up @@ -1353,7 +1355,7 @@ void PPC64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
return;
}
case R_PPC64_TLSGD:
write32(loc, 0x60000000); // bl __tls_get_addr(sym@tlsgd) --> nop
write32(loc, NOP); // bl __tls_get_addr(sym@tlsgd) --> nop
write32(loc + 4, 0x7c636A14); // nop --> add r3, r3, r13
return;
default:
Expand Down Expand Up @@ -1424,7 +1426,7 @@ bool PPC64::adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end,
uint32_t secondInstr = read32(loc + 8);
if (!loImm && getPrimaryOpCode(secondInstr) == 14) {
loImm = secondInstr & 0xFFFF;
} else if (secondInstr != 0x60000000) {
} else if (secondInstr != NOP) {
return false;
}

Expand All @@ -1438,7 +1440,7 @@ bool PPC64::adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end,
};
if (!checkRegOperands(firstInstr, 12, 1))
return false;
if (secondInstr != 0x60000000 && !checkRegOperands(secondInstr, 12, 12))
if (secondInstr != NOP && !checkRegOperands(secondInstr, 12, 12))
return false;

int32_t stackFrameSize = (hiImm * 65536) + loImm;
Expand All @@ -1457,12 +1459,12 @@ bool PPC64::adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end,
if (hiImm) {
write32(loc + 4, 0x3D810000 | (uint16_t)hiImm);
// If the low immediate is zero the second instruction will be a nop.
secondInstr = loImm ? 0x398C0000 | (uint16_t)loImm : 0x60000000;
secondInstr = loImm ? 0x398C0000 | (uint16_t)loImm : NOP;
write32(loc + 8, secondInstr);
} else {
// addi r12, r1, imm
write32(loc + 4, (0x39810000) | (uint16_t)loImm);
write32(loc + 8, 0x60000000);
write32(loc + 8, NOP);
}

return true;
Expand Down
4 changes: 2 additions & 2 deletions llvm/docs/AMDGPUDwarfExtensionsForHeterogeneousDebugging.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2678,7 +2678,7 @@ architectures.

DWARF address space identifiers are used by:

* The DWARF expession operations: ``DW_OP_LLVM_aspace_bregx``,
* The DWARF expression operations: ``DW_OP_LLVM_aspace_bregx``,
``DW_OP_LLVM_form_aspace_address``, ``DW_OP_LLVM_implicit_aspace_pointer``,
and ``DW_OP_xderef*``.

Expand Down Expand Up @@ -3387,7 +3387,7 @@ Standard Content Descriptions
provided by the* ``DW_LNCT_path`` *field. When the source field is absent,
consumers can access the file to get the source text.*

*This is particularly useful for programing languages that support runtime
*This is particularly useful for programming languages that support runtime
compilation and runtime generation of source text. In these cases, the
source text does not reside in any permanent file. For example, the OpenCL
language [:ref:`OpenCL <amdgpu-dwarf-OpenCL>`] supports online compilation.*
Expand Down
4 changes: 2 additions & 2 deletions llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,8 @@ void ThinLtoInstrumentationLayer::nudgeIntoDiscovery(
LLVM_DEBUG(dbgs() << "Nudged " << Count << " new functions into discovery\n");
}

void ThinLtoInstrumentationLayer::emit(
std::unique_ptr<MaterializationResponsibility> R, ThreadSafeModule TSM) {
void ThinLtoInstrumentationLayer::emit(MaterializationResponsibility R,
ThreadSafeModule TSM) {
TSM.withModuleDo([this](Module &M) {
std::vector<Function *> FunctionsToInstrument;

Expand Down
3 changes: 1 addition & 2 deletions llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ class ThinLtoInstrumentationLayer : public IRLayer {

~ThinLtoInstrumentationLayer() override;

void emit(std::unique_ptr<MaterializationResponsibility> R,
ThreadSafeModule TSM) override;
void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override;

unsigned reserveDiscoveryFlags(unsigned Count);
void registerDiscoveryFlagOwners(std::vector<GlobalValue::GUID> Guids,
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3694,11 +3694,13 @@ static bool isLoadCombineCandidateImpl(Value *Root, unsigned NumElts,
TargetTransformInfo *TTI) {
// Look past the root to find a source value. Arbitrarily follow the
// path through operand 0 of any 'or'. Also, peek through optional
// shift-left-by-constant.
// shift-left-by-multiple-of-8-bits.
Value *ZextLoad = Root;
const APInt *ShAmtC;
while (!isa<ConstantExpr>(ZextLoad) &&
(match(ZextLoad, m_Or(m_Value(), m_Value())) ||
match(ZextLoad, m_Shl(m_Value(), m_Constant()))))
(match(ZextLoad, m_Shl(m_Value(), m_APInt(ShAmtC))) &&
ShAmtC->urem(8) == 0)))
ZextLoad = cast<BinaryOperator>(ZextLoad)->getOperand(0);

// Check if the input is an extended load of the required or/shift expression.
Expand Down
Loading

0 comments on commit ec51c0b

Please sign in to comment.